From: Greg Kroah-Hartman Date: Sat, 26 Oct 2013 18:11:00 +0000 (+0100) Subject: 3.10-stable patches X-Git-Tag: v3.4.68~12 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7f7c74eb32825e00d1c6d64406dd7a25dbd6ff35;p=thirdparty%2Fkernel%2Fstable-queue.git 3.10-stable patches added patches: be2net-pass-if_id-for-v1-and-v2-versions-of-tx_create-cmd.patch bnx2x-record-rx-queue-for-lro-packets.patch bridge-correctly-clamp-max-forward_delay-when-enabling-stp.patch can-dev-fix-nlmsg-size-calculation-in-can_get_size.patch connector-use-nlmsg_len-to-check-message-length.patch davinci_emac.c-fix-iff_allmulti-setup.patch farsync-fix-info-leak-in-ioctl.patch inet-fix-possible-memory-corruption-with-udp_cork-and-ufo.patch ipv4-fix-ineffective-source-address-selection.patch ipv6-always-prefer-rt6i_gateway-if-present.patch ipv6-fill-rt6i_gateway-with-nexthop-address.patch ipv6-probe-routes-asynchronous-in-rt6_probe.patch l2tp-fix-build-warning-with-ipv6-disabled.patch l2tp-fix-kernel-panic-when-using-ipv4-mapped-ipv6-addresses.patch l2tp-must-disable-bh-before-calling-l2tp_xmit_skb.patch net-do-not-call-sock_put-on-timewait-sockets.patch net-dst-provide-accessor-function-to-dst-xfrm.patch netfilter-nf_conntrack-fix-rt6i_gateway-checks-for-h.323-helper.patch net-fix-cipso-packet-validation-when-netlabel.patch net-heap-overflow-in-__audit_sockaddr.patch net-mv643xx_eth-fix-orphaned-statistics-timer-crash.patch net-mv643xx_eth-update-statistics-timer-from-timer-context-only.patch net-secure_seq-fix-warning-when-config_ipv6-and-config_inet-are-not-selected.patch net-unix-inherit-sock_pass-cred-sec-flags-from-socket-to-fix-race.patch net-vlan-fix-nlmsg-size-calculation-in-vlan_get_size.patch proc-connector-fix-info-leaks.patch sctp-perform-software-checksum-if-packet-has-to-be-fragmented.patch sctp-use-software-crc32-checksum-when-xfrm-transform-will-happen.patch tcp-do-not-forget-fin-in-tcp_shifted_skb.patch tcp-fix-incorrect-ca_state-in-tail-loss-probe.patch 
tcp-must-unclone-packets-before-mangling-them.patch tcp-tso-packets-automatic-sizing.patch tcp-tsq-can-use-a-dynamic-limit.patch unix_diag-fix-info-leak.patch virtio-net-don-t-respond-to-cpu-hotplug-notifier-if-we-re-not-ready.patch virtio-net-fix-the-race-between-channels-setting-and-refill.patch virtio-net-refill-only-when-device-is-up-during-setting-queues.patch vti-get-rid-of-nf-mark-rule-in-prerouting.patch wanxl-fix-info-leak-in-ioctl.patch xen-netback-don-t-destroy-the-netdev-until-the-vif-is-shut-down.patch --- diff --git a/queue-3.10/be2net-pass-if_id-for-v1-and-v2-versions-of-tx_create-cmd.patch b/queue-3.10/be2net-pass-if_id-for-v1-and-v2-versions-of-tx_create-cmd.patch new file mode 100644 index 00000000000..3646462268e --- /dev/null +++ b/queue-3.10/be2net-pass-if_id-for-v1-and-v2-versions-of-tx_create-cmd.patch @@ -0,0 +1,40 @@ +From 4b31d5065d914d114569331952ee13e4be1e4dd9 Mon Sep 17 00:00:00 2001 +From: Vasundhara Volam +Date: Thu, 17 Oct 2013 11:47:14 +0530 +Subject: be2net: pass if_id for v1 and V2 versions of TX_CREATE cmd + +From: Vasundhara Volam + +[ Upstream commit 0fb88d61bc60779dde88b0fc268da17eb81d0412 ] + +It is a required field for all TX_CREATE cmd versions > 0. +This fixes a driver initialization failure, caused by recent SH-R Firmwares +(versions > 10.0.639.0) failing the TX_CREATE cmd when if_id field is +not passed. + +Signed-off-by: Sathya Perla +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/emulex/benet/be_cmds.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/emulex/benet/be_cmds.c ++++ b/drivers/net/ethernet/emulex/benet/be_cmds.c +@@ -1150,7 +1150,6 @@ int be_cmd_txq_create(struct be_adapter + + if (lancer_chip(adapter)) { + req->hdr.version = 1; +- req->if_id = cpu_to_le16(adapter->if_handle); + } else if (BEx_chip(adapter)) { + if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) + req->hdr.version = 2; +@@ -1158,6 +1157,8 @@ int be_cmd_txq_create(struct be_adapter + req->hdr.version = 2; + } + ++ if (req->hdr.version > 0) ++ req->if_id = cpu_to_le16(adapter->if_handle); + req->num_pages = PAGES_4K_SPANNED(q_mem->va, q_mem->size); + req->ulp_num = BE_ULP1_NUM; + req->type = BE_ETH_TX_RING_TYPE_STANDARD; diff --git a/queue-3.10/bnx2x-record-rx-queue-for-lro-packets.patch b/queue-3.10/bnx2x-record-rx-queue-for-lro-packets.patch new file mode 100644 index 00000000000..f37e3de4588 --- /dev/null +++ b/queue-3.10/bnx2x-record-rx-queue-for-lro-packets.patch @@ -0,0 +1,50 @@ +From 4f7ca4d57c7bfa31961d3336bcefba0d1f26e98b Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Sat, 12 Oct 2013 14:08:34 -0700 +Subject: bnx2x: record rx queue for LRO packets + +From: Eric Dumazet + +[ Upstream commit 60e66fee56b2256dcb1dc2ea1b2ddcb6e273857d ] + +RPS support is kind of broken on bnx2x, because only non LRO packets +get proper rx queue information. This triggers reorders, as it seems +bnx2x like to generate a non LRO packet for segment including TCP PUSH +flag : (this might be pure coincidence, but all the reorders I've +seen involve segments with a PUSH) + +11:13:34.335847 IP A > B: . 415808:447136(31328) ack 1 win 457 +11:13:34.335992 IP A > B: . 447136:448560(1424) ack 1 win 457 +11:13:34.336391 IP A > B: . 448560:479888(31328) ack 1 win 457 +11:13:34.336425 IP A > B: P 511216:512640(1424) ack 1 win 457 +11:13:34.336423 IP A > B: . 
479888:511216(31328) ack 1 win 457 +11:13:34.336924 IP A > B: . 512640:543968(31328) ack 1 win 457 +11:13:34.336963 IP A > B: . 543968:575296(31328) ack 1 win 457 + +We must call skb_record_rx_queue() to properly give to RPS (and more +generally for TX queue selection on forward path) the receive queue +information. + +Similar fix is needed for skb_mark_napi_id(), but will be handled +in a separate patch to ease stable backports. + +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Cc: Eilon Greenstein +Acked-by: Dmitry Kravkov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +@@ -670,6 +670,7 @@ static void bnx2x_gro_receive(struct bnx + } + } + #endif ++ skb_record_rx_queue(skb, fp->rx_queue); + napi_gro_receive(&fp->napi, skb); + } + diff --git a/queue-3.10/bridge-correctly-clamp-max-forward_delay-when-enabling-stp.patch b/queue-3.10/bridge-correctly-clamp-max-forward_delay-when-enabling-stp.patch new file mode 100644 index 00000000000..1881857b071 --- /dev/null +++ b/queue-3.10/bridge-correctly-clamp-max-forward_delay-when-enabling-stp.patch @@ -0,0 +1,39 @@ +From f32221bb96eb57fee9f4f41685582b6502841d30 Mon Sep 17 00:00:00 2001 +From: Vlad Yasevich +Date: Tue, 15 Oct 2013 14:57:45 -0400 +Subject: bridge: Correctly clamp MAX forward_delay when enabling STP + +From: Vlad Yasevich + +[ Upstream commit 4b6c7879d84ad06a2ac5b964808ed599187a188d ] + +Commit be4f154d5ef0ca147ab6bcd38857a774133f5450 + bridge: Clamp forward_delay when enabling STP +had a typo when attempting to clamp maximum forward delay. + +It is possible to set bridge_forward_delay to be higher than +permitted maximum when STP is off. When turning STP on, the +higher than allowed delay has to be clamped down to max value. 
+ +Signed-off-by: Vlad Yasevich +CC: Herbert Xu +CC: Stephen Hemminger +Reviewed-by: Veaceslav Falico +Acked-by: Herbert Xu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_stp_if.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/bridge/br_stp_if.c ++++ b/net/bridge/br_stp_if.c +@@ -134,7 +134,7 @@ static void br_stp_start(struct net_brid + + if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY) + __br_set_forward_delay(br, BR_MIN_FORWARD_DELAY); +- else if (br->bridge_forward_delay < BR_MAX_FORWARD_DELAY) ++ else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY) + __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY); + + if (r == 0) { diff --git a/queue-3.10/can-dev-fix-nlmsg-size-calculation-in-can_get_size.patch b/queue-3.10/can-dev-fix-nlmsg-size-calculation-in-can_get_size.patch new file mode 100644 index 00000000000..cf042531e16 --- /dev/null +++ b/queue-3.10/can-dev-fix-nlmsg-size-calculation-in-can_get_size.patch @@ -0,0 +1,41 @@ +From c0c0139e3ce94e915d8feb2e330e7e603c08bf0d Mon Sep 17 00:00:00 2001 +From: Marc Kleine-Budde +Date: Sat, 5 Oct 2013 21:25:17 +0200 +Subject: can: dev: fix nlmsg size calculation in can_get_size() + +From: Marc Kleine-Budde + +[ Upstream commit fe119a05f8ca481623a8d02efcc984332e612528 ] + +This patch fixes the calculation of the nlmsg size, by adding the missing +nla_total_size(). + +Signed-off-by: Marc Kleine-Budde +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/can/dev.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/net/can/dev.c ++++ b/drivers/net/can/dev.c +@@ -705,14 +705,14 @@ static size_t can_get_size(const struct + size_t size; + + size = nla_total_size(sizeof(u32)); /* IFLA_CAN_STATE */ +- size += sizeof(struct can_ctrlmode); /* IFLA_CAN_CTRLMODE */ ++ size += nla_total_size(sizeof(struct can_ctrlmode)); /* IFLA_CAN_CTRLMODE */ + size += nla_total_size(sizeof(u32)); /* IFLA_CAN_RESTART_MS */ +- size += sizeof(struct can_bittiming); /* IFLA_CAN_BITTIMING */ +- size += sizeof(struct can_clock); /* IFLA_CAN_CLOCK */ ++ size += nla_total_size(sizeof(struct can_bittiming)); /* IFLA_CAN_BITTIMING */ ++ size += nla_total_size(sizeof(struct can_clock)); /* IFLA_CAN_CLOCK */ + if (priv->do_get_berr_counter) /* IFLA_CAN_BERR_COUNTER */ +- size += sizeof(struct can_berr_counter); ++ size += nla_total_size(sizeof(struct can_berr_counter)); + if (priv->bittiming_const) /* IFLA_CAN_BITTIMING_CONST */ +- size += sizeof(struct can_bittiming_const); ++ size += nla_total_size(sizeof(struct can_bittiming_const)); + + return size; + } diff --git a/queue-3.10/connector-use-nlmsg_len-to-check-message-length.patch b/queue-3.10/connector-use-nlmsg_len-to-check-message-length.patch new file mode 100644 index 00000000000..a191f901bfd --- /dev/null +++ b/queue-3.10/connector-use-nlmsg_len-to-check-message-length.patch @@ -0,0 +1,45 @@ +From 42cbebac929fb9cfd19789494468b6c54316546a Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Mon, 30 Sep 2013 22:03:07 +0200 +Subject: connector: use nlmsg_len() to check message length + +From: Mathias Krause + +[ Upstream commit 162b2bedc084d2d908a04c93383ba02348b648b0 ] + +The current code tests the length of the whole netlink message to be +at least as long to fit a cn_msg. This is wrong as nlmsg_len includes +the length of the netlink message header. 
Use nlmsg_len() instead to +fix this "off-by-NLMSG_HDRLEN" size check. + +Signed-off-by: Mathias Krause +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/connector/connector.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/drivers/connector/connector.c ++++ b/drivers/connector/connector.c +@@ -157,17 +157,18 @@ static int cn_call_callback(struct sk_bu + static void cn_rx_skb(struct sk_buff *__skb) + { + struct nlmsghdr *nlh; +- int err; + struct sk_buff *skb; ++ int len, err; + + skb = skb_get(__skb); + + if (skb->len >= NLMSG_HDRLEN) { + nlh = nlmsg_hdr(skb); ++ len = nlmsg_len(nlh); + +- if (nlh->nlmsg_len < sizeof(struct cn_msg) || ++ if (len < (int)sizeof(struct cn_msg) || + skb->len < nlh->nlmsg_len || +- nlh->nlmsg_len > CONNECTOR_MAX_MSG_SIZE) { ++ len > CONNECTOR_MAX_MSG_SIZE) { + kfree_skb(skb); + return; + } diff --git a/queue-3.10/davinci_emac.c-fix-iff_allmulti-setup.patch b/queue-3.10/davinci_emac.c-fix-iff_allmulti-setup.patch new file mode 100644 index 00000000000..678583e565e --- /dev/null +++ b/queue-3.10/davinci_emac.c-fix-iff_allmulti-setup.patch @@ -0,0 +1,42 @@ +From f550bd5ee140624e6ca5147f0bb43b2062b4ff73 Mon Sep 17 00:00:00 2001 +From: Mariusz Ceier +Date: Mon, 21 Oct 2013 19:45:04 +0200 +Subject: davinci_emac.c: Fix IFF_ALLMULTI setup + +From: Mariusz Ceier + +[ Upstream commit d69e0f7ea95fef8059251325a79c004bac01f018 ] + +When IFF_ALLMULTI flag is set on interface and IFF_PROMISC isn't, +emac_dev_mcast_set should only enable RX of multicasts and reset +MACHASH registers. + +It does this, but afterwards it either sets up multicast MACs +filtering or disables RX of multicasts and resets MACHASH registers +again, rendering IFF_ALLMULTI flag useless. + +This patch fixes emac_dev_mcast_set, so that multicast MACs filtering and +disabling of RX of multicasts are skipped when IFF_ALLMULTI flag is set. + +Tested with kernel 2.6.37. 
+ +Signed-off-by: Mariusz Ceier +Acked-by: Mugunthan V N +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/ti/davinci_emac.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/net/ethernet/ti/davinci_emac.c ++++ b/drivers/net/ethernet/ti/davinci_emac.c +@@ -876,8 +876,7 @@ static void emac_dev_mcast_set(struct ne + netdev_mc_count(ndev) > EMAC_DEF_MAX_MULTICAST_ADDRESSES) { + mbp_enable = (mbp_enable | EMAC_MBP_RXMCAST); + emac_add_mcast(priv, EMAC_ALL_MULTI_SET, NULL); +- } +- if (!netdev_mc_empty(ndev)) { ++ } else if (!netdev_mc_empty(ndev)) { + struct netdev_hw_addr *ha; + + mbp_enable = (mbp_enable | EMAC_MBP_RXMCAST); diff --git a/queue-3.10/farsync-fix-info-leak-in-ioctl.patch b/queue-3.10/farsync-fix-info-leak-in-ioctl.patch new file mode 100644 index 00000000000..6e8314c5d3e --- /dev/null +++ b/queue-3.10/farsync-fix-info-leak-in-ioctl.patch @@ -0,0 +1,30 @@ +From 530fd8e305b59abdb5f2a04069a28d11506ded22 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Salva=20Peir=C3=B3?= +Date: Fri, 11 Oct 2013 12:50:03 +0300 +Subject: farsync: fix info leak in ioctl + +From: =?UTF-8?q?Salva=20Peir=C3=B3?= + +[ Upstream commit 96b340406724d87e4621284ebac5e059d67b2194 ] + +The fst_get_iface() code fails to initialize the two padding bytes of +struct sync_serial_settings after the ->loopback member. Add an explicit +memset(0) before filling the structure to avoid the info leak. + +Signed-off-by: Dan Carpenter +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wan/farsync.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/wan/farsync.c ++++ b/drivers/net/wan/farsync.c +@@ -1972,6 +1972,7 @@ fst_get_iface(struct fst_card_info *card + } + + i = port->index; ++ memset(&sync, 0, sizeof(sync)); + sync.clock_rate = FST_RDL(card, portConfig[i].lineSpeed); + /* Lucky card and linux use same encoding here */ + sync.clock_type = FST_RDB(card, portConfig[i].internalClock) == diff --git a/queue-3.10/inet-fix-possible-memory-corruption-with-udp_cork-and-ufo.patch b/queue-3.10/inet-fix-possible-memory-corruption-with-udp_cork-and-ufo.patch new file mode 100644 index 00000000000..7642c12ff11 --- /dev/null +++ b/queue-3.10/inet-fix-possible-memory-corruption-with-udp_cork-and-ufo.patch @@ -0,0 +1,76 @@ +From 712bef379f963a0a16b71b2038987aad3882407d Mon Sep 17 00:00:00 2001 +From: Hannes Frederic Sowa +Date: Tue, 22 Oct 2013 00:07:47 +0200 +Subject: inet: fix possible memory corruption with UDP_CORK and UFO + +From: Hannes Frederic Sowa + +[ This is a simplified -stable version of a set of upstream commits. ] + +This is a replacement patch only for stable which does fix the problems +handled by the following two commits in -net: + +"ip_output: do skb ufo init for peeked non ufo skb as well" (e93b7d748be887cd7639b113ba7d7ef792a7efb9) +"ip6_output: do skb ufo init for peeked non ufo skb as well" (c547dbf55d5f8cf615ccc0e7265e98db27d3fb8b) + +Three frames are written on a corked udp socket for which the output +netdevice has UFO enabled. If the first and third frame are smaller than +the mtu and the second one is bigger, we enqueue the second frame with +skb_append_datato_frags without initializing the gso fields. This leads +to the third frame appended regulary and thus constructing an invalid skb. + +This fixes the problem by always using skb_append_datato_frags as soon +as the first frag got enqueued to the skb without marking the packet +as SKB_GSO_UDP. 
+ +The problem with only two frames for ipv6 was fixed by "ipv6: udp +packets following an UFO enqueued packet need also be handled by UFO" +(2811ebac2521ceac84f2bdae402455baa6a7fb47). + +Signed-off-by: Hannes Frederic Sowa +Cc: Jiri Pirko +Cc: Eric Dumazet +Cc: David Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 5 +++++ + net/ipv4/ip_output.c | 2 +- + net/ipv6/ip6_output.c | 2 +- + 3 files changed, 7 insertions(+), 2 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -1308,6 +1308,11 @@ static inline int skb_pagelen(const stru + return len + skb_headlen(skb); + } + ++static inline bool skb_has_frags(const struct sk_buff *skb) ++{ ++ return skb_shinfo(skb)->nr_frags; ++} ++ + /** + * __skb_fill_page_desc - initialise a paged fragment in an skb + * @skb: buffer containing fragment to be initialised +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -844,7 +844,7 @@ static int __ip_append_data(struct sock + csummode = CHECKSUM_PARTIAL; + + cork->length += length; +- if (((length > mtu) || (skb && skb_is_gso(skb))) && ++ if (((length > mtu) || (skb && skb_has_frags(skb))) && + (sk->sk_protocol == IPPROTO_UDP) && + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { + err = ip_ufo_append_data(sk, queue, getfrag, from, length, +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1250,7 +1250,7 @@ int ip6_append_data(struct sock *sk, int + skb = skb_peek_tail(&sk->sk_write_queue); + cork->length += length; + if (((length > mtu) || +- (skb && skb_is_gso(skb))) && ++ (skb && skb_has_frags(skb))) && + (sk->sk_protocol == IPPROTO_UDP) && + (rt->dst.dev->features & NETIF_F_UFO)) { + err = ip6_ufo_append_data(sk, getfrag, from, length, diff --git a/queue-3.10/ipv4-fix-ineffective-source-address-selection.patch b/queue-3.10/ipv4-fix-ineffective-source-address-selection.patch new file mode 100644 index 00000000000..383a5297a12 --- /dev/null +++ 
b/queue-3.10/ipv4-fix-ineffective-source-address-selection.patch @@ -0,0 +1,33 @@ +From 1f5ac4303b598cfd40fa73284ed59c5ac7cc3cc1 Mon Sep 17 00:00:00 2001 +From: Jiri Benc +Date: Fri, 4 Oct 2013 17:04:48 +0200 +Subject: ipv4: fix ineffective source address selection + +From: Jiri Benc + +[ Upstream commit 0a7e22609067ff524fc7bbd45c6951dd08561667 ] + +When sending out multicast messages, the source address in inet->mc_addr is +ignored and rewritten by an autoselected one. This is caused by a typo in +commit 813b3b5db831 ("ipv4: Use caller's on-stack flowi as-is in output +route lookups"). + +Signed-off-by: Jiri Benc +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -2020,7 +2020,7 @@ struct rtable *__ip_route_output_key(str + RT_SCOPE_LINK); + goto make_route; + } +- if (fl4->saddr) { ++ if (!fl4->saddr) { + if (ipv4_is_multicast(fl4->daddr)) + fl4->saddr = inet_select_addr(dev_out, 0, + fl4->flowi4_scope); diff --git a/queue-3.10/ipv6-always-prefer-rt6i_gateway-if-present.patch b/queue-3.10/ipv6-always-prefer-rt6i_gateway-if-present.patch new file mode 100644 index 00000000000..e1561cdabd4 --- /dev/null +++ b/queue-3.10/ipv6-always-prefer-rt6i_gateway-if-present.patch @@ -0,0 +1,53 @@ +From 00e98866626ff457d664087b16c5acd2599be85a Mon Sep 17 00:00:00 2001 +From: Julian Anastasov +Date: Sun, 20 Oct 2013 15:43:03 +0300 +Subject: ipv6: always prefer rt6i_gateway if present + +From: Julian Anastasov + +[ Upstream commit 96dc809514fb2328605198a0602b67554d8cce7b ] + +In v3.9 6fd6ce2056de2709 ("ipv6: Do not depend on rt->n in +ip6_finish_output2()." changed the behaviour of ip6_finish_output2() +such that the recently introduced rt6_nexthop() is used +instead of an assigned neighbor. 
+ +As rt6_nexthop() prefers rt6i_gateway only for gatewayed +routes this causes a problem for users like IPVS, xt_TEE and +RAW(hdrincl) if they want to use different address for routing +compared to the destination address. + +Another case is when redirect can create RTF_DYNAMIC +route without RTF_GATEWAY flag, we ignore the rt6i_gateway +in rt6_nexthop(). + +Fix the above problems by considering the rt6i_gateway if +present, so that traffic routed to address on local subnet is +not wrongly diverted to the destination address. + +Thanks to Simon Horman and Phil Oester for spotting the +problematic commit. + +Thanks to Hannes Frederic Sowa for his review and help in testing. + +Reported-by: Phil Oester +Reported-by: Mark Brooks +Signed-off-by: Julian Anastasov +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip6_route.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/net/ip6_route.h ++++ b/include/net/ip6_route.h +@@ -196,7 +196,7 @@ static inline int ip6_skb_dst_mtu(struct + + static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, struct in6_addr *dest) + { +- if (rt->rt6i_flags & RTF_GATEWAY) ++ if (rt->rt6i_flags & RTF_GATEWAY || !ipv6_addr_any(&rt->rt6i_gateway)) + return &rt->rt6i_gateway; + return dest; + } diff --git a/queue-3.10/ipv6-fill-rt6i_gateway-with-nexthop-address.patch b/queue-3.10/ipv6-fill-rt6i_gateway-with-nexthop-address.patch new file mode 100644 index 00000000000..f69d8e98eb0 --- /dev/null +++ b/queue-3.10/ipv6-fill-rt6i_gateway-with-nexthop-address.patch @@ -0,0 +1,101 @@ +From 0e2569b8d8d11c12c3406680f417bf145ffc8258 Mon Sep 17 00:00:00 2001 +From: Julian Anastasov +Date: Sun, 20 Oct 2013 15:43:04 +0300 +Subject: ipv6: fill rt6i_gateway with nexthop address + +From: Julian Anastasov + +[ Upstream commit 550bab42f83308c9d6ab04a980cc4333cef1c8fa ] + +Make sure rt6i_gateway contains nexthop information in +all routes returned from lookup or 
when routes are directly +attached to skb for generated ICMP packets. + +The effect of this patch should be a faster version of +rt6_nexthop() and the consideration of local addresses as +nexthop. + +Signed-off-by: Julian Anastasov +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip6_route.h | 6 ++---- + net/ipv6/ip6_output.c | 4 ++-- + net/ipv6/route.c | 8 ++++++-- + 3 files changed, 10 insertions(+), 8 deletions(-) + +--- a/include/net/ip6_route.h ++++ b/include/net/ip6_route.h +@@ -194,11 +194,9 @@ static inline int ip6_skb_dst_mtu(struct + skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); + } + +-static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, struct in6_addr *dest) ++static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt) + { +- if (rt->rt6i_flags & RTF_GATEWAY || !ipv6_addr_any(&rt->rt6i_gateway)) +- return &rt->rt6i_gateway; +- return dest; ++ return &rt->rt6i_gateway; + } + + #endif +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -130,7 +130,7 @@ static int ip6_finish_output2(struct sk_ + } + + rcu_read_lock_bh(); +- nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); ++ nexthop = rt6_nexthop((struct rt6_info *)dst); + neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); + if (unlikely(!neigh)) + neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); +@@ -898,7 +898,7 @@ static int ip6_dst_lookup_tail(struct so + */ + rt = (struct rt6_info *) *dst; + rcu_read_lock_bh(); +- n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); ++ n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); + err = n && !(n->nud_state & NUD_VALID) ? 
-EINVAL : 0; + rcu_read_unlock_bh(); + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -848,7 +848,6 @@ static struct rt6_info *rt6_alloc_cow(st + if (ort->rt6i_dst.plen != 128 && + ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) + rt->rt6i_flags |= RTF_ANYCAST; +- rt->rt6i_gateway = *daddr; + } + + rt->rt6i_flags |= RTF_CACHE; +@@ -1245,6 +1244,7 @@ struct dst_entry *icmp6_dst_alloc(struct + rt->dst.flags |= DST_HOST; + rt->dst.output = ip6_output; + atomic_set(&rt->dst.__refcnt, 1); ++ rt->rt6i_gateway = fl6->daddr; + rt->rt6i_dst.addr = fl6->daddr; + rt->rt6i_dst.plen = 128; + rt->rt6i_idev = idev; +@@ -1801,7 +1801,10 @@ static struct rt6_info *ip6_rt_copy(stru + in6_dev_hold(rt->rt6i_idev); + rt->dst.lastuse = jiffies; + +- rt->rt6i_gateway = ort->rt6i_gateway; ++ if (ort->rt6i_flags & RTF_GATEWAY) ++ rt->rt6i_gateway = ort->rt6i_gateway; ++ else ++ rt->rt6i_gateway = *dest; + rt->rt6i_flags = ort->rt6i_flags; + if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == + (RTF_DEFAULT | RTF_ADDRCONF)) +@@ -2088,6 +2091,7 @@ struct rt6_info *addrconf_dst_alloc(stru + else + rt->rt6i_flags |= RTF_LOCAL; + ++ rt->rt6i_gateway = *addr; + rt->rt6i_dst.addr = *addr; + rt->rt6i_dst.plen = 128; + rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); diff --git a/queue-3.10/ipv6-probe-routes-asynchronous-in-rt6_probe.patch b/queue-3.10/ipv6-probe-routes-asynchronous-in-rt6_probe.patch new file mode 100644 index 00000000000..137d5a4bc09 --- /dev/null +++ b/queue-3.10/ipv6-probe-routes-asynchronous-in-rt6_probe.patch @@ -0,0 +1,82 @@ +From 769af606834de4263e33410c40f60f60244b2466 Mon Sep 17 00:00:00 2001 +From: Hannes Frederic Sowa +Date: Mon, 21 Oct 2013 06:17:15 +0200 +Subject: ipv6: probe routes asynchronous in rt6_probe + +From: Hannes Frederic Sowa + +[ Upstream commit c2f17e827b419918c856131f592df9521e1a38e3 ] + +Routes need to be probed asynchronous otherwise the call stack gets +exhausted when the kernel attemps to deliver another skb inline, like +e.g. 
xt_TEE does, and we probe at the same time. + +We update neigh->updated still at once, otherwise we would send too +many probes. + +Cc: Julian Anastasov +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 38 +++++++++++++++++++++++++++++++------- + 1 file changed, 31 insertions(+), 7 deletions(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -473,6 +473,24 @@ out: + } + + #ifdef CONFIG_IPV6_ROUTER_PREF ++struct __rt6_probe_work { ++ struct work_struct work; ++ struct in6_addr target; ++ struct net_device *dev; ++}; ++ ++static void rt6_probe_deferred(struct work_struct *w) ++{ ++ struct in6_addr mcaddr; ++ struct __rt6_probe_work *work = ++ container_of(w, struct __rt6_probe_work, work); ++ ++ addrconf_addr_solict_mult(&work->target, &mcaddr); ++ ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); ++ dev_put(work->dev); ++ kfree(w); ++} ++ + static void rt6_probe(struct rt6_info *rt) + { + struct neighbour *neigh; +@@ -496,17 +514,23 @@ static void rt6_probe(struct rt6_info *r + + if (!neigh || + time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { +- struct in6_addr mcaddr; +- struct in6_addr *target; ++ struct __rt6_probe_work *work; + +- if (neigh) { ++ work = kmalloc(sizeof(*work), GFP_ATOMIC); ++ ++ if (neigh && work) + neigh->updated = jiffies; ++ ++ if (neigh) + write_unlock(&neigh->lock); +- } + +- target = (struct in6_addr *)&rt->rt6i_gateway; +- addrconf_addr_solict_mult(target, &mcaddr); +- ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); ++ if (work) { ++ INIT_WORK(&work->work, rt6_probe_deferred); ++ work->target = rt->rt6i_gateway; ++ dev_hold(rt->dst.dev); ++ work->dev = rt->dst.dev; ++ schedule_work(&work->work); ++ } + } else { + out: + write_unlock(&neigh->lock); diff --git a/queue-3.10/l2tp-fix-build-warning-with-ipv6-disabled.patch b/queue-3.10/l2tp-fix-build-warning-with-ipv6-disabled.patch new file mode 100644 index 
00000000000..5ada8ed8fdb --- /dev/null +++ b/queue-3.10/l2tp-fix-build-warning-with-ipv6-disabled.patch @@ -0,0 +1,73 @@ +From ab0191a681ea440154e2919ab57f7ffa881ab22e Mon Sep 17 00:00:00 2001 +From: "David S. Miller" +Date: Tue, 8 Oct 2013 15:44:26 -0400 +Subject: l2tp: Fix build warning with ipv6 disabled. + +From: "David S. Miller" + +[ Upstream commit 8d8a51e26a6d415e1470759f2cf5f3ee3ee86196 ] + +net/l2tp/l2tp_core.c: In function ‘l2tp_verify_udp_checksum’: +net/l2tp/l2tp_core.c:499:22: warning: unused variable ‘tunnel’ [-Wunused-variable] + +Create a helper "l2tp_tunnel()" to facilitate this, and as a side +effect get rid of a bunch of unnecessary void pointer casts. + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_core.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/net/l2tp/l2tp_core.c ++++ b/net/l2tp/l2tp_core.c +@@ -115,6 +115,11 @@ struct l2tp_net { + static void l2tp_session_set_header_len(struct l2tp_session *session, int version); + static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); + ++static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) ++{ ++ return sk->sk_user_data; ++} ++ + static inline struct l2tp_net *l2tp_pernet(struct net *net) + { + BUG_ON(!net); +@@ -499,7 +504,6 @@ out: + static inline int l2tp_verify_udp_checksum(struct sock *sk, + struct sk_buff *skb) + { +- struct l2tp_tunnel *tunnel = (struct l2tp_tunnel *)sk->sk_user_data; + struct udphdr *uh = udp_hdr(skb); + u16 ulen = ntohs(uh->len); + __wsum psum; +@@ -508,7 +512,7 @@ static inline int l2tp_verify_udp_checks + return 0; + + #if IS_ENABLED(CONFIG_IPV6) +- if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) { ++ if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) { + if (!uh->check) { + LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); + return 1; +@@ -1248,10 +1252,9 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); + */ + static void l2tp_tunnel_destruct(struct sock *sk) + { +- struct 
l2tp_tunnel *tunnel; ++ struct l2tp_tunnel *tunnel = l2tp_tunnel(sk); + struct l2tp_net *pn; + +- tunnel = sk->sk_user_data; + if (tunnel == NULL) + goto end; + +@@ -1619,7 +1622,7 @@ int l2tp_tunnel_create(struct net *net, + } + + /* Check if this socket has already been prepped */ +- tunnel = (struct l2tp_tunnel *)sk->sk_user_data; ++ tunnel = l2tp_tunnel(sk); + if (tunnel != NULL) { + /* This socket has already been prepped */ + err = -EBUSY; diff --git a/queue-3.10/l2tp-fix-kernel-panic-when-using-ipv4-mapped-ipv6-addresses.patch b/queue-3.10/l2tp-fix-kernel-panic-when-using-ipv4-mapped-ipv6-addresses.patch new file mode 100644 index 00000000000..19fc48da1d0 --- /dev/null +++ b/queue-3.10/l2tp-fix-kernel-panic-when-using-ipv4-mapped-ipv6-addresses.patch @@ -0,0 +1,141 @@ +From 28a35005a423a72f6864289d39afdb3095014fed Mon Sep 17 00:00:00 2001 +From: François CACHEREUL +Date: Wed, 2 Oct 2013 10:16:02 +0200 +Subject: l2tp: fix kernel panic when using IPv4-mapped IPv6 addresses + +From: François CACHEREUL + +[ Upstream commit e18503f41f9b12132c95d7c31ca6ee5155e44e5c ] + +IPv4 mapped addresses cause kernel panic. +The patch just checks whether the IPv6 address is an IPv4 mapped +address. If so, use IPv4 API instead of IPv6. 
+ +[ 940.026915] general protection fault: 0000 [#1] +[ 940.026915] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core pppox ppp_generic slhc loop psmouse +[ 940.026915] CPU: 0 PID: 3184 Comm: memcheck-amd64- Not tainted 3.11.0+ #1 +[ 940.026915] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 +[ 940.026915] task: ffff880007130e20 ti: ffff88000737e000 task.ti: ffff88000737e000 +[ 940.026915] RIP: 0010:[] [] ip6_xmit+0x276/0x326 +[ 940.026915] RSP: 0018:ffff88000737fd28 EFLAGS: 00010286 +[ 940.026915] RAX: c748521a75ceff48 RBX: ffff880000c30800 RCX: 0000000000000000 +[ 940.026915] RDX: ffff88000075cc4e RSI: 0000000000000028 RDI: ffff8800060e5a40 +[ 940.026915] RBP: ffff8800060e5a40 R08: 0000000000000000 R09: ffff88000075cc90 +[ 940.026915] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88000737fda0 +[ 940.026915] R13: 0000000000000000 R14: 0000000000002000 R15: ffff880005d3b580 +[ 940.026915] FS: 00007f163dc5e800(0000) GS:ffffffff81623000(0000) knlGS:0000000000000000 +[ 940.026915] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 940.026915] CR2: 00000004032dc940 CR3: 0000000005c25000 CR4: 00000000000006f0 +[ 940.026915] Stack: +[ 940.026915] ffff88000075cc4e ffffffff81694e90 ffff880000c30b38 0000000000000020 +[ 940.026915] 11000000523c4bac ffff88000737fdb4 0000000000000000 ffff880000c30800 +[ 940.026915] ffff880005d3b580 ffff880000c30b38 ffff8800060e5a40 0000000000000020 +[ 940.026915] Call Trace: +[ 940.026915] [] ? inet6_csk_xmit+0xa4/0xc4 +[ 940.026915] [] ? l2tp_xmit_skb+0x503/0x55a [l2tp_core] +[ 940.026915] [] ? pskb_expand_head+0x161/0x214 +[ 940.026915] [] ? pppol2tp_xmit+0xf2/0x143 [l2tp_ppp] +[ 940.026915] [] ? ppp_channel_push+0x36/0x8b [ppp_generic] +[ 940.026915] [] ? ppp_write+0xaf/0xc5 [ppp_generic] +[ 940.026915] [] ? vfs_write+0xa2/0x106 +[ 940.026915] [] ? SyS_write+0x56/0x8a +[ 940.026915] [] ? 
system_call_fastpath+0x16/0x1b +[ 940.026915] Code: 00 49 8b 8f d8 00 00 00 66 83 7c 11 02 00 74 60 49 +8b 47 58 48 83 e0 fe 48 8b 80 18 01 00 00 48 85 c0 74 13 48 8b 80 78 02 +00 00 <48> ff 40 28 41 8b 57 68 48 01 50 30 48 8b 54 24 08 49 c7 c1 51 +[ 940.026915] RIP [] ip6_xmit+0x276/0x326 +[ 940.026915] RSP +[ 940.057945] ---[ end trace be8aba9a61c8b7f3 ]--- +[ 940.058583] Kernel panic - not syncing: Fatal exception in interrupt + +Signed-off-by: François CACHEREUL +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_core.c | 27 +++++++++++++++++++++++---- + net/l2tp/l2tp_core.h | 3 +++ + 2 files changed, 26 insertions(+), 4 deletions(-) + +--- a/net/l2tp/l2tp_core.c ++++ b/net/l2tp/l2tp_core.c +@@ -499,6 +499,7 @@ out: + static inline int l2tp_verify_udp_checksum(struct sock *sk, + struct sk_buff *skb) + { ++ struct l2tp_tunnel *tunnel = (struct l2tp_tunnel *)sk->sk_user_data; + struct udphdr *uh = udp_hdr(skb); + u16 ulen = ntohs(uh->len); + __wsum psum; +@@ -507,7 +508,7 @@ static inline int l2tp_verify_udp_checks + return 0; + + #if IS_ENABLED(CONFIG_IPV6) +- if (sk->sk_family == PF_INET6) { ++ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) { + if (!uh->check) { + LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); + return 1; +@@ -1071,7 +1072,7 @@ static int l2tp_xmit_core(struct l2tp_se + /* Queue the packet to IP for output */ + skb->local_df = 1; + #if IS_ENABLED(CONFIG_IPV6) +- if (skb->sk->sk_family == PF_INET6) ++ if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped) + error = inet6_csk_xmit(skb, NULL); + else + #endif +@@ -1198,7 +1199,7 @@ int l2tp_xmit_skb(struct l2tp_session *s + + /* Calculate UDP checksum if configured to do so */ + #if IS_ENABLED(CONFIG_IPV6) +- if (sk->sk_family == PF_INET6) ++ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) + l2tp_xmit_ipv6_csum(sk, skb, udp_len); + else + #endif +@@ -1647,6 +1648,24 @@ int l2tp_tunnel_create(struct net *net, + if (cfg != NULL) + 
tunnel->debug = cfg->debug; + ++#if IS_ENABLED(CONFIG_IPV6) ++ if (sk->sk_family == PF_INET6) { ++ struct ipv6_pinfo *np = inet6_sk(sk); ++ ++ if (ipv6_addr_v4mapped(&np->saddr) && ++ ipv6_addr_v4mapped(&np->daddr)) { ++ struct inet_sock *inet = inet_sk(sk); ++ ++ tunnel->v4mapped = true; ++ inet->inet_saddr = np->saddr.s6_addr32[3]; ++ inet->inet_rcv_saddr = np->rcv_saddr.s6_addr32[3]; ++ inet->inet_daddr = np->daddr.s6_addr32[3]; ++ } else { ++ tunnel->v4mapped = false; ++ } ++ } ++#endif ++ + /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ + tunnel->encap = encap; + if (encap == L2TP_ENCAPTYPE_UDP) { +@@ -1655,7 +1674,7 @@ int l2tp_tunnel_create(struct net *net, + udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; + udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; + #if IS_ENABLED(CONFIG_IPV6) +- if (sk->sk_family == PF_INET6) ++ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) + udpv6_encap_enable(); + else + #endif +--- a/net/l2tp/l2tp_core.h ++++ b/net/l2tp/l2tp_core.h +@@ -189,6 +189,9 @@ struct l2tp_tunnel { + struct sock *sock; /* Parent socket */ + int fd; /* Parent fd, if tunnel socket + * was created by userspace */ ++#if IS_ENABLED(CONFIG_IPV6) ++ bool v4mapped; ++#endif + + struct work_struct del_work; + diff --git a/queue-3.10/l2tp-must-disable-bh-before-calling-l2tp_xmit_skb.patch b/queue-3.10/l2tp-must-disable-bh-before-calling-l2tp_xmit_skb.patch new file mode 100644 index 00000000000..97b0067593d --- /dev/null +++ b/queue-3.10/l2tp-must-disable-bh-before-calling-l2tp_xmit_skb.patch @@ -0,0 +1,201 @@ +From daac285a84ceab3b38ab5dbb16228cd6b99469ec Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Thu, 10 Oct 2013 06:30:09 -0700 +Subject: l2tp: must disable bh before calling l2tp_xmit_skb() + +From: Eric Dumazet + +[ Upstream commit 455cc32bf128e114455d11ad919321ab89a2c312 ] + +François Cachereul made a very nice bug report and suspected +the bh_lock_sock() / bh_unlok_sock() pair used in l2tp_xmit_skb() from +process context was 
not good. + +This problem was added by commit 6af88da14ee284aaad6e4326da09a89191ab6165 +("l2tp: Fix locking in l2tp_core.c"). + +l2tp_eth_dev_xmit() runs from BH context, so we must disable BH +from other l2tp_xmit_skb() users. + +[ 452.060011] BUG: soft lockup - CPU#1 stuck for 23s! [accel-pppd:6662] +[ 452.061757] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core pppoe pppox +ppp_generic slhc ipv6 ext3 mbcache jbd virtio_balloon xfs exportfs dm_mod +virtio_blk ata_generic virtio_net floppy ata_piix libata virtio_pci virtio_ring virtio [last unloaded: scsi_wait_scan] +[ 452.064012] CPU 1 +[ 452.080015] BUG: soft lockup - CPU#2 stuck for 23s! [accel-pppd:6643] +[ 452.080015] CPU 2 +[ 452.080015] +[ 452.080015] Pid: 6643, comm: accel-pppd Not tainted 3.2.46.mini #1 Bochs Bochs +[ 452.080015] RIP: 0010:[] [] do_raw_spin_lock+0x17/0x1f +[ 452.080015] RSP: 0018:ffff88007125fc18 EFLAGS: 00000293 +[ 452.080015] RAX: 000000000000aba9 RBX: ffffffff811d0703 RCX: 0000000000000000 +[ 452.080015] RDX: 00000000000000ab RSI: ffff8800711f6896 RDI: ffff8800745c8110 +[ 452.080015] RBP: ffff88007125fc18 R08: 0000000000000020 R09: 0000000000000000 +[ 452.080015] R10: 0000000000000000 R11: 0000000000000280 R12: 0000000000000286 +[ 452.080015] R13: 0000000000000020 R14: 0000000000000240 R15: 0000000000000000 +[ 452.080015] FS: 00007fdc0cc24700(0000) GS:ffff8800b6f00000(0000) knlGS:0000000000000000 +[ 452.080015] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 452.080015] CR2: 00007fdb054899b8 CR3: 0000000074404000 CR4: 00000000000006a0 +[ 452.080015] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 452.080015] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 +[ 452.080015] Process accel-pppd (pid: 6643, threadinfo ffff88007125e000, task ffff8800b27e6dd0) +[ 452.080015] Stack: +[ 452.080015] ffff88007125fc28 ffffffff81256559 ffff88007125fc98 ffffffffa01b2bd1 +[ 452.080015] ffff88007125fc58 000000000000000c 00000000029490d0 0000009c71dbe25e +[ 
452.080015] 000000000000005c 000000080000000e 0000000000000000 ffff880071170600 +[ 452.080015] Call Trace: +[ 452.080015] [] _raw_spin_lock+0xe/0x10 +[ 452.080015] [] l2tp_xmit_skb+0x189/0x4ac [l2tp_core] +[ 452.080015] [] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp] +[ 452.080015] [] __sock_sendmsg_nosec+0x22/0x24 +[ 452.080015] [] sock_sendmsg+0xa1/0xb6 +[ 452.080015] [] ? __schedule+0x5c1/0x616 +[ 452.080015] [] ? __dequeue_signal+0xb7/0x10c +[ 452.080015] [] ? fget_light+0x75/0x89 +[ 452.080015] [] ? sockfd_lookup_light+0x20/0x56 +[ 452.080015] [] sys_sendto+0x10c/0x13b +[ 452.080015] [] system_call_fastpath+0x16/0x1b +[ 452.080015] Code: 81 48 89 e5 72 0c 31 c0 48 81 ff 45 66 25 81 0f 92 c0 5d c3 55 b8 00 01 00 00 48 89 e5 f0 66 0f c1 07 0f b6 d4 38 d0 74 06 f3 90 <8a> 07 eb f6 5d c3 90 90 55 48 89 e5 9c 58 0f 1f 44 00 00 5d c3 +[ 452.080015] Call Trace: +[ 452.080015] [] _raw_spin_lock+0xe/0x10 +[ 452.080015] [] l2tp_xmit_skb+0x189/0x4ac [l2tp_core] +[ 452.080015] [] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp] +[ 452.080015] [] __sock_sendmsg_nosec+0x22/0x24 +[ 452.080015] [] sock_sendmsg+0xa1/0xb6 +[ 452.080015] [] ? __schedule+0x5c1/0x616 +[ 452.080015] [] ? __dequeue_signal+0xb7/0x10c +[ 452.080015] [] ? fget_light+0x75/0x89 +[ 452.080015] [] ? 
sockfd_lookup_light+0x20/0x56 +[ 452.080015] [] sys_sendto+0x10c/0x13b +[ 452.080015] [] system_call_fastpath+0x16/0x1b +[ 452.064012] +[ 452.064012] Pid: 6662, comm: accel-pppd Not tainted 3.2.46.mini #1 Bochs Bochs +[ 452.064012] RIP: 0010:[] [] do_raw_spin_lock+0x19/0x1f +[ 452.064012] RSP: 0018:ffff8800b6e83ba0 EFLAGS: 00000297 +[ 452.064012] RAX: 000000000000aaa9 RBX: ffff8800b6e83b40 RCX: 0000000000000002 +[ 452.064012] RDX: 00000000000000aa RSI: 000000000000000a RDI: ffff8800745c8110 +[ 452.064012] RBP: ffff8800b6e83ba0 R08: 000000000000c802 R09: 000000000000001c +[ 452.064012] R10: ffff880071096c4e R11: 0000000000000006 R12: ffff8800b6e83b18 +[ 452.064012] R13: ffffffff8125d51e R14: ffff8800b6e83ba0 R15: ffff880072a589c0 +[ 452.064012] FS: 00007fdc0b81e700(0000) GS:ffff8800b6e80000(0000) knlGS:0000000000000000 +[ 452.064012] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 452.064012] CR2: 0000000000625208 CR3: 0000000074404000 CR4: 00000000000006a0 +[ 452.064012] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 452.064012] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 +[ 452.064012] Process accel-pppd (pid: 6662, threadinfo ffff88007129a000, task ffff8800744f7410) +[ 452.064012] Stack: +[ 452.064012] ffff8800b6e83bb0 ffffffff81256559 ffff8800b6e83bc0 ffffffff8121c64a +[ 452.064012] ffff8800b6e83bf0 ffffffff8121ec7a ffff880072a589c0 ffff880071096c62 +[ 452.064012] 0000000000000011 ffffffff81430024 ffff8800b6e83c80 ffffffff8121f276 +[ 452.064012] Call Trace: +[ 452.064012] +[ 452.064012] [] _raw_spin_lock+0xe/0x10 +[ 452.064012] [] spin_lock+0x9/0xb +[ 452.064012] [] udp_queue_rcv_skb+0x186/0x269 +[ 452.064012] [] __udp4_lib_rcv+0x297/0x4ae +[ 452.064012] [] ? raw_rcv+0xe9/0xf0 +[ 452.064012] [] udp_rcv+0x1a/0x1c +[ 452.064012] [] ip_local_deliver_finish+0x12b/0x1a5 +[ 452.064012] [] ip_local_deliver+0x53/0x84 +[ 452.064012] [] ip_rcv_finish+0x2bc/0x2f3 +[ 452.064012] [] ip_rcv+0x210/0x269 +[ 452.064012] [] ? 
kvm_clock_get_cycles+0x9/0xb +[ 452.064012] [] __netif_receive_skb+0x3a5/0x3f7 +[ 452.064012] [] netif_receive_skb+0x57/0x5e +[ 452.064012] [] ? __netdev_alloc_skb+0x1f/0x3b +[ 452.064012] [] virtnet_poll+0x4ba/0x5a4 [virtio_net] +[ 452.064012] [] net_rx_action+0x73/0x184 +[ 452.064012] [] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core] +[ 452.064012] [] __do_softirq+0xc3/0x1a8 +[ 452.064012] [] ? ack_APIC_irq+0x10/0x12 +[ 452.064012] [] ? _raw_spin_lock+0xe/0x10 +[ 452.064012] [] call_softirq+0x1c/0x26 +[ 452.064012] [] do_softirq+0x45/0x82 +[ 452.064012] [] irq_exit+0x42/0x9c +[ 452.064012] [] do_IRQ+0x8e/0xa5 +[ 452.064012] [] common_interrupt+0x6e/0x6e +[ 452.064012] +[ 452.064012] [] ? kfree+0x8a/0xa3 +[ 452.064012] [] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core] +[ 452.064012] [] ? l2tp_xmit_skb+0x1dd/0x4ac [l2tp_core] +[ 452.064012] [] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp] +[ 452.064012] [] __sock_sendmsg_nosec+0x22/0x24 +[ 452.064012] [] sock_sendmsg+0xa1/0xb6 +[ 452.064012] [] ? __schedule+0x5c1/0x616 +[ 452.064012] [] ? __dequeue_signal+0xb7/0x10c +[ 452.064012] [] ? fget_light+0x75/0x89 +[ 452.064012] [] ? sockfd_lookup_light+0x20/0x56 +[ 452.064012] [] sys_sendto+0x10c/0x13b +[ 452.064012] [] system_call_fastpath+0x16/0x1b +[ 452.064012] Code: 89 e5 72 0c 31 c0 48 81 ff 45 66 25 81 0f 92 c0 5d c3 55 b8 00 01 00 00 48 89 e5 f0 66 0f c1 07 0f b6 d4 38 d0 74 06 f3 90 8a 07 f6 5d c3 90 90 55 48 89 e5 9c 58 0f 1f 44 00 00 5d c3 55 48 +[ 452.064012] Call Trace: +[ 452.064012] [] _raw_spin_lock+0xe/0x10 +[ 452.064012] [] spin_lock+0x9/0xb +[ 452.064012] [] udp_queue_rcv_skb+0x186/0x269 +[ 452.064012] [] __udp4_lib_rcv+0x297/0x4ae +[ 452.064012] [] ? raw_rcv+0xe9/0xf0 +[ 452.064012] [] udp_rcv+0x1a/0x1c +[ 452.064012] [] ip_local_deliver_finish+0x12b/0x1a5 +[ 452.064012] [] ip_local_deliver+0x53/0x84 +[ 452.064012] [] ip_rcv_finish+0x2bc/0x2f3 +[ 452.064012] [] ip_rcv+0x210/0x269 +[ 452.064012] [] ? 
kvm_clock_get_cycles+0x9/0xb +[ 452.064012] [] __netif_receive_skb+0x3a5/0x3f7 +[ 452.064012] [] netif_receive_skb+0x57/0x5e +[ 452.064012] [] ? __netdev_alloc_skb+0x1f/0x3b +[ 452.064012] [] virtnet_poll+0x4ba/0x5a4 [virtio_net] +[ 452.064012] [] net_rx_action+0x73/0x184 +[ 452.064012] [] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core] +[ 452.064012] [] __do_softirq+0xc3/0x1a8 +[ 452.064012] [] ? ack_APIC_irq+0x10/0x12 +[ 452.064012] [] ? _raw_spin_lock+0xe/0x10 +[ 452.064012] [] call_softirq+0x1c/0x26 +[ 452.064012] [] do_softirq+0x45/0x82 +[ 452.064012] [] irq_exit+0x42/0x9c +[ 452.064012] [] do_IRQ+0x8e/0xa5 +[ 452.064012] [] common_interrupt+0x6e/0x6e +[ 452.064012] [] ? kfree+0x8a/0xa3 +[ 452.064012] [] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core] +[ 452.064012] [] ? l2tp_xmit_skb+0x1dd/0x4ac [l2tp_core] +[ 452.064012] [] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp] +[ 452.064012] [] __sock_sendmsg_nosec+0x22/0x24 +[ 452.064012] [] sock_sendmsg+0xa1/0xb6 +[ 452.064012] [] ? __schedule+0x5c1/0x616 +[ 452.064012] [] ? __dequeue_signal+0xb7/0x10c +[ 452.064012] [] ? fget_light+0x75/0x89 +[ 452.064012] [] ? sockfd_lookup_light+0x20/0x56 +[ 452.064012] [] sys_sendto+0x10c/0x13b +[ 452.064012] [] system_call_fastpath+0x16/0x1b + +Reported-by: François Cachereul +Tested-by: François Cachereul +Signed-off-by: Eric Dumazet +Cc: James Chapman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_ppp.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/l2tp/l2tp_ppp.c ++++ b/net/l2tp/l2tp_ppp.c +@@ -353,7 +353,9 @@ static int pppol2tp_sendmsg(struct kiocb + goto error_put_sess_tun; + } + ++ local_bh_disable(); + l2tp_xmit_skb(session, skb, session->hdr_len); ++ local_bh_enable(); + + sock_put(ps->tunnel_sock); + sock_put(sk); +@@ -422,7 +424,9 @@ static int pppol2tp_xmit(struct ppp_chan + skb->data[0] = ppph[0]; + skb->data[1] = ppph[1]; + ++ local_bh_disable(); + l2tp_xmit_skb(session, skb, session->hdr_len); ++ local_bh_enable(); + + sock_put(sk_tun); + sock_put(sk); diff --git a/queue-3.10/net-do-not-call-sock_put-on-timewait-sockets.patch b/queue-3.10/net-do-not-call-sock_put-on-timewait-sockets.patch new file mode 100644 index 00000000000..517a5317593 --- /dev/null +++ b/queue-3.10/net-do-not-call-sock_put-on-timewait-sockets.patch @@ -0,0 +1,44 @@ +From fd536bb144371fdc29680abdb3ad9984008eefde Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 1 Oct 2013 21:04:11 -0700 +Subject: net: do not call sock_put() on TIMEWAIT sockets + +From: Eric Dumazet + +[ Upstream commit 80ad1d61e72d626e30ebe8529a0455e660ca4693 ] + +commit 3ab5aee7fe84 ("net: Convert TCP & DCCP hash tables to use RCU / +hlist_nulls") incorrectly used sock_put() on TIMEWAIT sockets. + +We should instead use inet_twsk_put() + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/inet_hashtables.c | 2 +- + net/ipv6/inet6_hashtables.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -287,7 +287,7 @@ begintw: + if (unlikely(!INET_TW_MATCH(sk, net, acookie, + saddr, daddr, ports, + dif))) { +- sock_put(sk); ++ inet_twsk_put(inet_twsk(sk)); + goto begintw; + } + goto out; +--- a/net/ipv6/inet6_hashtables.c ++++ b/net/ipv6/inet6_hashtables.c +@@ -116,7 +116,7 @@ begintw: + } + if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, + ports, dif))) { +- sock_put(sk); ++ inet_twsk_put(inet_twsk(sk)); + goto begintw; + } + goto out; diff --git a/queue-3.10/net-dst-provide-accessor-function-to-dst-xfrm.patch b/queue-3.10/net-dst-provide-accessor-function-to-dst-xfrm.patch new file mode 100644 index 00000000000..56b7281db0d --- /dev/null +++ b/queue-3.10/net-dst-provide-accessor-function-to-dst-xfrm.patch @@ -0,0 +1,45 @@ +From 57a6592ecaa46634f5c4776831051a45c45b6631 Mon Sep 17 00:00:00 2001 +From: Vlad Yasevich +Date: Tue, 15 Oct 2013 22:01:29 -0400 +Subject: net: dst: provide accessor function to dst->xfrm + +From: Vlad Yasevich + +[ Upstream commit e87b3998d795123b4139bc3f25490dd236f68212 ] + +dst->xfrm is conditionally defined. Provide accessor function that +is always available. + +Signed-off-by: Vlad Yasevich +Acked-by: Neil Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/dst.h | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/include/net/dst.h ++++ b/include/net/dst.h +@@ -477,10 +477,22 @@ static inline struct dst_entry *xfrm_loo + { + return dst_orig; + } ++ ++static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) ++{ ++ return NULL; ++} ++ + #else + extern struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, struct sock *sk, + int flags); ++ ++/* skb attached with this dst needs transformation if dst->xfrm is valid */ ++static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) ++{ ++ return dst->xfrm; ++} + #endif + + #endif /* _NET_DST_H */ diff --git a/queue-3.10/net-fix-cipso-packet-validation-when-netlabel.patch b/queue-3.10/net-fix-cipso-packet-validation-when-netlabel.patch new file mode 100644 index 00000000000..e7affa4ac1e --- /dev/null +++ b/queue-3.10/net-fix-cipso-packet-validation-when-netlabel.patch @@ -0,0 +1,54 @@ +From be5d1256150392579a4c9ae1370734f0b4150cb5 Mon Sep 17 00:00:00 2001 +From: Seif Mazareeb +Date: Thu, 17 Oct 2013 20:33:21 -0700 +Subject: net: fix cipso packet validation when !NETLABEL + +From: Seif Mazareeb + +[ Upstream commit f2e5ddcc0d12f9c4c7b254358ad245c9dddce13b ] + +When CONFIG_NETLABEL is disabled, the cipso_v4_validate() function could loop +forever in the main loop if opt[opt_iter +1] == 0, this will cause a kernel +crash in an SMP system, since the CPU executing this function will +stall /not respond to IPIs. + +This problem can be reproduced by running the IP Stack Integrity Checker +(http://isic.sourceforge.net) using the following command on a Linux machine +connected to DUT: + +"icmpsic -s rand -d -r 123456" +wait (1-2 min) + +Signed-off-by: Seif Mazareeb +Acked-by: Paul Moore +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/cipso_ipv4.h | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/include/net/cipso_ipv4.h ++++ b/include/net/cipso_ipv4.h +@@ -290,6 +290,7 @@ static inline int cipso_v4_validate(cons + unsigned char err_offset = 0; + u8 opt_len = opt[1]; + u8 opt_iter; ++ u8 tag_len; + + if (opt_len < 8) { + err_offset = 1; +@@ -302,11 +303,12 @@ static inline int cipso_v4_validate(cons + } + + for (opt_iter = 6; opt_iter < opt_len;) { +- if (opt[opt_iter + 1] > (opt_len - opt_iter)) { ++ tag_len = opt[opt_iter + 1]; ++ if ((tag_len == 0) || (opt[opt_iter + 1] > (opt_len - opt_iter))) { + err_offset = opt_iter + 1; + goto out; + } +- opt_iter += opt[opt_iter + 1]; ++ opt_iter += tag_len; + } + + out: diff --git a/queue-3.10/net-heap-overflow-in-__audit_sockaddr.patch b/queue-3.10/net-heap-overflow-in-__audit_sockaddr.patch new file mode 100644 index 00000000000..c7bfe17b1f6 --- /dev/null +++ b/queue-3.10/net-heap-overflow-in-__audit_sockaddr.patch @@ -0,0 +1,86 @@ +From 0448baee71329faff72f18e9a69b9fb39bfdb0fd Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Thu, 3 Oct 2013 00:27:20 +0300 +Subject: net: heap overflow in __audit_sockaddr() + +From: Dan Carpenter + +[ Upstream commit 1661bf364ae9c506bc8795fef70d1532931be1e8 ] + +We need to cap ->msg_namelen or it leads to a buffer overflow when we +do the memcpy() in __audit_sockaddr(). It requires CAP_AUDIT_CONTROL to +exploit this bug. + +The call tree is: +___sys_recvmsg() + move_addr_to_user() + audit_sockaddr() + __audit_sockaddr() + +Reported-by: Jüri Aedla +Signed-off-by: Dan Carpenter +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/compat.c | 2 ++ + net/socket.c | 24 ++++++++++++++++++++---- + 2 files changed, 22 insertions(+), 4 deletions(-) + +--- a/net/compat.c ++++ b/net/compat.c +@@ -71,6 +71,8 @@ int get_compat_msghdr(struct msghdr *kms + __get_user(kmsg->msg_controllen, &umsg->msg_controllen) || + __get_user(kmsg->msg_flags, &umsg->msg_flags)) + return -EFAULT; ++ if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) ++ return -EINVAL; + kmsg->msg_name = compat_ptr(tmp1); + kmsg->msg_iov = compat_ptr(tmp2); + kmsg->msg_control = compat_ptr(tmp3); +--- a/net/socket.c ++++ b/net/socket.c +@@ -1956,6 +1956,16 @@ struct used_address { + unsigned int name_len; + }; + ++static int copy_msghdr_from_user(struct msghdr *kmsg, ++ struct msghdr __user *umsg) ++{ ++ if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) ++ return -EFAULT; ++ if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) ++ return -EINVAL; ++ return 0; ++} ++ + static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, + struct msghdr *msg_sys, unsigned int flags, + struct used_address *used_address) +@@ -1974,8 +1984,11 @@ static int ___sys_sendmsg(struct socket + if (MSG_CMSG_COMPAT & flags) { + if (get_compat_msghdr(msg_sys, msg_compat)) + return -EFAULT; +- } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) +- return -EFAULT; ++ } else { ++ err = copy_msghdr_from_user(msg_sys, msg); ++ if (err) ++ return err; ++ } + + if (msg_sys->msg_iovlen > UIO_FASTIOV) { + err = -EMSGSIZE; +@@ -2183,8 +2196,11 @@ static int ___sys_recvmsg(struct socket + if (MSG_CMSG_COMPAT & flags) { + if (get_compat_msghdr(msg_sys, msg_compat)) + return -EFAULT; +- } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) +- return -EFAULT; ++ } else { ++ err = copy_msghdr_from_user(msg_sys, msg); ++ if (err) ++ return err; ++ } + + if (msg_sys->msg_iovlen > UIO_FASTIOV) { + err = -EMSGSIZE; diff --git 
a/queue-3.10/net-mv643xx_eth-fix-orphaned-statistics-timer-crash.patch b/queue-3.10/net-mv643xx_eth-fix-orphaned-statistics-timer-crash.patch new file mode 100644 index 00000000000..63b0faf5d54 --- /dev/null +++ b/queue-3.10/net-mv643xx_eth-fix-orphaned-statistics-timer-crash.patch @@ -0,0 +1,41 @@ +From 5bb866b1498b47c368a916ede5bc6e33dc639e11 Mon Sep 17 00:00:00 2001 +From: Sebastian Hesselbarth +Date: Wed, 2 Oct 2013 12:57:21 +0200 +Subject: net: mv643xx_eth: fix orphaned statistics timer crash + +From: Sebastian Hesselbarth + +[ Upstream commit f564412c935111c583b787bcc18157377b208e2e ] + +The periodic statistics timer gets started at port _probe() time, but +is stopped on _stop() only. In a modular environment, this can cause +the timer to access already deallocated memory, if the module is unloaded +without starting the eth device. To fix this, we add the timer right +before the port is started, instead of at _probe() time. + +Signed-off-by: Sebastian Hesselbarth +Acked-by: Jason Cooper +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/marvell/mv643xx_eth.c ++++ b/drivers/net/ethernet/marvell/mv643xx_eth.c +@@ -2229,6 +2229,7 @@ static int mv643xx_eth_open(struct net_d + mp->int_mask |= INT_TX_END_0 << i; + } + ++ add_timer(&mp->mib_counters_timer); + port_start(mp); + + wrlp(mp, INT_MASK_EXT, INT_EXT_LINK_PHY | INT_EXT_TX); +@@ -2737,7 +2738,6 @@ static int mv643xx_eth_probe(struct plat + mp->mib_counters_timer.data = (unsigned long)mp; + mp->mib_counters_timer.function = mib_counters_timer_wrapper; + mp->mib_counters_timer.expires = jiffies + 30 * HZ; +- add_timer(&mp->mib_counters_timer); + + spin_lock_init(&mp->mib_counters_lock); + diff --git a/queue-3.10/net-mv643xx_eth-update-statistics-timer-from-timer-context-only.patch b/queue-3.10/net-mv643xx_eth-update-statistics-timer-from-timer-context-only.patch new file mode 100644 index 00000000000..efe3ad53f73 --- /dev/null +++ b/queue-3.10/net-mv643xx_eth-update-statistics-timer-from-timer-context-only.patch @@ -0,0 +1,41 @@ +From 117aa3e23dcc332b2617bf92dd2890329dd5866a Mon Sep 17 00:00:00 2001 +From: Sebastian Hesselbarth +Date: Wed, 2 Oct 2013 12:57:20 +0200 +Subject: net: mv643xx_eth: update statistics timer from timer context only + +From: Sebastian Hesselbarth + +[ Upstream commit 041b4ddb84989f06ff1df0ca869b950f1ee3cb1c ] + +Each port driver installs a periodic timer to update port statistics +by calling mib_counters_update. As mib_counters_update is also called +from non-timer context, we should not reschedule the timer there but +rather move it to timer-only context. + +Signed-off-by: Sebastian Hesselbarth +Acked-by: Jason Cooper +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mv643xx_eth.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/net/ethernet/marvell/mv643xx_eth.c ++++ b/drivers/net/ethernet/marvell/mv643xx_eth.c +@@ -1125,15 +1125,13 @@ static void mib_counters_update(struct m + p->rx_discard += rdlp(mp, RX_DISCARD_FRAME_CNT); + p->rx_overrun += rdlp(mp, RX_OVERRUN_FRAME_CNT); + spin_unlock_bh(&mp->mib_counters_lock); +- +- mod_timer(&mp->mib_counters_timer, jiffies + 30 * HZ); + } + + static void mib_counters_timer_wrapper(unsigned long _mp) + { + struct mv643xx_eth_private *mp = (void *)_mp; +- + mib_counters_update(mp); ++ mod_timer(&mp->mib_counters_timer, jiffies + 30 * HZ); + } + + diff --git a/queue-3.10/net-secure_seq-fix-warning-when-config_ipv6-and-config_inet-are-not-selected.patch b/queue-3.10/net-secure_seq-fix-warning-when-config_ipv6-and-config_inet-are-not-selected.patch new file mode 100644 index 00000000000..178dc946b84 --- /dev/null +++ b/queue-3.10/net-secure_seq-fix-warning-when-config_ipv6-and-config_inet-are-not-selected.patch @@ -0,0 +1,58 @@ +From 2c3b7942dc9f726819fbee80f547e50461f7ab0f Mon Sep 17 00:00:00 2001 +From: Fabio Estevam +Date: Sat, 5 Oct 2013 17:56:59 -0300 +Subject: net: secure_seq: Fix warning when CONFIG_IPV6 and CONFIG_INET are not selected + +From: Fabio Estevam + +[ Upstream commit cb03db9d0e964568407fb08ea46cc2b6b7f67587 ] + +net_secret() is only used when CONFIG_IPV6 or CONFIG_INET are selected. + +Building a defconfig with both of these symbols unselected (Using the ARM +at91sam9rl_defconfig, for example) leads to the following build warning: + +$ make at91sam9rl_defconfig +# +# configuration written to .config +# + +$ make net/core/secure_seq.o +scripts/kconfig/conf --silentoldconfig Kconfig + CHK include/config/kernel.release + CHK include/generated/uapi/linux/version.h + CHK include/generated/utsrelease.h +make[1]: `include/generated/mach-types.h' is up to date. 
+ CALL scripts/checksyscalls.sh + CC net/core/secure_seq.o +net/core/secure_seq.c:17:13: warning: 'net_secret_init' defined but not used [-Wunused-function] + +Fix this warning by protecting the definition of net_secret() with these +symbols. + +Reported-by: Olof Johansson +Signed-off-by: Fabio Estevam +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/secure_seq.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/core/secure_seq.c ++++ b/net/core/secure_seq.c +@@ -10,6 +10,7 @@ + + #include + ++#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET) + #define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) + + static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; +@@ -29,6 +30,7 @@ static void net_secret_init(void) + cmpxchg(&net_secret[--i], 0, tmp); + } + } ++#endif + + #ifdef CONFIG_INET + static u32 seq_scale(u32 seq) diff --git a/queue-3.10/net-unix-inherit-sock_pass-cred-sec-flags-from-socket-to-fix-race.patch b/queue-3.10/net-unix-inherit-sock_pass-cred-sec-flags-from-socket-to-fix-race.patch new file mode 100644 index 00000000000..5357235a885 --- /dev/null +++ b/queue-3.10/net-unix-inherit-sock_pass-cred-sec-flags-from-socket-to-fix-race.patch @@ -0,0 +1,87 @@ +From 707029030c46cfcad3d61d67e76f77cc167831b3 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Thu, 17 Oct 2013 22:51:31 +0200 +Subject: net: unix: inherit SOCK_PASS{CRED, SEC} flags from socket to fix race + +From: Daniel Borkmann + +[ Upstream commit 90c6bd34f884cd9cee21f1d152baf6c18bcac949 ] + +In the case of credentials passing in unix stream sockets (dgram +sockets seem not affected), we get a rather sparse race after +commit 16e5726 ("af_unix: dont send SCM_CREDENTIALS by default"). + +We have a stream server on receiver side that requests credential +passing from senders (e.g. nc -U). 
Since we need to set SO_PASSCRED +on each spawned/accepted socket on server side to 1 first (as it's +not inherited), it can happen that in the time between accept() and +setsockopt() we get interrupted, the sender is being scheduled and +continues with passing data to our receiver. At that time SO_PASSCRED +is neither set on sender nor receiver side, hence in cmsg's +SCM_CREDENTIALS we get eventually pid:0, uid:65534, gid:65534 +(== overflow{u,g}id) instead of what we actually would like to see. + +On the sender side, here nc -U, the tests in maybe_add_creds() +invoked through unix_stream_sendmsg() would fail, as at that exact +time, as mentioned, the sender has neither SO_PASSCRED on his side +nor sees it on the server side, and we have a valid 'other' socket +in place. Thus, sender believes it would just look like a normal +connection, not needing/requesting SO_PASSCRED at that time. + +As reverting 16e5726 would not be an option due to the significant +performance regression reported when having creds always passed, +one way/trade-off to prevent that would be to set SO_PASSCRED on +the listener socket and allow inheriting these flags to the spawned +socket on server side in accept(). It seems also logical to do so +if we'd tell the listener socket to pass those flags onwards, and +would fix the race. + +Before, strace: + +recvmsg(4, {msg_name(0)=NULL, msg_iov(1)=[{"blub\n", 4096}], + msg_controllen=32, {cmsg_len=28, cmsg_level=SOL_SOCKET, + cmsg_type=SCM_CREDENTIALS{pid=0, uid=65534, gid=65534}}, + msg_flags=0}, 0) = 5 + +After, strace: + +recvmsg(4, {msg_name(0)=NULL, msg_iov(1)=[{"blub\n", 4096}], + msg_controllen=32, {cmsg_len=28, cmsg_level=SOL_SOCKET, + cmsg_type=SCM_CREDENTIALS{pid=11580, uid=1000, gid=1000}}, + msg_flags=0}, 0) = 5 + +Signed-off-by: Daniel Borkmann +Cc: Eric Dumazet +Cc: Eric W. Biederman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/af_unix.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -1245,6 +1245,15 @@ static int unix_socketpair(struct socket + return 0; + } + ++static void unix_sock_inherit_flags(const struct socket *old, ++ struct socket *new) ++{ ++ if (test_bit(SOCK_PASSCRED, &old->flags)) ++ set_bit(SOCK_PASSCRED, &new->flags); ++ if (test_bit(SOCK_PASSSEC, &old->flags)) ++ set_bit(SOCK_PASSSEC, &new->flags); ++} ++ + static int unix_accept(struct socket *sock, struct socket *newsock, int flags) + { + struct sock *sk = sock->sk; +@@ -1279,6 +1288,7 @@ static int unix_accept(struct socket *so + /* attach accepted sock to socket */ + unix_state_lock(tsk); + newsock->state = SS_CONNECTED; ++ unix_sock_inherit_flags(sock, newsock); + sock_graft(tsk, newsock); + unix_state_unlock(tsk); + return 0; diff --git a/queue-3.10/net-vlan-fix-nlmsg-size-calculation-in-vlan_get_size.patch b/queue-3.10/net-vlan-fix-nlmsg-size-calculation-in-vlan_get_size.patch new file mode 100644 index 00000000000..cc7d2597970 --- /dev/null +++ b/queue-3.10/net-vlan-fix-nlmsg-size-calculation-in-vlan_get_size.patch @@ -0,0 +1,31 @@ +From ac784a91c26bf53af7db09d8d6c0d1c24d714d65 Mon Sep 17 00:00:00 2001 +From: Marc Kleine-Budde +Date: Mon, 7 Oct 2013 23:19:58 +0200 +Subject: net: vlan: fix nlmsg size calculation in vlan_get_size() + +From: Marc Kleine-Budde + +[ Upstream commit c33a39c575068c2ea9bffb22fd6de2df19c74b89 ] + +This patch fixes the calculation of the nlmsg size, by adding the missing +nla_total_size(). + +Cc: Patrick McHardy +Signed-off-by: Marc Kleine-Budde +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/8021q/vlan_netlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/8021q/vlan_netlink.c ++++ b/net/8021q/vlan_netlink.c +@@ -171,7 +171,7 @@ static size_t vlan_get_size(const struct + + return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */ + nla_total_size(2) + /* IFLA_VLAN_ID */ +- sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */ ++ nla_total_size(sizeof(struct ifla_vlan_flags)) + /* IFLA_VLAN_FLAGS */ + vlan_qos_map_size(vlan->nr_ingress_mappings) + + vlan_qos_map_size(vlan->nr_egress_mappings); + } diff --git a/queue-3.10/netfilter-nf_conntrack-fix-rt6i_gateway-checks-for-h.323-helper.patch b/queue-3.10/netfilter-nf_conntrack-fix-rt6i_gateway-checks-for-h.323-helper.patch new file mode 100644 index 00000000000..f83d33c61d9 --- /dev/null +++ b/queue-3.10/netfilter-nf_conntrack-fix-rt6i_gateway-checks-for-h.323-helper.patch @@ -0,0 +1,35 @@ +From 3503ce5929ed9eff3ff79c08ad0c6061ec1d41d2 Mon Sep 17 00:00:00 2001 +From: Julian Anastasov +Date: Sun, 20 Oct 2013 15:43:05 +0300 +Subject: netfilter: nf_conntrack: fix rt6i_gateway checks for H.323 helper + +From: Julian Anastasov + +[ Upstream commit 56e42441ed54b092d6c7411138ce60d049e7c731 ] + +Now when rt6_nexthop() can return nexthop address we can use it +for proper nexthop comparison of directly connected destinations. +For more information refer to commit bbb5823cf742a7 +("netfilter: nf_conntrack: fix rt_gateway checks for H.323 helper"). + +Signed-off-by: Julian Anastasov +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_conntrack_h323_main.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/netfilter/nf_conntrack_h323_main.c ++++ b/net/netfilter/nf_conntrack_h323_main.c +@@ -778,8 +778,8 @@ static int callforward_do_filter(const u + flowi6_to_flowi(&fl1), false)) { + if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, + flowi6_to_flowi(&fl2), false)) { +- if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, +- sizeof(rt1->rt6i_gateway)) && ++ if (ipv6_addr_equal(rt6_nexthop(rt1), ++ rt6_nexthop(rt2)) && + rt1->dst.dev == rt2->dst.dev) + ret = 1; + dst_release(&rt2->dst); diff --git a/queue-3.10/proc-connector-fix-info-leaks.patch b/queue-3.10/proc-connector-fix-info-leaks.patch new file mode 100644 index 00000000000..b2d065760aa --- /dev/null +++ b/queue-3.10/proc-connector-fix-info-leaks.patch @@ -0,0 +1,167 @@ +From a8d65362af47290c2d5c2322ca07edbe932d4a88 Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Mon, 30 Sep 2013 22:03:06 +0200 +Subject: proc connector: fix info leaks + +From: Mathias Krause + +[ Upstream commit e727ca82e0e9616ab4844301e6bae60ca7327682 ] + +Initialize event_data for all possible message types to prevent leaking +kernel stack contents to userland (up to 20 bytes). Also set the flags +member of the connector message to 0 to prevent leaking two more stack +bytes this way. + +Signed-off-by: Mathias Krause +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/connector/cn_proc.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +--- a/drivers/connector/cn_proc.c ++++ b/drivers/connector/cn_proc.c +@@ -65,6 +65,7 @@ void proc_fork_connector(struct task_str + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -80,6 +81,7 @@ void proc_fork_connector(struct task_str + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + /* If cn_netlink_send() failed, the data is not sent */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } +@@ -96,6 +98,7 @@ void proc_exec_connector(struct task_str + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -106,6 +109,7 @@ void proc_exec_connector(struct task_str + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + +@@ -122,6 +126,7 @@ void proc_id_connector(struct task_struc + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + ev->what = which_id; + ev->event_data.id.process_pid = task->pid; + ev->event_data.id.process_tgid = task->tgid; +@@ -145,6 +150,7 @@ void proc_id_connector(struct task_struc + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ 
+ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + +@@ -160,6 +166,7 @@ void proc_sid_connector(struct task_stru + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -170,6 +177,7 @@ void proc_sid_connector(struct task_stru + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + +@@ -185,6 +193,7 @@ void proc_ptrace_connector(struct task_s + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -203,6 +212,7 @@ void proc_ptrace_connector(struct task_s + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + +@@ -218,6 +228,7 @@ void proc_comm_connector(struct task_str + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -229,6 +240,7 @@ void proc_comm_connector(struct task_str + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + +@@ -244,6 +256,7 @@ void proc_coredump_connector(struct task + + msg = (struct cn_msg *)buffer; + ev = 
(struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -254,6 +267,7 @@ void proc_coredump_connector(struct task + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + +@@ -269,6 +283,7 @@ void proc_exit_connector(struct task_str + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -281,6 +296,7 @@ void proc_exit_connector(struct task_str + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + +@@ -304,6 +320,7 @@ static void cn_proc_ack(int err, int rcv + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + msg->seq = rcvd_seq; + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -313,6 +330,7 @@ static void cn_proc_ack(int err, int rcv + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = rcvd_ack + 1; + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + diff --git a/queue-3.10/sctp-perform-software-checksum-if-packet-has-to-be-fragmented.patch b/queue-3.10/sctp-perform-software-checksum-if-packet-has-to-be-fragmented.patch new file mode 100644 index 00000000000..1c20530f643 --- /dev/null +++ 
b/queue-3.10/sctp-perform-software-checksum-if-packet-has-to-be-fragmented.patch @@ -0,0 +1,37 @@ +From d80d3eb5e4d6ef63ccdb7fa0d7146d4f69ab03b5 Mon Sep 17 00:00:00 2001 +From: Vlad Yasevich +Date: Tue, 15 Oct 2013 22:01:31 -0400 +Subject: sctp: Perform software checksum if packet has to be fragmented. + +From: Vlad Yasevich + +[ Upstream commit d2dbbba77e95dff4b4f901fee236fef6d9552072 ] + +IP/IPv6 fragmentation knows how to compute only TCP/UDP checksum. +This causes problems if SCTP packets have to be fragmented and +ip_summed has been set to PARTIAL due to checksum offload support. +This condition can happen when retransmitting after MTU discovery, +or when INIT or other control chunks are larger than MTU. +Check for the rare fragmentation condition in SCTP and use software +checksum calculation in this case. + +CC: Fan Du +Signed-off-by: Vlad Yasevich +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/output.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sctp/output.c ++++ b/net/sctp/output.c +@@ -548,7 +548,7 @@ int sctp_packet_transmit(struct sctp_pac + */ + if (!sctp_checksum_disable) { + if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || +- (dst_xfrm(dst) != NULL)) { ++ (dst_xfrm(dst) != NULL) || packet->ipfragok) { + __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); + + /* 3) Put the resultant value into the checksum field in the diff --git a/queue-3.10/sctp-use-software-crc32-checksum-when-xfrm-transform-will-happen.patch b/queue-3.10/sctp-use-software-crc32-checksum-when-xfrm-transform-will-happen.patch new file mode 100644 index 00000000000..e9831643266 --- /dev/null +++ b/queue-3.10/sctp-use-software-crc32-checksum-when-xfrm-transform-will-happen.patch @@ -0,0 +1,38 @@ +From e994f4f745ca33a23014b516a12c7582f61ef2ca Mon Sep 17 00:00:00 2001 +From: Fan Du +Date: Tue, 15 Oct 2013 22:01:30 -0400 +Subject: sctp: Use software crc32 checksum when xfrm transform will happen. 
+ +From: Fan Du + +[ Upstream commit 27127a82561a2a3ed955ce207048e1b066a80a2a ] + +igb/ixgbe have hardware sctp checksum support, when this feature is enabled +and also IPsec is armed to protect sctp traffic, ugly things happened as +xfrm_output checks CHECKSUM_PARTIAL to do checksum operation(sum every thing +up and pack the 16bits result in the checksum field). The result is fail +establishment of sctp communication. + +Signed-off-by: Fan Du +Cc: Neil Horman +Cc: Steffen Klassert +Signed-off-by: Vlad Yasevich +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/output.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/sctp/output.c ++++ b/net/sctp/output.c +@@ -547,7 +547,8 @@ int sctp_packet_transmit(struct sctp_pac + * by CRC32-C as described in . + */ + if (!sctp_checksum_disable) { +- if (!(dst->dev->features & NETIF_F_SCTP_CSUM)) { ++ if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || ++ (dst_xfrm(dst) != NULL)) { + __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); + + /* 3) Put the resultant value into the checksum field in the diff --git a/queue-3.10/series b/queue-3.10/series new file mode 100644 index 00000000000..b38c3d2b71d --- /dev/null +++ b/queue-3.10/series @@ -0,0 +1,40 @@ +tcp-tso-packets-automatic-sizing.patch +tcp-tsq-can-use-a-dynamic-limit.patch +tcp-must-unclone-packets-before-mangling-them.patch +tcp-do-not-forget-fin-in-tcp_shifted_skb.patch +tcp-fix-incorrect-ca_state-in-tail-loss-probe.patch +net-do-not-call-sock_put-on-timewait-sockets.patch +l2tp-fix-kernel-panic-when-using-ipv4-mapped-ipv6-addresses.patch +l2tp-fix-build-warning-with-ipv6-disabled.patch +net-mv643xx_eth-update-statistics-timer-from-timer-context-only.patch +net-mv643xx_eth-fix-orphaned-statistics-timer-crash.patch +net-heap-overflow-in-__audit_sockaddr.patch +proc-connector-fix-info-leaks.patch +ipv4-fix-ineffective-source-address-selection.patch 
+can-dev-fix-nlmsg-size-calculation-in-can_get_size.patch +net-secure_seq-fix-warning-when-config_ipv6-and-config_inet-are-not-selected.patch +xen-netback-don-t-destroy-the-netdev-until-the-vif-is-shut-down.patch +net-vlan-fix-nlmsg-size-calculation-in-vlan_get_size.patch +vti-get-rid-of-nf-mark-rule-in-prerouting.patch +l2tp-must-disable-bh-before-calling-l2tp_xmit_skb.patch +farsync-fix-info-leak-in-ioctl.patch +unix_diag-fix-info-leak.patch +connector-use-nlmsg_len-to-check-message-length.patch +bnx2x-record-rx-queue-for-lro-packets.patch +virtio-net-don-t-respond-to-cpu-hotplug-notifier-if-we-re-not-ready.patch +virtio-net-fix-the-race-between-channels-setting-and-refill.patch +virtio-net-refill-only-when-device-is-up-during-setting-queues.patch +bridge-correctly-clamp-max-forward_delay-when-enabling-stp.patch +net-dst-provide-accessor-function-to-dst-xfrm.patch +sctp-use-software-crc32-checksum-when-xfrm-transform-will-happen.patch +sctp-perform-software-checksum-if-packet-has-to-be-fragmented.patch +wanxl-fix-info-leak-in-ioctl.patch +be2net-pass-if_id-for-v1-and-v2-versions-of-tx_create-cmd.patch +net-unix-inherit-sock_pass-cred-sec-flags-from-socket-to-fix-race.patch +net-fix-cipso-packet-validation-when-netlabel.patch +inet-fix-possible-memory-corruption-with-udp_cork-and-ufo.patch +ipv6-always-prefer-rt6i_gateway-if-present.patch +ipv6-fill-rt6i_gateway-with-nexthop-address.patch +netfilter-nf_conntrack-fix-rt6i_gateway-checks-for-h.323-helper.patch +ipv6-probe-routes-asynchronous-in-rt6_probe.patch +davinci_emac.c-fix-iff_allmulti-setup.patch diff --git a/queue-3.10/tcp-do-not-forget-fin-in-tcp_shifted_skb.patch b/queue-3.10/tcp-do-not-forget-fin-in-tcp_shifted_skb.patch new file mode 100644 index 00000000000..76158cb46b9 --- /dev/null +++ b/queue-3.10/tcp-do-not-forget-fin-in-tcp_shifted_skb.patch @@ -0,0 +1,76 @@ +From ab8882f2a3f46b06d8cda14c6aee6f0232307a5e Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Fri, 4 Oct 2013 10:31:41 -0700 +Subject: 
tcp: do not forget FIN in tcp_shifted_skb() + +From: Eric Dumazet + +[ Upstream commit 5e8a402f831dbe7ee831340a91439e46f0d38acd ] + +Yuchung found following problem : + + There are bugs in the SACK processing code, merging part in + tcp_shift_skb_data(), that incorrectly resets or ignores the sacked + skbs FIN flag. When a receiver first SACK the FIN sequence, and later + throw away ofo queue (e.g., sack-reneging), the sender will stop + retransmitting the FIN flag, and hangs forever. + +Following packetdrill test can be used to reproduce the bug. + +$ cat sack-merge-bug.pkt +`sysctl -q net.ipv4.tcp_fack=0` + +// Establish a connection and send 10 MSS. +0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++.000 bind(3, ..., ...) = 0 ++.000 listen(3, 1) = 0 + ++.050 < S 0:0(0) win 32792 ++.000 > S. 0:0(0) ack 1 ++.001 < . 1:1(0) ack 1 win 1024 ++.000 accept(3, ..., ...) = 4 + ++.100 write(4, ..., 12000) = 12000 ++.000 shutdown(4, SHUT_WR) = 0 ++.000 > . 1:10001(10000) ack 1 ++.050 < . 1:1(0) ack 2001 win 257 ++.000 > FP. 10001:12001(2000) ack 1 ++.050 < . 1:1(0) ack 2001 win 257 ++.050 < . 1:1(0) ack 2001 win 257 +// SACK reneg ++.050 < . 1:1(0) ack 12001 win 257 ++0 %{ print "unacked: ",tcpi_unacked }% ++5 %{ print "" }% + +First, a typo inverted left/right of one OR operation, then +code forgot to advance end_seq if the merged skb carried FIN. + +Bug was added in 2.6.29 by commit 832d11c5cd076ab +("tcp: Try to restore large SKBs while SACK processing") + +Signed-off-by: Eric Dumazet +Signed-off-by: Yuchung Cheng +Acked-by: Neal Cardwell +Cc: Ilpo Järvinen +Acked-by: Ilpo Järvinen +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -1292,7 +1292,10 @@ static bool tcp_shifted_skb(struct sock + tp->lost_cnt_hint -= tcp_skb_pcount(prev); + } + +- TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags; ++ TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; ++ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) ++ TCP_SKB_CB(prev)->end_seq++; ++ + if (skb == tcp_highest_sack(sk)) + tcp_advance_highest_sack(sk, skb); + diff --git a/queue-3.10/tcp-fix-incorrect-ca_state-in-tail-loss-probe.patch b/queue-3.10/tcp-fix-incorrect-ca_state-in-tail-loss-probe.patch new file mode 100644 index 00000000000..7bd8420b645 --- /dev/null +++ b/queue-3.10/tcp-fix-incorrect-ca_state-in-tail-loss-probe.patch @@ -0,0 +1,41 @@ +From 81f50cf8065f81d67e2f771f334cb47873415f0b Mon Sep 17 00:00:00 2001 +From: Yuchung Cheng +Date: Sat, 12 Oct 2013 10:16:27 -0700 +Subject: tcp: fix incorrect ca_state in tail loss probe + +From: Yuchung Cheng + +[ Upstream commit 031afe4990a7c9dbff41a3a742c44d3e740ea0a1 ] + +On receiving an ACK that covers the loss probe sequence, TLP +immediately sets the congestion state to Open, even though some packets +are not recovered and retransmisssion are on the way. The later ACks +may trigger a WARN_ON check in step D of tcp_fastretrans_alert(), e.g., +https://bugzilla.redhat.com/show_bug.cgi?id=989251 + +The fix is to follow the similar procedure in recovery by calling +tcp_try_keep_open(). The sender switches to Open state if no packets +are retransmissted. Otherwise it goes to Disorder and let subsequent +ACKs move the state to Recovery or Open. + +Reported-By: Michael Sterrett +Tested-By: Dormando +Signed-off-by: Yuchung Cheng +Acked-by: Neal Cardwell +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -1292,7 +1292,10 @@ static bool tcp_shifted_skb(struct sock + tp->lost_cnt_hint -= tcp_skb_pcount(prev); + } + +- TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags; ++ TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; ++ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) ++ TCP_SKB_CB(prev)->end_seq++; ++ + if (skb == tcp_highest_sack(sk)) + tcp_advance_highest_sack(sk, skb); + diff --git a/queue-3.10/tcp-fix-incorrect-ca_state-in-tail-loss-probe.patch b/queue-3.10/tcp-fix-incorrect-ca_state-in-tail-loss-probe.patch new file mode 100644 index 00000000000..7bd8420b645 --- /dev/null +++ b/queue-3.10/tcp-fix-incorrect-ca_state-in-tail-loss-probe.patch @@ -0,0 +1,41 @@ +From 81f50cf8065f81d67e2f771f334cb47873415f0b Mon Sep 17 00:00:00 2001 +From: Yuchung Cheng +Date: Sat, 12 Oct 2013 10:16:27 -0700 +Subject: tcp: fix incorrect ca_state in tail loss probe + +From: Yuchung Cheng + +[ Upstream commit 031afe4990a7c9dbff41a3a742c44d3e740ea0a1 ] + +On receiving an ACK that covers the loss probe sequence, TLP +immediately sets the congestion state to Open, even though some packets +are not recovered and retransmissions are on the way. The later ACKs +may trigger a WARN_ON check in step D of tcp_fastretrans_alert(), e.g., +https://bugzilla.redhat.com/show_bug.cgi?id=989251 + +The fix is to follow the similar procedure in recovery by calling +tcp_try_keep_open(). The sender switches to Open state if no packets +are retransmitted. Otherwise it goes to Disorder and lets subsequent +ACKs move the state to Recovery or Open. + +Reported-By: Michael Sterrett +Tested-By: Dormando +Signed-off-by: Yuchung Cheng +Acked-by: Neal Cardwell +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -976,6 +976,9 @@ static void tcp_queue_skb(struct sock *s + static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, + unsigned int mss_now) + { ++ /* Make sure we own this skb before messing gso_size/gso_segs */ ++ WARN_ON_ONCE(skb_cloned(skb)); ++ + if (skb->len <= mss_now || !sk_can_gso(sk) || + skb->ip_summed == CHECKSUM_NONE) { + /* Avoid the costly divide in the normal +@@ -1057,9 +1060,7 @@ int tcp_fragment(struct sock *sk, struct + if (nsize < 0) + nsize = 0; + +- if (skb_cloned(skb) && +- skb_is_nonlinear(skb) && +- pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) ++ if (skb_unclone(skb, GFP_ATOMIC)) + return -ENOMEM; + + /* Get a new skb... force flag on. */ +@@ -2334,6 +2335,8 @@ int __tcp_retransmit_skb(struct sock *sk + int oldpcount = tcp_skb_pcount(skb); + + if (unlikely(oldpcount > 1)) { ++ if (skb_unclone(skb, GFP_ATOMIC)) ++ return -ENOMEM; + tcp_init_tso_segs(sk, skb, cur_mss); + tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); + } diff --git a/queue-3.10/tcp-tso-packets-automatic-sizing.patch b/queue-3.10/tcp-tso-packets-automatic-sizing.patch new file mode 100644 index 00000000000..028a46df168 --- /dev/null +++ b/queue-3.10/tcp-tso-packets-automatic-sizing.patch @@ -0,0 +1,271 @@ +From ed171f3521e0b0d3145d3eb702e6e5e349df0f6b Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 27 Aug 2013 05:46:32 -0700 +Subject: tcp: TSO packets automatic sizing + +From: Eric Dumazet + +[ Upstream commits 6d36824e730f247b602c90e8715a792003e3c5a7, + 02cf4ebd82ff0ac7254b88e466820a290ed8289a, and parts of + 7eec4174ff29cd42f2acfae8112f51c228545d40 ] + +After hearing many people over past years complaining against TSO being +bursty or even buggy, we are proud to present automatic sizing of TSO +packets. 
+ +One part of the problem is that tcp_tso_should_defer() uses an heuristic +relying on upcoming ACKS instead of a timer, but more generally, having +big TSO packets makes little sense for low rates, as it tends to create +micro bursts on the network, and general consensus is to reduce the +buffering amount. + +This patch introduces a per socket sk_pacing_rate, that approximates +the current sending rate, and allows us to size the TSO packets so +that we try to send one packet every ms. + +This field could be set by other transports. + +Patch has no impact for high speed flows, where having large TSO packets +makes sense to reach line rate. + +For other flows, this helps better packet scheduling and ACK clocking. + +This patch increases performance of TCP flows in lossy environments. + +A new sysctl (tcp_min_tso_segs) is added, to specify the +minimal size of a TSO packet (default being 2). + +A follow-up patch will provide a new packet scheduler (FQ), using +sk_pacing_rate as an input to perform optional per flow pacing. + +This explains why we chose to set sk_pacing_rate to twice the current +rate, allowing 'slow start' ramp up. + +sk_pacing_rate = 2 * cwnd * mss / srtt + +v2: Neal Cardwell reported a suspect deferring of last two segments on +initial write of 10 MSS, I had to change tcp_tso_should_defer() to take +into account tp->xmit_size_goal_segs + +Signed-off-by: Eric Dumazet +Cc: Neal Cardwell +Cc: Yuchung Cheng +Cc: Van Jacobson +Cc: Tom Herbert +Acked-by: Yuchung Cheng +Acked-by: Neal Cardwell +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/networking/ip-sysctl.txt | 9 ++++++++ + include/net/sock.h | 2 + + include/net/tcp.h | 1 + net/core/sock.c | 1 + net/ipv4/sysctl_net_ipv4.c | 10 +++++++++ + net/ipv4/tcp.c | 28 ++++++++++++++++++++++----- + net/ipv4/tcp_input.c | 34 ++++++++++++++++++++++++++++++++- + net/ipv4/tcp_output.c | 2 - + 8 files changed, 80 insertions(+), 7 deletions(-) + +--- a/Documentation/networking/ip-sysctl.txt ++++ b/Documentation/networking/ip-sysctl.txt +@@ -478,6 +478,15 @@ tcp_syn_retries - INTEGER + tcp_timestamps - BOOLEAN + Enable timestamps as defined in RFC1323. + ++tcp_min_tso_segs - INTEGER ++ Minimal number of segments per TSO frame. ++ Since linux-3.12, TCP does an automatic sizing of TSO frames, ++ depending on flow rate, instead of filling 64Kbytes packets. ++ For specific usages, it's possible to force TCP to build big ++ TSO frames. Note that TCP stack might split too big TSO packets ++ if available window is too small. ++ Default: 2 ++ + tcp_tso_win_divisor - INTEGER + This allows control over what percentage of the congestion window + can be consumed by a single TSO frame. 
+--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -230,6 +230,7 @@ struct cg_proto; + * @sk_wmem_queued: persistent queue size + * @sk_forward_alloc: space allocated forward + * @sk_allocation: allocation mode ++ * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) + * @sk_sndbuf: size of send buffer in bytes + * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, + * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings +@@ -355,6 +356,7 @@ struct sock { + kmemcheck_bitfield_end(flags); + int sk_wmem_queued; + gfp_t sk_allocation; ++ u32 sk_pacing_rate; /* bytes per second */ + netdev_features_t sk_route_caps; + netdev_features_t sk_route_nocaps; + int sk_gso_type; +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -287,6 +287,7 @@ extern int sysctl_tcp_thin_dupack; + extern int sysctl_tcp_early_retrans; + extern int sysctl_tcp_limit_output_bytes; + extern int sysctl_tcp_challenge_ack_limit; ++extern int sysctl_tcp_min_tso_segs; + + extern atomic_long_t tcp_memory_allocated; + extern struct percpu_counter tcp_sockets_allocated; +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -2271,6 +2271,7 @@ void sock_init_data(struct socket *sock, + + sk->sk_stamp = ktime_set(-1L, 0); + ++ sk->sk_pacing_rate = ~0U; + /* + * Before updating sk_refcnt, we must commit prior changes to memory + * (Documentation/RCU/rculist_nulls.txt for details) +--- a/net/ipv4/sysctl_net_ipv4.c ++++ b/net/ipv4/sysctl_net_ipv4.c +@@ -29,6 +29,7 @@ + static int zero; + static int one = 1; + static int four = 4; ++static int gso_max_segs = GSO_MAX_SEGS; + static int tcp_retr1_max = 255; + static int ip_local_port_range_min[] = { 1, 1 }; + static int ip_local_port_range_max[] = { 65535, 65535 }; +@@ -753,6 +754,15 @@ static struct ctl_table ipv4_table[] = { + .extra2 = &four, + }, + { ++ .procname = "tcp_min_tso_segs", ++ .data = &sysctl_tcp_min_tso_segs, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = &zero, ++ 
.extra2 = &gso_max_segs, ++ }, ++ { + .procname = "udp_mem", + .data = &sysctl_udp_mem, + .maxlen = sizeof(sysctl_udp_mem), +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -282,6 +282,8 @@ + + int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; + ++int sysctl_tcp_min_tso_segs __read_mostly = 2; ++ + struct percpu_counter tcp_orphan_count; + EXPORT_SYMBOL_GPL(tcp_orphan_count); + +@@ -786,12 +788,28 @@ static unsigned int tcp_xmit_size_goal(s + xmit_size_goal = mss_now; + + if (large_allowed && sk_can_gso(sk)) { +- xmit_size_goal = ((sk->sk_gso_max_size - 1) - +- inet_csk(sk)->icsk_af_ops->net_header_len - +- inet_csk(sk)->icsk_ext_hdr_len - +- tp->tcp_header_len); ++ u32 gso_size, hlen; + +- /* TSQ : try to have two TSO segments in flight */ ++ /* Maybe we should/could use sk->sk_prot->max_header here ? */ ++ hlen = inet_csk(sk)->icsk_af_ops->net_header_len + ++ inet_csk(sk)->icsk_ext_hdr_len + ++ tp->tcp_header_len; ++ ++ /* Goal is to send at least one packet per ms, ++ * not one big TSO packet every 100 ms. ++ * This preserves ACK clocking and is consistent ++ * with tcp_tso_should_defer() heuristic. ++ */ ++ gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC); ++ gso_size = max_t(u32, gso_size, ++ sysctl_tcp_min_tso_segs * mss_now); ++ ++ xmit_size_goal = min_t(u32, gso_size, ++ sk->sk_gso_max_size - 1 - hlen); ++ ++ /* TSQ : try to have at least two segments in flight ++ * (one in NIC TX ring, another in Qdisc) ++ */ + xmit_size_goal = min_t(u32, xmit_size_goal, + sysctl_tcp_limit_output_bytes >> 1); + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -699,6 +699,34 @@ static void tcp_rtt_estimator(struct soc + } + } + ++/* Set the sk_pacing_rate to allow proper sizing of TSO packets. ++ * Note: TCP stack does not yet implement pacing. 
++ * FQ packet scheduler can be used to implement cheap but effective ++ * TCP pacing, to smooth the burst on large writes when packets ++ * in flight is significantly lower than cwnd (or rwin) ++ */ ++static void tcp_update_pacing_rate(struct sock *sk) ++{ ++ const struct tcp_sock *tp = tcp_sk(sk); ++ u64 rate; ++ ++ /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */ ++ rate = (u64)tp->mss_cache * 2 * (HZ << 3); ++ ++ rate *= max(tp->snd_cwnd, tp->packets_out); ++ ++ /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3), ++ * be conservative and assume srtt = 1 (125 us instead of 1.25 ms) ++ * We probably need usec resolution in the future. ++ * Note: This also takes care of possible srtt=0 case, ++ * when tcp_rtt_estimator() was not yet called. ++ */ ++ if (tp->srtt > 8 + 2) ++ do_div(rate, tp->srtt); ++ ++ sk->sk_pacing_rate = min_t(u64, rate, ~0U); ++} ++ + /* Calculate rto without backoff. This is the second half of Van Jacobson's + * routine referred to above. + */ +@@ -3330,7 +3358,7 @@ static int tcp_ack(struct sock *sk, cons + u32 ack_seq = TCP_SKB_CB(skb)->seq; + u32 ack = TCP_SKB_CB(skb)->ack_seq; + bool is_dupack = false; +- u32 prior_in_flight; ++ u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt; + u32 prior_fackets; + int prior_packets = tp->packets_out; + int prior_sacked = tp->sacked_out; +@@ -3438,6 +3466,8 @@ static int tcp_ack(struct sock *sk, cons + + if (icsk->icsk_pending == ICSK_TIME_RETRANS) + tcp_schedule_loss_probe(sk); ++ if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd) ++ tcp_update_pacing_rate(sk); + return 1; + + no_queue: +@@ -5736,6 +5766,8 @@ int tcp_rcv_state_process(struct sock *s + } else + tcp_init_metrics(sk); + ++ tcp_update_pacing_rate(sk); ++ + /* Prevent spurious tcp_cwnd_restart() on + * first data packet. 
+ */ +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1623,7 +1623,7 @@ static bool tcp_tso_should_defer(struct + + /* If a full-sized TSO skb can be sent, do it. */ + if (limit >= min_t(unsigned int, sk->sk_gso_max_size, +- sk->sk_gso_max_segs * tp->mss_cache)) ++ tp->xmit_size_goal_segs * tp->mss_cache)) + goto send_now; + + /* Middle in queue won't get any more data, full sendable already? */ diff --git a/queue-3.10/tcp-tsq-can-use-a-dynamic-limit.patch b/queue-3.10/tcp-tsq-can-use-a-dynamic-limit.patch new file mode 100644 index 00000000000..68b7baf0ec4 --- /dev/null +++ b/queue-3.10/tcp-tsq-can-use-a-dynamic-limit.patch @@ -0,0 +1,107 @@ +From 5b54531c1fc7370d648dc6a1364565eebab58a61 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Fri, 27 Sep 2013 03:28:54 -0700 +Subject: tcp: TSQ can use a dynamic limit + +From: Eric Dumazet + +[ Upstream commit c9eeec26e32e087359160406f96e0949b3cc6f10 ] + +When TCP Small Queues was added, we used a sysctl to limit amount of +packets queues on Qdisc/device queues for a given TCP flow. + +Problem is this limit is either too big for low rates, or too small +for high rates. + +Now TCP stack has rate estimation in sk->sk_pacing_rate, and TSO +auto sizing, it can better control number of packets in Qdisc/device +queues. + +New limit is two packets or at least 1 to 2 ms worth of packets. + +Low rates flows benefit from this patch by having even smaller +number of packets in queues, allowing for faster recovery, +better RTT estimations. + +High rates flows benefit from this patch by allowing more than 2 packets +in flight as we had reports this was a limiting factor to reach line +rate. 
[ In particular if TX completion is delayed because of coalescing +parameters ] + +Example for a single flow on 10Gbp link controlled by FQ/pacing + +14 packets in flight instead of 2 + +$ tc -s -d qd +qdisc fq 8001: dev eth0 root refcnt 32 limit 10000p flow_limit 100p +buckets 1024 quantum 3028 initial_quantum 15140 + Sent 1168459366606 bytes 771822841 pkt (dropped 0, overlimits 0 +requeues 6822476) + rate 9346Mbit 771713pps backlog 953820b 14p requeues 6822476 + 2047 flow, 2046 inactive, 1 throttled, delay 15673 ns + 2372 gc, 0 highprio, 0 retrans, 9739249 throttled, 0 flows_plimit + +Note that sk_pacing_rate is currently set to twice the actual rate, but +this might be refined in the future when a flow is in congestion +avoidance. + +Additional change : skb->destructor should be set to tcp_wfree(). + +A future patch (for linux 3.13+) might remove tcp_limit_output_bytes + +Signed-off-by: Eric Dumazet +Cc: Wei Liu +Cc: Cong Wang +Cc: Yuchung Cheng +Cc: Neal Cardwell +Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -887,8 +887,7 @@ static int tcp_transmit_skb(struct sock + + skb_orphan(skb); + skb->sk = sk; +- skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ? +- tcp_wfree : sock_wfree; ++ skb->destructor = tcp_wfree; + atomic_add(skb->truesize, &sk->sk_wmem_alloc); + + /* Build TCP header and checksum it. */ +@@ -1832,7 +1831,6 @@ static bool tcp_write_xmit(struct sock * + while ((skb = tcp_send_head(sk))) { + unsigned int limit; + +- + tso_segs = tcp_init_tso_segs(sk, skb, mss_now); + BUG_ON(!tso_segs); + +@@ -1861,13 +1859,20 @@ static bool tcp_write_xmit(struct sock * + break; + } + +- /* TSQ : sk_wmem_alloc accounts skb truesize, +- * including skb overhead. But thats OK. 
++ /* TCP Small Queues : ++ * Control number of packets in qdisc/devices to two packets / or ~1 ms. ++ * This allows for : ++ * - better RTT estimation and ACK scheduling ++ * - faster recovery ++ * - high rates + */ +- if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) { ++ limit = max(skb->truesize, sk->sk_pacing_rate >> 10); ++ ++ if (atomic_read(&sk->sk_wmem_alloc) > limit) { + set_bit(TSQ_THROTTLED, &tp->tsq_flags); + break; + } ++ + limit = mss_now; + if (tso_segs > 1 && !tcp_urg_mode(tp)) + limit = tcp_mss_split_point(sk, skb, mss_now, diff --git a/queue-3.10/unix_diag-fix-info-leak.patch b/queue-3.10/unix_diag-fix-info-leak.patch new file mode 100644 index 00000000000..ba4228593d2 --- /dev/null +++ b/queue-3.10/unix_diag-fix-info-leak.patch @@ -0,0 +1,30 @@ +From 0359b57d2c8974e9288decf689ae6606f35a690b Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Mon, 30 Sep 2013 22:05:40 +0200 +Subject: unix_diag: fix info leak + +From: Mathias Krause + +[ Upstream commit 6865d1e834be84ddd5808d93d5035b492346c64a ] + +When filling the netlink message we miss to wipe the pad field, +therefore leak one byte of heap memory to userland. Fix this by +setting pad to 0. + +Signed-off-by: Mathias Krause +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/diag.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/unix/diag.c ++++ b/net/unix/diag.c +@@ -124,6 +124,7 @@ static int sk_diag_fill(struct sock *sk, + rep->udiag_family = AF_UNIX; + rep->udiag_type = sk->sk_type; + rep->udiag_state = sk->sk_state; ++ rep->pad = 0; + rep->udiag_ino = sk_ino; + sock_diag_save_cookie(sk, rep->udiag_cookie); + diff --git a/queue-3.10/virtio-net-don-t-respond-to-cpu-hotplug-notifier-if-we-re-not-ready.patch b/queue-3.10/virtio-net-don-t-respond-to-cpu-hotplug-notifier-if-we-re-not-ready.patch new file mode 100644 index 00000000000..ba8e5be4ddb --- /dev/null +++ b/queue-3.10/virtio-net-don-t-respond-to-cpu-hotplug-notifier-if-we-re-not-ready.patch @@ -0,0 +1,56 @@ +From ea3c6dc6b83ed16f6c097a836284d06b4f37b925 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Tue, 15 Oct 2013 11:18:58 +0800 +Subject: virtio-net: don't respond to cpu hotplug notifier if we're not ready + +From: Jason Wang + +[ Upstream commit 3ab098df35f8b98b6553edc2e40234af512ba877 ] + +We're trying to re-configure the affinity unconditionally in cpu hotplug +callback. This may lead to an issue when resuming from s3/s4 since + +- virt queues haven't been allocated at that time. +- it's unnecessary since thaw method will re-configure the affinity. + +Fix this issue by checking the config_enable and doing nothing if we're not ready. + +The bug was introduced by commit 8de4b2f3ae90c8fc0f17eeaab87d5a951b66ee17 +(virtio-net: reset virtqueue affinity when doing cpu hotplug). + +Acked-by: Michael S. Tsirkin +Cc: Rusty Russell +Cc: Michael S. Tsirkin +Cc: Wanlong Gao +Reviewed-by: Wanlong Gao +Signed-off-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -1097,6 +1097,11 @@ static int virtnet_cpu_callback(struct n + { + struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb); + ++ mutex_lock(&vi->config_lock); ++ ++ if (!vi->config_enable) ++ goto done; ++ + switch(action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + case CPU_DOWN_FAILED: +@@ -1109,6 +1114,9 @@ static int virtnet_cpu_callback(struct n + default: + break; + } ++ ++done: ++ mutex_unlock(&vi->config_lock); + return NOTIFY_OK; + } + diff --git a/queue-3.10/virtio-net-fix-the-race-between-channels-setting-and-refill.patch b/queue-3.10/virtio-net-fix-the-race-between-channels-setting-and-refill.patch new file mode 100644 index 00000000000..b205d4ebf2f --- /dev/null +++ b/queue-3.10/virtio-net-fix-the-race-between-channels-setting-and-refill.patch @@ -0,0 +1,55 @@ +From 451bad27ad7ce05114f03127a4ceca35d35d58a2 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Thu, 4 Jul 2013 11:22:57 +0930 +Subject: virtio-net: fix the race between channels setting and refill + +From: Jason Wang + +[ Upstream commit 9b9cd8024a2882e896c65222aa421d461354e3f2 ] + +Commit 55257d72bd1c51f25106350f4983ec19f62ed1fa (virtio-net: fill only rx queues +which are being used) tries to refill on demand when changing the number of +channels by call try_refill_recv() directly, this may race: + +- the refill work who may do the refill in the same time +- the try_refill_recv() called in bh since napi was not disabled + +Which may led guest complain during setting channels: + +virtio_net virtio0: input.1:id 0 is not a head! + +Solve this issue by scheduling a refill work which can guarantee the +serialization of refill. + +Signed-off-by: Jason Wang +Cc: Sasha Levin +Cc: Rusty Russell +Cc: Michael S. 
Tsirkin +Signed-off-by: Rusty Russell +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -902,7 +902,6 @@ static int virtnet_set_queues(struct vir + struct scatterlist sg; + struct virtio_net_ctrl_mq s; + struct net_device *dev = vi->dev; +- int i; + + if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) + return 0; +@@ -916,10 +915,8 @@ static int virtnet_set_queues(struct vir + queue_pairs); + return -EINVAL; + } else { +- for (i = vi->curr_queue_pairs; i < queue_pairs; i++) +- if (!try_fill_recv(&vi->rq[i], GFP_KERNEL)) +- schedule_delayed_work(&vi->refill, 0); + vi->curr_queue_pairs = queue_pairs; ++ schedule_delayed_work(&vi->refill, 0); + } + + return 0; diff --git a/queue-3.10/virtio-net-refill-only-when-device-is-up-during-setting-queues.patch b/queue-3.10/virtio-net-refill-only-when-device-is-up-during-setting-queues.patch new file mode 100644 index 00000000000..708c37ad42a --- /dev/null +++ b/queue-3.10/virtio-net-refill-only-when-device-is-up-during-setting-queues.patch @@ -0,0 +1,50 @@ +From 5335e0d9c3736ef6fa59412d0ebe1cee855da925 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Tue, 15 Oct 2013 11:18:59 +0800 +Subject: virtio-net: refill only when device is up during setting queues + +From: Jason Wang + +[ Upstream commit 35ed159bfd96a7547ec277ed8b550c7cbd9841b6 ] + +We used to schedule the refill work unconditionally after changing the +number of queues. This may lead an issue if the device is not +up. Since we only try to cancel the work in ndo_stop(), this may cause +the refill work still work after removing the device. Fix this by only +schedule the work when device is up. + +The bug were introduce by commit 9b9cd8024a2882e896c65222aa421d461354e3f2. +(virtio-net: fix the race between channels setting and refill) + +Signed-off-by: Jason Wang +Cc: Rusty Russell +Cc: Michael S. 
Tsirkin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -916,7 +916,9 @@ static int virtnet_set_queues(struct vir + return -EINVAL; + } else { + vi->curr_queue_pairs = queue_pairs; +- schedule_delayed_work(&vi->refill, 0); ++ /* virtnet_open() will refill when device is going to up. */ ++ if (dev->flags & IFF_UP) ++ schedule_delayed_work(&vi->refill, 0); + } + + return 0; +@@ -1714,7 +1716,9 @@ static int virtnet_restore(struct virtio + vi->config_enable = true; + mutex_unlock(&vi->config_lock); + ++ rtnl_lock(); + virtnet_set_queues(vi, vi->curr_queue_pairs); ++ rtnl_unlock(); + + return 0; + } diff --git a/queue-3.10/vti-get-rid-of-nf-mark-rule-in-prerouting.patch b/queue-3.10/vti-get-rid-of-nf-mark-rule-in-prerouting.patch new file mode 100644 index 00000000000..2b3d94d5d96 --- /dev/null +++ b/queue-3.10/vti-get-rid-of-nf-mark-rule-in-prerouting.patch @@ -0,0 +1,125 @@ +From 71e72171bb0eaaf2e7b460829030b726da2fb54f Mon Sep 17 00:00:00 2001 +From: Christophe Gouault +Date: Tue, 8 Oct 2013 17:21:22 +0200 +Subject: vti: get rid of nf mark rule in prerouting + +From: Christophe Gouault + +[ Upstream commit 7263a5187f9e9de45fcb51349cf0e031142c19a1 ] + +This patch fixes and improves the use of vti interfaces (while +lightly changing the way of configuring them). + +Currently: + +- it is necessary to identify and mark inbound IPsec + packets destined to each vti interface, via netfilter rules in + the mangle table at prerouting hook. + +- the vti module cannot retrieve the right tunnel in input since + commit b9959fd3: vti tunnels all have an i_key, but the tunnel lookup + is done with flag TUNNEL_NO_KEY, so there no chance to retrieve them. + +- the i_key is used by the outbound processing as a mark to lookup + for the right SP and SA bundle. 
+ +This patch uses the o_key to store the vti mark (instead of i_key) and +enables: + +- to avoid the need for previously marking the inbound skbuffs via a + netfilter rule. +- to properly retrieve the right tunnel in input, only based on the IPsec + packet outer addresses. +- to properly perform an inbound policy check (using the tunnel o_key + as a mark). +- to properly perform an outbound SPD and SAD lookup (using the tunnel + o_key as a mark). +- to keep the current mark of the skbuff. The skbuff mark is neither + used nor changed by the vti interface. Only the vti interface o_key + is used. + +SAs have a wildcard mark. +SPs have a mark equal to the vti interface o_key. + +The vti interface must be created as follows (i_key = 0, o_key = mark): + + ip link add vti1 mode vti local 1.1.1.1 remote 2.2.2.2 okey 1 + +The SPs attached to vti1 must be created as follows (mark = vti1 o_key): + + ip xfrm policy add dir out mark 1 tmpl src 1.1.1.1 dst 2.2.2.2 \ + proto esp mode tunnel + ip xfrm policy add dir in mark 1 tmpl src 2.2.2.2 dst 1.1.1.1 \ + proto esp mode tunnel + +The SAs are created with the default wildcard mark. There is no +distinction between global vs. vti SAs. Just their addresses will +possibly link them to a vti interface: + + ip xfrm state add src 1.1.1.1 dst 2.2.2.2 proto esp spi 1000 mode tunnel \ + enc "cbc(aes)" "azertyuiopqsdfgh" + + ip xfrm state add src 2.2.2.2 dst 1.1.1.1 proto esp spi 2000 mode tunnel \ + enc "cbc(aes)" "sqbdhgqsdjqjsdfh" + +To avoid matching "global" (not vti) SPs in vti interfaces, global SPs +should not use the default wildcard mark, but explicitly match mark 0. 
+ +To avoid a double SPD lookup in input and output (in global and vti SPDs), +the NOPOLICY and NOXFRM options should be set on the vti interfaces: + + echo 1 > /proc/sys/net/ipv4/conf/vti1/disable_policy + echo 1 > /proc/sys/net/ipv4/conf/vti1/disable_xfrm + +The outgoing traffic is steered to vti1 by a route via the vti interface: + + ip route add 192.168.0.0/16 dev vti1 + +The incoming IPsec traffic is steered to vti1 because its outer addresses +match the vti1 tunnel configuration. + +Signed-off-by: Christophe Gouault +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_vti.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/net/ipv4/ip_vti.c ++++ b/net/ipv4/ip_vti.c +@@ -285,8 +285,17 @@ static int vti_rcv(struct sk_buff *skb) + tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); + if (tunnel != NULL) { + struct pcpu_tstats *tstats; ++ u32 oldmark = skb->mark; ++ int ret; + +- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) ++ ++ /* temporarily mark the skb with the tunnel o_key, to ++ * only match policies with this mark. 
++ */ ++ skb->mark = be32_to_cpu(tunnel->parms.o_key); ++ ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb); ++ skb->mark = oldmark; ++ if (!ret) + return -1; + + tstats = this_cpu_ptr(tunnel->dev->tstats); +@@ -295,7 +304,6 @@ static int vti_rcv(struct sk_buff *skb) + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + +- skb->mark = 0; + secpath_reset(skb); + skb->dev = tunnel->dev; + return 1; +@@ -327,7 +335,7 @@ static netdev_tx_t vti_tunnel_xmit(struc + + memset(&fl4, 0, sizeof(fl4)); + flowi4_init_output(&fl4, tunnel->parms.link, +- be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos), ++ be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos), + RT_SCOPE_UNIVERSE, + IPPROTO_IPIP, 0, + dst, tiph->saddr, 0, 0); diff --git a/queue-3.10/wanxl-fix-info-leak-in-ioctl.patch b/queue-3.10/wanxl-fix-info-leak-in-ioctl.patch new file mode 100644 index 00000000000..5356ac7745b --- /dev/null +++ b/queue-3.10/wanxl-fix-info-leak-in-ioctl.patch @@ -0,0 +1,30 @@ +From a6140f3d48c273dcfa84762f9b32072518f32d56 Mon Sep 17 00:00:00 2001 +From: Salva Peiró +Date: Wed, 16 Oct 2013 12:46:50 +0200 +Subject: wanxl: fix info leak in ioctl + +From: Salva Peiró + +[ Upstream commit 2b13d06c9584b4eb773f1e80bbaedab9a1c344e1 ] + +The wanxl_ioctl() code fails to initialize the two padding bytes of +struct sync_serial_settings after the ->loopback member. Add an explicit +memset(0) before filling the structure to avoid the info leak. + +Signed-off-by: Salva Peiró +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wan/wanxl.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/wan/wanxl.c ++++ b/drivers/net/wan/wanxl.c +@@ -355,6 +355,7 @@ static int wanxl_ioctl(struct net_device + ifr->ifr_settings.size = size; /* data size wanted */ + return -ENOBUFS; + } ++ memset(&line, 0, sizeof(line)); + line.clock_type = get_status(port)->clocking; + line.clock_rate = 0; + line.loopback = 0; diff --git a/queue-3.10/xen-netback-don-t-destroy-the-netdev-until-the-vif-is-shut-down.patch b/queue-3.10/xen-netback-don-t-destroy-the-netdev-until-the-vif-is-shut-down.patch new file mode 100644 index 00000000000..8b606c04978 --- /dev/null +++ b/queue-3.10/xen-netback-don-t-destroy-the-netdev-until-the-vif-is-shut-down.patch @@ -0,0 +1,125 @@ +From 410cea5ccb7486942d6d5e0e5bfc65b317c7fdf8 Mon Sep 17 00:00:00 2001 +From: Paul Durrant +Date: Tue, 8 Oct 2013 14:22:56 +0100 +Subject: xen-netback: Don't destroy the netdev until the vif is shut down + +From: Paul Durrant + +[ upstream commit id: 279f438e36c0a70b23b86d2090aeec50155034a9 ] + +Without this patch, if a frontend cycles through states Closing +and Closed (which Windows frontends need to do) then the netdev +will be destroyed and requires re-invocation of hotplug scripts +to restore state before the frontend can move to Connected. Thus +when udev is not in use the backend gets stuck in InitWait. + +With this patch, the netdev is left alone whilst the backend is +still online and is only de-registered and freed just prior to +destroying the vif (which is also nicely symmetrical with the +netdev allocation and registration being done during probe) so +no re-invocation of hotplug scripts is required. 
+ +Signed-off-by: Paul Durrant +Cc: David Vrabel +Cc: Wei Liu +Cc: Ian Campbell +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netback/common.h | 1 + + drivers/net/xen-netback/interface.c | 12 ++++++++++-- + drivers/net/xen-netback/xenbus.c | 17 ++++++++++++----- + 3 files changed, 23 insertions(+), 7 deletions(-) + +--- a/drivers/net/xen-netback/common.h ++++ b/drivers/net/xen-netback/common.h +@@ -115,6 +115,7 @@ struct xenvif *xenvif_alloc(struct devic + int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, + unsigned long rx_ring_ref, unsigned int evtchn); + void xenvif_disconnect(struct xenvif *vif); ++void xenvif_free(struct xenvif *vif); + + void xenvif_get(struct xenvif *vif); + void xenvif_put(struct xenvif *vif); +--- a/drivers/net/xen-netback/interface.c ++++ b/drivers/net/xen-netback/interface.c +@@ -304,6 +304,9 @@ struct xenvif *xenvif_alloc(struct devic + } + + netdev_dbg(dev, "Successfully created xenvif\n"); ++ ++ __module_get(THIS_MODULE); ++ + return vif; + } + +@@ -369,9 +372,14 @@ void xenvif_disconnect(struct xenvif *vi + if (vif->irq) + unbind_from_irqhandler(vif->irq, vif); + +- unregister_netdev(vif->dev); +- + xen_netbk_unmap_frontend_rings(vif); ++} ++ ++void xenvif_free(struct xenvif *vif) ++{ ++ unregister_netdev(vif->dev); + + free_netdev(vif->dev); ++ ++ module_put(THIS_MODULE); + } +--- a/drivers/net/xen-netback/xenbus.c ++++ b/drivers/net/xen-netback/xenbus.c +@@ -42,7 +42,7 @@ static int netback_remove(struct xenbus_ + if (be->vif) { + kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); + xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); +- xenvif_disconnect(be->vif); ++ xenvif_free(be->vif); + be->vif = NULL; + } + kfree(be); +@@ -203,9 +203,18 @@ static void disconnect_backend(struct xe + { + struct backend_info *be = dev_get_drvdata(&dev->dev); + ++ if (be->vif) ++ xenvif_disconnect(be->vif); ++} ++ ++static void destroy_backend(struct xenbus_device *dev) ++{ ++ struct backend_info *be = 
dev_get_drvdata(&dev->dev); ++ + if (be->vif) { ++ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); + xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); +- xenvif_disconnect(be->vif); ++ xenvif_free(be->vif); + be->vif = NULL; + } + } +@@ -237,14 +246,11 @@ static void frontend_changed(struct xenb + case XenbusStateConnected: + if (dev->state == XenbusStateConnected) + break; +- backend_create_xenvif(be); + if (be->vif) + connect(be); + break; + + case XenbusStateClosing: +- if (be->vif) +- kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); + disconnect_backend(dev); + xenbus_switch_state(dev, XenbusStateClosing); + break; +@@ -253,6 +259,7 @@ static void frontend_changed(struct xenb + xenbus_switch_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; ++ destroy_backend(dev); + /* fall through if not online */ + case XenbusStateUnknown: + device_unregister(&dev->dev);