From: Greg Kroah-Hartman Date: Sun, 16 Sep 2012 15:47:07 +0000 (-0700) Subject: 3.4-stable patches X-Git-Tag: v3.0.44~86 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f2aa0679a7ee0d1d6c76e7d2350921cc3548110c;p=thirdparty%2Fkernel%2Fstable-queue.git 3.4-stable patches added patches: af_netlink-force-credentials-passing.patch af_packet-don-t-emit-packet-on-orig-fanout-group.patch af_packet-remove-bug-statement-in-tpacket_destruct_skb.patch atm-fix-info-leak-in-getsockopt-so_atmpvc.patch atm-fix-info-leak-via-getsockname.patch bluetooth-hci-fix-info-leak-in-getsockopt-hci_filter.patch bluetooth-hci-fix-info-leak-via-getsockname.patch bluetooth-l2cap-fix-info-leak-via-getsockname.patch bluetooth-rfcomm-fix-info-leak-in-getsockopt-bt_security.patch bluetooth-rfcomm-fix-info-leak-in-ioctl-rfcommgetdevlist.patch bluetooth-rfcomm-fix-info-leak-via-getsockname.patch bnx2x-fix-57840_mf-pci-id.patch dccp-fix-info-leak-via-getsockopt-dccp_sockopt_ccid_tx_info.patch gianfar-fix-default-tx-vlan-offload-feature-flag.patch ipv6-addrconf-avoid-calling-netdevice-notifiers-with-rcu-read-side-lock.patch ipvs-fix-info-leak-in-getsockopt-ip_vs_so_get_timeout.patch isdnloop-fix-and-simplify-isdnloop_init.patch l2tp-avoid-to-use-synchronize_rcu-in-tunnel-free-function.patch llc-fix-info-leak-via-getsockname.patch net-allow-driver-to-limit-number-of-gso-segments-per-skb.patch net-core-fix-potential-memory-leak-in-dev_set_alias.patch net-fix-info-leak-in-compat-dev_ifconf.patch net-ipv4-ipmr_expire_timer-causes-crash-when-removing-net-namespace.patch netlink-fix-possible-spoofing-from-non-root-processes.patch net_sched-gact-fix-potential-panic-in-tcf_gact.patch openvswitch-reset-upper-layer-protocol-info-on-internal-devices.patch pptp-lookup-route-with-the-proper-net-namespace.patch sfc-fix-maximum-number-of-tso-segments-and-minimum-tx-queue-size.patch sfc-fix-reporting-of-ipv4-full-filters-through-ethtool.patch tcp-apply-device-tso-segment-limit-earlier.patch 
tcp-fix-cwnd-reduction-for-non-sack-recovery.patch --- diff --git a/queue-3.4/af_netlink-force-credentials-passing.patch b/queue-3.4/af_netlink-force-credentials-passing.patch new file mode 100644 index 00000000000..66f1cd77cd9 --- /dev/null +++ b/queue-3.4/af_netlink-force-credentials-passing.patch @@ -0,0 +1,88 @@ +From 8e7046c42482be5c54a8eec27c64bc8270643920 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 21 Aug 2012 06:21:17 +0000 +Subject: af_netlink: force credentials passing [CVE-2012-3520] + + +From: Eric Dumazet + +[ Upstream commit e0e3cea46d31d23dc40df0a49a7a2c04fe8edfea ] + +Pablo Neira Ayuso discovered that avahi and +potentially NetworkManager accept spoofed Netlink messages because of a +kernel bug. The kernel passes all-zero SCM_CREDENTIALS ancillary data +to the receiver if the sender did not provide such data, instead of not +including any such data at all or including the correct data from the +peer (as it is the case with AF_UNIX). + +This bug was introduced in commit 16e572626961 +(af_unix: dont send SCM_CREDENTIALS by default) + +This patch forces passing credentials for netlink, as +before the regression. + +Another fix would be to not add SCM_CREDENTIALS in +netlink messages if not provided by the sender, but it +might break some programs. + +With help from Florian Weimer & Petr Matousek + +This issue is designated as CVE-2012-3520 + +Signed-off-by: Eric Dumazet +Cc: Petr Matousek +Cc: Florian Weimer +Cc: Pablo Neira Ayuso +Signed-off-by: David S. 
Miller +--- + include/net/scm.h | 4 +++- + net/netlink/af_netlink.c | 2 +- + net/unix/af_unix.c | 4 ++-- + 3 files changed, 6 insertions(+), 4 deletions(-) + +--- a/include/net/scm.h ++++ b/include/net/scm.h +@@ -71,9 +71,11 @@ static __inline__ void scm_destroy(struc + } + + static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, +- struct scm_cookie *scm) ++ struct scm_cookie *scm, bool forcecreds) + { + memset(scm, 0, sizeof(*scm)); ++ if (forcecreds) ++ scm_set_cred(scm, task_tgid(current), current_cred()); + unix_get_peersec_dgram(sock, scm); + if (msg->msg_controllen <= 0) + return 0; +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -1329,7 +1329,7 @@ static int netlink_sendmsg(struct kiocb + if (NULL == siocb->scm) + siocb->scm = &scm; + +- err = scm_send(sock, msg, siocb->scm); ++ err = scm_send(sock, msg, siocb->scm, true); + if (err < 0) + return err; + +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -1446,7 +1446,7 @@ static int unix_dgram_sendmsg(struct kio + if (NULL == siocb->scm) + siocb->scm = &tmp_scm; + wait_for_unix_gc(); +- err = scm_send(sock, msg, siocb->scm); ++ err = scm_send(sock, msg, siocb->scm, false); + if (err < 0) + return err; + +@@ -1607,7 +1607,7 @@ static int unix_stream_sendmsg(struct ki + if (NULL == siocb->scm) + siocb->scm = &tmp_scm; + wait_for_unix_gc(); +- err = scm_send(sock, msg, siocb->scm); ++ err = scm_send(sock, msg, siocb->scm, false); + if (err < 0) + return err; + diff --git a/queue-3.4/af_packet-don-t-emit-packet-on-orig-fanout-group.patch b/queue-3.4/af_packet-don-t-emit-packet-on-orig-fanout-group.patch new file mode 100644 index 00000000000..b35151acb70 --- /dev/null +++ b/queue-3.4/af_packet-don-t-emit-packet-on-orig-fanout-group.patch @@ -0,0 +1,100 @@ +From e8dc0ed4f35bce30376ee6ef71cb1316bf4b2257 Mon Sep 17 00:00:00 2001 +From: Eric Leblond +Date: Thu, 16 Aug 2012 22:02:58 +0000 +Subject: af_packet: don't emit packet on orig fanout group + + +From: Eric Leblond + 
+[ Upstream commit c0de08d04215031d68fa13af36f347a6cfa252ca ] + +If a packet is emitted on one socket in one group of fanout sockets, +it is transmitted again. It is thus read again on one of the sockets +of the fanout group. This results in a loop for software which +generates packets when receiving one. +This retransmission is not the intended behavior: a fanout group +must behave like a single socket. The packet should not be +transmitted on a socket if it originates from a socket belonging +to the same fanout group. + +This patch fixes the issue by changing the transmission check to +take fanout group info into account. + +Reported-by: Aleksandr Kotov +Signed-off-by: Eric Leblond +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 2 ++ + net/core/dev.c | 16 ++++++++++++++-- + net/packet/af_packet.c | 9 +++++++++ + 3 files changed, 25 insertions(+), 2 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -1496,6 +1496,8 @@ struct packet_type { + struct sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb); + int (*gro_complete)(struct sk_buff *skb); ++ bool (*id_match)(struct packet_type *ptype, ++ struct sock *sk); + void *af_packet_priv; + struct list_head list; + }; +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1641,6 +1641,19 @@ static inline int deliver_skb(struct sk_ + return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + } + ++static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) ++{ ++ if (ptype->af_packet_priv == NULL) ++ return false; ++ ++ if (ptype->id_match) ++ return ptype->id_match(ptype, skb->sk); ++ else if ((struct sock *)ptype->af_packet_priv == skb->sk) ++ return true; ++ ++ return false; ++} ++ + /* + * Support routine. Sends outgoing frames to any network + * taps currently in use. 
+@@ -1658,8 +1671,7 @@ static void dev_queue_xmit_nit(struct sk + * they originated from - MvS (miquels@drinkel.ow.org) + */ + if ((ptype->dev == dev || !ptype->dev) && +- (ptype->af_packet_priv == NULL || +- (struct sock *)ptype->af_packet_priv != skb->sk)) { ++ (!skb_loop_sk(ptype, skb))) { + if (pt_prev) { + deliver_skb(skb2, pt_prev, skb->dev); + pt_prev = ptype; +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -1280,6 +1280,14 @@ static void __fanout_unlink(struct sock + spin_unlock(&f->lock); + } + ++bool match_fanout_group(struct packet_type *ptype, struct sock * sk) ++{ ++ if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout) ++ return true; ++ ++ return false; ++} ++ + static int fanout_add(struct sock *sk, u16 id, u16 type_flags) + { + struct packet_sock *po = pkt_sk(sk); +@@ -1332,6 +1340,7 @@ static int fanout_add(struct sock *sk, u + match->prot_hook.dev = po->prot_hook.dev; + match->prot_hook.func = packet_rcv_fanout; + match->prot_hook.af_packet_priv = match; ++ match->prot_hook.id_match = match_fanout_group; + dev_add_pack(&match->prot_hook); + list_add(&match->list, &fanout_list); + } diff --git a/queue-3.4/af_packet-remove-bug-statement-in-tpacket_destruct_skb.patch b/queue-3.4/af_packet-remove-bug-statement-in-tpacket_destruct_skb.patch new file mode 100644 index 00000000000..63fced46e2d --- /dev/null +++ b/queue-3.4/af_packet-remove-bug-statement-in-tpacket_destruct_skb.patch @@ -0,0 +1,49 @@ +From 20897991d4bef87c60d31987ee913bd9499da04d Mon Sep 17 00:00:00 2001 +From: "danborkmann@iogearbox.net" +Date: Fri, 10 Aug 2012 22:48:54 +0000 +Subject: af_packet: remove BUG statement in tpacket_destruct_skb + + +From: "danborkmann@iogearbox.net" + +[ Upstream commit 7f5c3e3a80e6654cf48dfba7cf94f88c6b505467 ] + +Here's a quote of the comment about the BUG macro from asm-generic/bug.h: + + Don't use BUG() or BUG_ON() unless there's really no way out; one + example might be detecting data structure corruption in the middle 
+ of an operation that can't be backed out of. If the (sub)system + can somehow continue operating, perhaps with reduced functionality, + it's probably not BUG-worthy. + + If you're tempted to BUG(), think again: is completely giving up + really the *only* solution? There are usually better options, where + users don't need to reboot ASAP and can mostly shut down cleanly. + +In our case, the status flag of a ring buffer slot is managed from both sides, +the kernel space and the user space. This means that even though the kernel +side might work as expected, the user space screws up and changes this flag +right between the send(2) is triggered when the flag is changed to +TP_STATUS_SENDING and a given skb is destructed after some time. Then, this +will hit the BUG macro. As David suggested, the best solution is to simply +remove this statement since it cannot be used for kernel side internal +consistency checks. I've tested it and the system still behaves /stable/ in +this case, so in accordance with the above comment, we should rather remove it. + +Signed-off-by: Daniel Borkmann +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -1943,7 +1943,6 @@ static void tpacket_destruct_skb(struct + + if (likely(po->tx_ring.pg_vec)) { + ph = skb_shinfo(skb)->destructor_arg; +- BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); + BUG_ON(atomic_read(&po->tx_ring.pending) == 0); + atomic_dec(&po->tx_ring.pending); + __packet_set_status(po, ph, TP_STATUS_AVAILABLE); diff --git a/queue-3.4/atm-fix-info-leak-in-getsockopt-so_atmpvc.patch b/queue-3.4/atm-fix-info-leak-in-getsockopt-so_atmpvc.patch new file mode 100644 index 00000000000..b5c09b3e048 --- /dev/null +++ b/queue-3.4/atm-fix-info-leak-in-getsockopt-so_atmpvc.patch @@ -0,0 +1,31 @@ +From 7c41d0277f7856e4dbc03b509c1e85d3d4ab2fee Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Wed, 15 Aug 2012 11:31:44 +0000 +Subject: atm: fix info leak in getsockopt(SO_ATMPVC) + + +From: Mathias Krause + +[ Upstream commit e862f1a9b7df4e8196ebec45ac62295138aa3fc2 ] + +The ATM code fails to initialize the two padding bytes of struct +sockaddr_atmpvc inserted for alignment. Add an explicit memset(0) +before filling the structure to avoid the info leak. + +Signed-off-by: Mathias Krause +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/atm/common.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/atm/common.c ++++ b/net/atm/common.c +@@ -812,6 +812,7 @@ int vcc_getsockopt(struct socket *sock, + + if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) + return -ENOTCONN; ++ memset(&pvc, 0, sizeof(pvc)); + pvc.sap_family = AF_ATMPVC; + pvc.sap_addr.itf = vcc->dev->number; + pvc.sap_addr.vpi = vcc->vpi; diff --git a/queue-3.4/atm-fix-info-leak-via-getsockname.patch b/queue-3.4/atm-fix-info-leak-via-getsockname.patch new file mode 100644 index 00000000000..7f4bd9c1f82 --- /dev/null +++ b/queue-3.4/atm-fix-info-leak-via-getsockname.patch @@ -0,0 +1,31 @@ +From 3eec186a22d707d45439c43acf7e69da1d4a9608 Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Wed, 15 Aug 2012 11:31:45 +0000 +Subject: atm: fix info leak via getsockname() + + +From: Mathias Krause + +[ Upstream commit 3c0c5cfdcd4d69ffc4b9c0907cec99039f30a50a ] + +The ATM code fails to initialize the two padding bytes of struct +sockaddr_atmpvc inserted for alignment. Add an explicit memset(0) +before filling the structure to avoid the info leak. + +Signed-off-by: Mathias Krause +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/atm/pvc.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/atm/pvc.c ++++ b/net/atm/pvc.c +@@ -95,6 +95,7 @@ static int pvc_getname(struct socket *so + return -ENOTCONN; + *sockaddr_len = sizeof(struct sockaddr_atmpvc); + addr = (struct sockaddr_atmpvc *)sockaddr; ++ memset(addr, 0, sizeof(*addr)); + addr->sap_family = AF_ATMPVC; + addr->sap_addr.itf = vcc->dev->number; + addr->sap_addr.vpi = vcc->vpi; diff --git a/queue-3.4/bluetooth-hci-fix-info-leak-in-getsockopt-hci_filter.patch b/queue-3.4/bluetooth-hci-fix-info-leak-in-getsockopt-hci_filter.patch new file mode 100644 index 00000000000..2b9855cf04a --- /dev/null +++ b/queue-3.4/bluetooth-hci-fix-info-leak-in-getsockopt-hci_filter.patch @@ -0,0 +1,35 @@ +From 816c81b7dc99c52dc28f7dc6fc5ab43f44783673 Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Wed, 15 Aug 2012 11:31:46 +0000 +Subject: Bluetooth: HCI - Fix info leak in getsockopt(HCI_FILTER) + + +From: Mathias Krause + +[ Upstream commit e15ca9a0ef9a86f0477530b0f44a725d67f889ee ] + +The HCI code fails to initialize the two padding bytes of struct +hci_ufilter before copying it to userland -- that for leaking two +bytes kernel stack. Add an explicit memset(0) before filling the +structure to avoid the info leak. + +Signed-off-by: Mathias Krause +Cc: Marcel Holtmann +Cc: Gustavo Padovan +Cc: Johan Hedberg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bluetooth/hci_sock.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/bluetooth/hci_sock.c ++++ b/net/bluetooth/hci_sock.c +@@ -1016,6 +1016,7 @@ static int hci_sock_getsockopt(struct so + { + struct hci_filter *f = &hci_pi(sk)->filter; + ++ memset(&uf, 0, sizeof(uf)); + uf.type_mask = f->type_mask; + uf.opcode = f->opcode; + uf.event_mask[0] = *((u32 *) f->event_mask + 0); diff --git a/queue-3.4/bluetooth-hci-fix-info-leak-via-getsockname.patch b/queue-3.4/bluetooth-hci-fix-info-leak-via-getsockname.patch new file mode 100644 index 00000000000..cb732591463 --- /dev/null +++ b/queue-3.4/bluetooth-hci-fix-info-leak-via-getsockname.patch @@ -0,0 +1,35 @@ +From 52bacac2528761f7ab8d130d1a3c624e17f240a8 Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Wed, 15 Aug 2012 11:31:47 +0000 +Subject: Bluetooth: HCI - Fix info leak via getsockname() + + +From: Mathias Krause + +[ Upstream commit 3f68ba07b1da811bf383b4b701b129bfcb2e4988 ] + +The HCI code fails to initialize the hci_channel member of struct +sockaddr_hci and that for leaks two bytes kernel stack via the +getsockname() syscall. Initialize hci_channel with 0 to avoid the +info leak. + +Signed-off-by: Mathias Krause +Cc: Marcel Holtmann +Cc: Gustavo Padovan +Cc: Johan Hedberg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bluetooth/hci_sock.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/bluetooth/hci_sock.c ++++ b/net/bluetooth/hci_sock.c +@@ -706,6 +706,7 @@ static int hci_sock_getname(struct socke + *addr_len = sizeof(*haddr); + haddr->hci_family = AF_BLUETOOTH; + haddr->hci_dev = hdev->id; ++ haddr->hci_channel= 0; + + release_sock(sk); + return 0; diff --git a/queue-3.4/bluetooth-l2cap-fix-info-leak-via-getsockname.patch b/queue-3.4/bluetooth-l2cap-fix-info-leak-via-getsockname.patch new file mode 100644 index 00000000000..2008c210a91 --- /dev/null +++ b/queue-3.4/bluetooth-l2cap-fix-info-leak-via-getsockname.patch @@ -0,0 +1,35 @@ +From 5354da9f7562082e55436859380ffb5f7ff21253 Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Wed, 15 Aug 2012 11:31:51 +0000 +Subject: Bluetooth: L2CAP - Fix info leak via getsockname() + + +From: Mathias Krause + +[ Upstream commit 792039c73cf176c8e39a6e8beef2c94ff46522ed ] + +The L2CAP code fails to initialize the l2_bdaddr_type member of struct +sockaddr_l2 and the padding byte added for alignment. It that for leaks +two bytes kernel stack via the getsockname() syscall. Add an explicit +memset(0) before filling the structure to avoid the info leak. + +Signed-off-by: Mathias Krause +Cc: Marcel Holtmann +Cc: Gustavo Padovan +Cc: Johan Hedberg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bluetooth/l2cap_sock.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/bluetooth/l2cap_sock.c ++++ b/net/bluetooth/l2cap_sock.c +@@ -242,6 +242,7 @@ static int l2cap_sock_getname(struct soc + + BT_DBG("sock %p, sk %p", sock, sk); + ++ memset(la, 0, sizeof(struct sockaddr_l2)); + addr->sa_family = AF_BLUETOOTH; + *len = sizeof(struct sockaddr_l2); + diff --git a/queue-3.4/bluetooth-rfcomm-fix-info-leak-in-getsockopt-bt_security.patch b/queue-3.4/bluetooth-rfcomm-fix-info-leak-in-getsockopt-bt_security.patch new file mode 100644 index 00000000000..eb353a5b97e --- /dev/null +++ b/queue-3.4/bluetooth-rfcomm-fix-info-leak-in-getsockopt-bt_security.patch @@ -0,0 +1,35 @@ +From 34efe115a763215abd8b5f4672dea7ed2d8e292b Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Wed, 15 Aug 2012 11:31:48 +0000 +Subject: Bluetooth: RFCOMM - Fix info leak in getsockopt(BT_SECURITY) + + +From: Mathias Krause + +[ Upstream commit 9ad2de43f1aee7e7274a4e0d41465489299e344b ] + +The RFCOMM code fails to initialize the key_size member of struct +bt_security before copying it to userland -- that for leaking one +byte kernel stack. Initialize key_size with 0 to avoid the info +leak. + +Signed-off-by: Mathias Krause +Cc: Marcel Holtmann +Cc: Gustavo Padovan +Cc: Johan Hedberg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bluetooth/rfcomm/sock.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/bluetooth/rfcomm/sock.c ++++ b/net/bluetooth/rfcomm/sock.c +@@ -836,6 +836,7 @@ static int rfcomm_sock_getsockopt(struct + } + + sec.level = rfcomm_pi(sk)->sec_level; ++ sec.key_size = 0; + + len = min_t(unsigned int, len, sizeof(sec)); + if (copy_to_user(optval, (char *) &sec, len)) diff --git a/queue-3.4/bluetooth-rfcomm-fix-info-leak-in-ioctl-rfcommgetdevlist.patch b/queue-3.4/bluetooth-rfcomm-fix-info-leak-in-ioctl-rfcommgetdevlist.patch new file mode 100644 index 00000000000..cbb0ce47155 --- /dev/null +++ b/queue-3.4/bluetooth-rfcomm-fix-info-leak-in-ioctl-rfcommgetdevlist.patch @@ -0,0 +1,39 @@ +From 31e77f71371f834c66ab1329ff7c0d55ef314b75 Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Wed, 15 Aug 2012 11:31:49 +0000 +Subject: Bluetooth: RFCOMM - Fix info leak in ioctl(RFCOMMGETDEVLIST) + + +From: Mathias Krause + +[ Upstream commit f9432c5ec8b1e9a09b9b0e5569e3c73db8de432a ] + +The RFCOMM code fails to initialize the two padding bytes of struct +rfcomm_dev_list_req inserted for alignment before copying it to +userland. Additionally there are two padding bytes in each instance of +struct rfcomm_dev_info. The ioctl() that for disclosures two bytes plus +dev_num times two bytes uninitialized kernel heap memory. + +Allocate the memory using kzalloc() to fix this issue. + +Signed-off-by: Mathias Krause +Cc: Marcel Holtmann +Cc: Gustavo Padovan +Cc: Johan Hedberg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bluetooth/rfcomm/tty.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/bluetooth/rfcomm/tty.c ++++ b/net/bluetooth/rfcomm/tty.c +@@ -467,7 +467,7 @@ static int rfcomm_get_dev_list(void __us + + size = sizeof(*dl) + dev_num * sizeof(*di); + +- dl = kmalloc(size, GFP_KERNEL); ++ dl = kzalloc(size, GFP_KERNEL); + if (!dl) + return -ENOMEM; + diff --git a/queue-3.4/bluetooth-rfcomm-fix-info-leak-via-getsockname.patch b/queue-3.4/bluetooth-rfcomm-fix-info-leak-via-getsockname.patch new file mode 100644 index 00000000000..19e0a4c0586 --- /dev/null +++ b/queue-3.4/bluetooth-rfcomm-fix-info-leak-via-getsockname.patch @@ -0,0 +1,35 @@ +From cf376769b392b18638422874acfb7d2d7e532903 Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Wed, 15 Aug 2012 11:31:50 +0000 +Subject: Bluetooth: RFCOMM - Fix info leak via getsockname() + + +From: Mathias Krause + +[ Upstream commit 9344a972961d1a6d2c04d9008b13617bcb6ec2ef ] + +The RFCOMM code fails to initialize the trailing padding byte of struct +sockaddr_rc added for alignment. It that for leaks one byte kernel stack +via the getsockname() syscall. Add an explicit memset(0) before filling +the structure to avoid the info leak. + +Signed-off-by: Mathias Krause +Cc: Marcel Holtmann +Cc: Gustavo Padovan +Cc: Johan Hedberg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bluetooth/rfcomm/sock.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/bluetooth/rfcomm/sock.c ++++ b/net/bluetooth/rfcomm/sock.c +@@ -546,6 +546,7 @@ static int rfcomm_sock_getname(struct so + + BT_DBG("sock %p, sk %p", sock, sk); + ++ memset(sa, 0, sizeof(*sa)); + sa->rc_family = AF_BLUETOOTH; + sa->rc_channel = rfcomm_pi(sk)->channel; + if (peer) diff --git a/queue-3.4/bnx2x-fix-57840_mf-pci-id.patch b/queue-3.4/bnx2x-fix-57840_mf-pci-id.patch new file mode 100644 index 00000000000..d1ab14af97c --- /dev/null +++ b/queue-3.4/bnx2x-fix-57840_mf-pci-id.patch @@ -0,0 +1,33 @@ +From f8a331d0027d463518c91746cdbe6a2a73e1b3f6 Mon Sep 17 00:00:00 2001 +From: Yuval Mintz +Date: Sun, 26 Aug 2012 00:35:45 +0000 +Subject: bnx2x: fix 57840_MF pci id + + +From: Yuval Mintz + +[ Upstream commit 5c879d2094946081af934739850c7260e8b25d3c ] + +Commit c3def943c7117d42caaed3478731ea7c3c87190e have added support for +new pci ids of the 57840 board, while failing to change the obsolete value +in 'pci_ids.h'. +This patch does so, allowing the probe of such devices. + +Signed-off-by: Yuval Mintz +Signed-off-by: Eilon Greenstein +Signed-off-by: David S. 
Miller +--- + include/linux/pci_ids.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/linux/pci_ids.h ++++ b/include/linux/pci_ids.h +@@ -2148,7 +2148,7 @@ + #define PCI_DEVICE_ID_TIGON3_5704S 0x16a8 + #define PCI_DEVICE_ID_NX2_57800_VF 0x16a9 + #define PCI_DEVICE_ID_NX2_5706S 0x16aa +-#define PCI_DEVICE_ID_NX2_57840_MF 0x16ab ++#define PCI_DEVICE_ID_NX2_57840_MF 0x16a4 + #define PCI_DEVICE_ID_NX2_5708S 0x16ac + #define PCI_DEVICE_ID_NX2_57840_VF 0x16ad + #define PCI_DEVICE_ID_NX2_57810_MF 0x16ae diff --git a/queue-3.4/dccp-fix-info-leak-via-getsockopt-dccp_sockopt_ccid_tx_info.patch b/queue-3.4/dccp-fix-info-leak-via-getsockopt-dccp_sockopt_ccid_tx_info.patch new file mode 100644 index 00000000000..07c726291d8 --- /dev/null +++ b/queue-3.4/dccp-fix-info-leak-via-getsockopt-dccp_sockopt_ccid_tx_info.patch @@ -0,0 +1,34 @@ +From 9bfa2df36d3059c9a32730e08685d9c6136c76e6 Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Wed, 15 Aug 2012 11:31:55 +0000 +Subject: dccp: fix info leak via getsockopt(DCCP_SOCKOPT_CCID_TX_INFO) + + +From: Mathias Krause + +[ Upstream commit 7b07f8eb75aa3097cdfd4f6eac3da49db787381d ] + +The CCID3 code fails to initialize the trailing padding bytes of struct +tfrc_tx_info added for alignment on 64 bit architectures. It that for +potentially leaks four bytes kernel stack via the getsockopt() syscall. +Add an explicit memset(0) before filling the structure to avoid the +info leak. + +Signed-off-by: Mathias Krause +Cc: Gerrit Renker +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ccids/ccid3.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/dccp/ccids/ccid3.c ++++ b/net/dccp/ccids/ccid3.c +@@ -531,6 +531,7 @@ static int ccid3_hc_tx_getsockopt(struct + case DCCP_SOCKOPT_CCID_TX_INFO: + if (len < sizeof(tfrc)) + return -EINVAL; ++ memset(&tfrc, 0, sizeof(tfrc)); + tfrc.tfrctx_x = hc->tx_x; + tfrc.tfrctx_x_recv = hc->tx_x_recv; + tfrc.tfrctx_x_calc = hc->tx_x_calc; diff --git a/queue-3.4/gianfar-fix-default-tx-vlan-offload-feature-flag.patch b/queue-3.4/gianfar-fix-default-tx-vlan-offload-feature-flag.patch new file mode 100644 index 00000000000..ea5197c2d5b --- /dev/null +++ b/queue-3.4/gianfar-fix-default-tx-vlan-offload-feature-flag.patch @@ -0,0 +1,35 @@ +From 4c1f1a9efe54407e076390dd156bf0b8d256010e Mon Sep 17 00:00:00 2001 +From: Claudiu Manoil +Date: Thu, 23 Aug 2012 21:46:25 +0000 +Subject: gianfar: fix default tx vlan offload feature flag + + +From: Claudiu Manoil + +[ Upstream commit e2c53be223aca36cf93eb6a0f6bafa079e78f52b ] + +Commit - +"b852b72 gianfar: fix bug caused by +87c288c6e9aa31720b72e2bc2d665e24e1653c3e" +disables by default (on mac init) the hw vlan tag insertion. +The "features" flags were not updated to reflect this, and +"ethtool -K" shows tx-vlan-offload to be "on" by default. + +Cc: Sebastian Poehn +Signed-off-by: Claudiu Manoil +Signed-off-by: David S. 
Miller +--- + drivers/net/ethernet/freescale/gianfar.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/freescale/gianfar.c ++++ b/drivers/net/ethernet/freescale/gianfar.c +@@ -1037,7 +1037,7 @@ static int gfar_probe(struct platform_de + + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_VLAN) { + dev->hw_features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; +- dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; ++ dev->features |= NETIF_F_HW_VLAN_RX; + } + + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_EXTENDED_HASH) { diff --git a/queue-3.4/ipv6-addrconf-avoid-calling-netdevice-notifiers-with-rcu-read-side-lock.patch b/queue-3.4/ipv6-addrconf-avoid-calling-netdevice-notifiers-with-rcu-read-side-lock.patch new file mode 100644 index 00000000000..dc5cd54b048 --- /dev/null +++ b/queue-3.4/ipv6-addrconf-avoid-calling-netdevice-notifiers-with-rcu-read-side-lock.patch @@ -0,0 +1,73 @@ +From 553d3d861eea733420633be512f1b800a2cfe4b6 Mon Sep 17 00:00:00 2001 +From: Ben Hutchings +Date: Tue, 14 Aug 2012 08:54:51 +0000 +Subject: ipv6: addrconf: Avoid calling netdevice notifiers with RCU read-side lock + + +From: Ben Hutchings + +[ Upstream commit 4acd4945cd1e1f92b20d14e349c6c6a52acbd42d ] + +Cong Wang reports that lockdep detected suspicious RCU usage while +enabling IPV6 forwarding: + + [ 1123.310275] =============================== + [ 1123.442202] [ INFO: suspicious RCU usage. ] + [ 1123.558207] 3.6.0-rc1+ #109 Not tainted + [ 1123.665204] ------------------------------- + [ 1123.768254] include/linux/rcupdate.h:430 Illegal context switch in RCU read-side critical section! 
+ [ 1123.992320] + [ 1123.992320] other info that might help us debug this: + [ 1123.992320] + [ 1124.307382] + [ 1124.307382] rcu_scheduler_active = 1, debug_locks = 0 + [ 1124.522220] 2 locks held by sysctl/5710: + [ 1124.648364] #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_trylock+0x15/0x17 + [ 1124.882211] #1: (rcu_read_lock){.+.+.+}, at: [] rcu_lock_acquire+0x0/0x29 + [ 1125.085209] + [ 1125.085209] stack backtrace: + [ 1125.332213] Pid: 5710, comm: sysctl Not tainted 3.6.0-rc1+ #109 + [ 1125.441291] Call Trace: + [ 1125.545281] [] lockdep_rcu_suspicious+0x109/0x112 + [ 1125.667212] [] rcu_preempt_sleep_check+0x45/0x47 + [ 1125.781838] [] __might_sleep+0x1e/0x19b +[...] + [ 1127.445223] [] call_netdevice_notifiers+0x4a/0x4f +[...] + [ 1127.772188] [] dev_disable_lro+0x32/0x6b + [ 1127.885174] [] dev_forward_change+0x30/0xcb + [ 1128.013214] [] addrconf_forward_change+0x85/0xc5 +[...] + +addrconf_forward_change() uses RCU iteration over the netdev list, +which is unnecessary since it already holds the RTNL lock. We also +cannot reasonably require netdevice notifier functions not to sleep. + +Reported-by: Cong Wang +Signed-off-by: Ben Hutchings +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -493,8 +493,7 @@ static void addrconf_forward_change(stru + struct net_device *dev; + struct inet6_dev *idev; + +- rcu_read_lock(); +- for_each_netdev_rcu(net, dev) { ++ for_each_netdev(net, dev) { + idev = __in6_dev_get(dev); + if (idev) { + int changed = (!idev->cnf.forwarding) ^ (!newf); +@@ -503,7 +502,6 @@ static void addrconf_forward_change(stru + dev_forward_change(idev); + } + } +- rcu_read_unlock(); + } + + static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) diff --git a/queue-3.4/ipvs-fix-info-leak-in-getsockopt-ip_vs_so_get_timeout.patch b/queue-3.4/ipvs-fix-info-leak-in-getsockopt-ip_vs_so_get_timeout.patch new file mode 100644 index 00000000000..a09be63b732 --- /dev/null +++ b/queue-3.4/ipvs-fix-info-leak-in-getsockopt-ip_vs_so_get_timeout.patch @@ -0,0 +1,36 @@ +From 3b4af5fea7fc8180f427fb354a4917f395ca566b Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Wed, 15 Aug 2012 11:31:56 +0000 +Subject: ipvs: fix info leak in getsockopt(IP_VS_SO_GET_TIMEOUT) + + +From: Mathias Krause + +[ Upstream commit 2d8a041b7bfe1097af21441cb77d6af95f4f4680 ] + +If at least one of CONFIG_IP_VS_PROTO_TCP or CONFIG_IP_VS_PROTO_UDP is +not set, __ip_vs_get_timeouts() does not fully initialize the structure +that gets copied to userland and that for leaks up to 12 bytes of kernel +stack. Add an explicit memset(0) before passing the structure to +__ip_vs_get_timeouts() to avoid the info leak. + +Signed-off-by: Mathias Krause +Cc: Wensong Zhang +Cc: Simon Horman +Cc: Julian Anastasov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/ipvs/ip_vs_ctl.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/netfilter/ipvs/ip_vs_ctl.c ++++ b/net/netfilter/ipvs/ip_vs_ctl.c +@@ -2713,6 +2713,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cm + { + struct ip_vs_timeout_user t; + ++ memset(&t, 0, sizeof(t)); + __ip_vs_get_timeouts(net, &t); + if (copy_to_user(user, &t, sizeof(t)) != 0) + ret = -EFAULT; diff --git a/queue-3.4/isdnloop-fix-and-simplify-isdnloop_init.patch b/queue-3.4/isdnloop-fix-and-simplify-isdnloop_init.patch new file mode 100644 index 00000000000..331b406d8a9 --- /dev/null +++ b/queue-3.4/isdnloop-fix-and-simplify-isdnloop_init.patch @@ -0,0 +1,60 @@ +From 81b9718fac70322890f437f07ef9639c1c85291e Mon Sep 17 00:00:00 2001 +From: Wu Fengguang +Date: Thu, 2 Aug 2012 23:10:01 +0000 +Subject: isdnloop: fix and simplify isdnloop_init() + + +From: Wu Fengguang + +[ Upstream commit 77f00f6324cb97cf1df6f9c4aaeea6ada23abdb2 ] + +Fix a buffer overflow bug by removing the revision and printk. + +[ 22.016214] isdnloop-ISDN-driver Rev 1.11.6.7 +[ 22.097508] isdnloop: (loop0) virtual card added +[ 22.174400] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffffff83244972 +[ 22.174400] +[ 22.436157] Pid: 1, comm: swapper Not tainted 3.5.0-bisect-00018-gfa8bbb1-dirty #129 +[ 22.624071] Call Trace: +[ 22.720558] [] ? CallcNew+0x56/0x56 +[ 22.815248] [] panic+0x110/0x329 +[ 22.914330] [] ? isdnloop_init+0xaf/0xb1 +[ 23.014800] [] ? CallcNew+0x56/0x56 +[ 23.090763] [] __stack_chk_fail+0x2b/0x30 +[ 23.185748] [] isdnloop_init+0xaf/0xb1 + +Signed-off-by: Fengguang Wu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/isdn/isdnloop/isdnloop.c | 12 ------------ + 1 file changed, 12 deletions(-) + +--- a/drivers/isdn/isdnloop/isdnloop.c ++++ b/drivers/isdn/isdnloop/isdnloop.c +@@ -16,7 +16,6 @@ + #include + #include "isdnloop.h" + +-static char *revision = "$Revision: 1.11.6.7 $"; + static char *isdnloop_id = "loop0"; + + MODULE_DESCRIPTION("ISDN4Linux: Pseudo Driver that simulates an ISDN card"); +@@ -1494,17 +1493,6 @@ isdnloop_addcard(char *id1) + static int __init + isdnloop_init(void) + { +- char *p; +- char rev[10]; +- +- if ((p = strchr(revision, ':'))) { +- strcpy(rev, p + 1); +- p = strchr(rev, '$'); +- *p = 0; +- } else +- strcpy(rev, " ??? "); +- printk(KERN_NOTICE "isdnloop-ISDN-driver Rev%s\n", rev); +- + if (isdnloop_id) + return (isdnloop_addcard(isdnloop_id)); + diff --git a/queue-3.4/l2tp-avoid-to-use-synchronize_rcu-in-tunnel-free-function.patch b/queue-3.4/l2tp-avoid-to-use-synchronize_rcu-in-tunnel-free-function.patch new file mode 100644 index 00000000000..11b46cf6ff0 --- /dev/null +++ b/queue-3.4/l2tp-avoid-to-use-synchronize_rcu-in-tunnel-free-function.patch @@ -0,0 +1,45 @@ +From d10ea5b12fc752978f9055712db85e243ef7f2f1 Mon Sep 17 00:00:00 2001 +From: "xeb@mail.ru" +Date: Fri, 24 Aug 2012 01:07:38 +0000 +Subject: l2tp: avoid to use synchronize_rcu in tunnel free function + + +From: "xeb@mail.ru" + +[ Upstream commit 99469c32f79a32d8481f87be0d3c66dad286f4ec ] + +Avoid to use synchronize_rcu in l2tp_tunnel_free because context may be +atomic. + +Signed-off-by: Dmitry Kozlov +Signed-off-by: David S. 
Miller +--- + net/l2tp/l2tp_core.c | 3 +-- + net/l2tp/l2tp_core.h | 1 + + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/net/l2tp/l2tp_core.c ++++ b/net/l2tp/l2tp_core.c +@@ -1253,11 +1253,10 @@ static void l2tp_tunnel_free(struct l2tp + /* Remove from tunnel list */ + spin_lock_bh(&pn->l2tp_tunnel_list_lock); + list_del_rcu(&tunnel->list); ++ kfree_rcu(tunnel, rcu); + spin_unlock_bh(&pn->l2tp_tunnel_list_lock); +- synchronize_rcu(); + + atomic_dec(&l2tp_tunnel_count); +- kfree(tunnel); + } + + /* Create a socket for the tunnel, if one isn't set up by +--- a/net/l2tp/l2tp_core.h ++++ b/net/l2tp/l2tp_core.h +@@ -157,6 +157,7 @@ struct l2tp_tunnel_cfg { + + struct l2tp_tunnel { + int magic; /* Should be L2TP_TUNNEL_MAGIC */ ++ struct rcu_head rcu; + rwlock_t hlist_lock; /* protect session_hlist */ + struct hlist_head session_hlist[L2TP_HASH_SIZE]; + /* hashed list of sessions, diff --git a/queue-3.4/llc-fix-info-leak-via-getsockname.patch b/queue-3.4/llc-fix-info-leak-via-getsockname.patch new file mode 100644 index 00000000000..5b4f1f61ab9 --- /dev/null +++ b/queue-3.4/llc-fix-info-leak-via-getsockname.patch @@ -0,0 +1,46 @@ +From f917ecb33ee2815b2d0ba711a11ad94c88ad4669 Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Wed, 15 Aug 2012 11:31:53 +0000 +Subject: llc: fix info leak via getsockname() + + +From: Mathias Krause + +[ Upstream commit 3592aaeb80290bda0f2cf0b5456c97bfc638b192 ] + +The LLC code wrongly returns 0, i.e. "success", when the socket is +zapped. Together with the uninitialized uaddrlen pointer argument from +sys_getsockname this leads to an arbitrary memory leak of up to 128 +bytes kernel stack via the getsockname() syscall. + +Return an error instead when the socket is zapped to prevent the info +leak. Also remove the unnecessary memset(0). We don't directly write to +the memory pointed by uaddr but memcpy() a local structure at the end of +the function that is properly initialized. 
+ +Signed-off-by: Mathias Krause +Cc: Arnaldo Carvalho de Melo +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/llc/af_llc.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/llc/af_llc.c ++++ b/net/llc/af_llc.c +@@ -971,14 +971,13 @@ static int llc_ui_getname(struct socket + struct sockaddr_llc sllc; + struct sock *sk = sock->sk; + struct llc_sock *llc = llc_sk(sk); +- int rc = 0; ++ int rc = -EBADF; + + memset(&sllc, 0, sizeof(sllc)); + lock_sock(sk); + if (sock_flag(sk, SOCK_ZAPPED)) + goto out; + *uaddrlen = sizeof(sllc); +- memset(uaddr, 0, *uaddrlen); + if (peer) { + rc = -ENOTCONN; + if (sk->sk_state != TCP_ESTABLISHED) diff --git a/queue-3.4/net-allow-driver-to-limit-number-of-gso-segments-per-skb.patch b/queue-3.4/net-allow-driver-to-limit-number-of-gso-segments-per-skb.patch new file mode 100644 index 00000000000..89985c368b1 --- /dev/null +++ b/queue-3.4/net-allow-driver-to-limit-number-of-gso-segments-per-skb.patch @@ -0,0 +1,69 @@ +From 2c7347a56d6bd220a747ebdbe67c834ec708b3ce Mon Sep 17 00:00:00 2001 +From: Ben Hutchings +Date: Mon, 30 Jul 2012 15:57:00 +0000 +Subject: net: Allow driver to limit number of GSO segments per skb + + +From: Ben Hutchings + +[ Upstream commit 30b678d844af3305cda5953467005cebb5d7b687 ] + +A peer (or local user) may cause TCP to use a nominal MSS of as little +as 88 (actual MSS of 76 with timestamps). Given that we have a +sufficiently prodigious local sender and the peer ACKs quickly enough, +it is nevertheless possible to grow the window for such a connection +to the point that we will try to send just under 64K at once. This +results in a single skb that expands to 861 segments. + +In some drivers with TSO support, such an skb will require hundreds of +DMA descriptors; a substantial fraction of a TX ring or even more than +a full ring. 
The TX queue selected for the skb may stall and trigger +the TX watchdog repeatedly (since the problem skb will be retried +after the TX reset). This particularly affects sfc, for which the +issue is designated as CVE-2012-3412. + +Therefore: +1. Add the field net_device::gso_max_segs holding the device-specific + limit. +2. In netif_skb_features(), if the number of segments is too high then + mask out GSO features to force fall back to software GSO. + +Signed-off-by: Ben Hutchings +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 2 ++ + net/core/dev.c | 4 ++++ + 2 files changed, 6 insertions(+) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -1279,6 +1279,8 @@ struct net_device { + /* for setting kernel sock attribute on TCP connection setup */ + #define GSO_MAX_SIZE 65536 + unsigned int gso_max_size; ++#define GSO_MAX_SEGS 65535 ++ u16 gso_max_segs; + + #ifdef CONFIG_DCB + /* Data Center Bridging netlink ops */ +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -2121,6 +2121,9 @@ netdev_features_t netif_skb_features(str + __be16 protocol = skb->protocol; + netdev_features_t features = skb->dev->features; + ++ if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) ++ features &= ~NETIF_F_GSO_MASK; ++ + if (protocol == htons(ETH_P_8021Q)) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; +@@ -5909,6 +5912,7 @@ struct net_device *alloc_netdev_mqs(int + dev_net_set(dev, &init_net); + + dev->gso_max_size = GSO_MAX_SIZE; ++ dev->gso_max_segs = GSO_MAX_SEGS; + + INIT_LIST_HEAD(&dev->napi_list); + INIT_LIST_HEAD(&dev->unreg_list); diff --git a/queue-3.4/net-core-fix-potential-memory-leak-in-dev_set_alias.patch b/queue-3.4/net-core-fix-potential-memory-leak-in-dev_set_alias.patch new file mode 100644 index 00000000000..a0f01549120 --- /dev/null +++ b/queue-3.4/net-core-fix-potential-memory-leak-in-dev_set_alias.patch @@ -0,0 +1,45 @@ +From 
b9d726ecad7fe940be99a2d326f3558864a7d32d Mon Sep 17 00:00:00 2001 +From: Alexey Khoroshilov +Date: Wed, 8 Aug 2012 00:33:25 +0000 +Subject: net/core: Fix potential memory leak in dev_set_alias() + + +From: Alexey Khoroshilov + +[ Upstream commit 7364e445f62825758fa61195d237a5b8ecdd06ec ] + +Do not leak memory by updating pointer with potentially NULL realloc return value. + +Found by Linux Driver Verification project (linuxtesting.org). + +Signed-off-by: Alexey Khoroshilov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1056,6 +1056,8 @@ rollback: + */ + int dev_set_alias(struct net_device *dev, const char *alias, size_t len) + { ++ char *new_ifalias; ++ + ASSERT_RTNL(); + + if (len >= IFALIASZ) +@@ -1069,9 +1071,10 @@ int dev_set_alias(struct net_device *dev + return 0; + } + +- dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); +- if (!dev->ifalias) ++ new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); ++ if (!new_ifalias) + return -ENOMEM; ++ dev->ifalias = new_ifalias; + + strlcpy(dev->ifalias, alias, len+1); + return len; diff --git a/queue-3.4/net-fix-info-leak-in-compat-dev_ifconf.patch b/queue-3.4/net-fix-info-leak-in-compat-dev_ifconf.patch new file mode 100644 index 00000000000..8b7078c828a --- /dev/null +++ b/queue-3.4/net-fix-info-leak-in-compat-dev_ifconf.patch @@ -0,0 +1,33 @@ +From 5d925ae211c8c2f62e869f6259633da56944e48e Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Wed, 15 Aug 2012 11:31:57 +0000 +Subject: net: fix info leak in compat dev_ifconf() + + +From: Mathias Krause + +[ Upstream commit 43da5f2e0d0c69ded3d51907d9552310a6b545e8 ] + +The implementation of dev_ifconf() for the compat ioctl interface uses +an intermediate ifc structure allocated in userland for the duration of +the syscall. 
Though, it fails to initialize the padding bytes inserted +for alignment and that for leaks four bytes of kernel stack. Add an +explicit memset(0) before filling the structure to avoid the info leak. + +Signed-off-by: Mathias Krause +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/socket.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/socket.c ++++ b/net/socket.c +@@ -2658,6 +2658,7 @@ static int dev_ifconf(struct net *net, s + if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) + return -EFAULT; + ++ memset(&ifc, 0, sizeof(ifc)); + if (ifc32.ifcbuf == 0) { + ifc32.ifc_len = 0; + ifc.ifc_len = 0; diff --git a/queue-3.4/net-ipv4-ipmr_expire_timer-causes-crash-when-removing-net-namespace.patch b/queue-3.4/net-ipv4-ipmr_expire_timer-causes-crash-when-removing-net-namespace.patch new file mode 100644 index 00000000000..fc271507b9b --- /dev/null +++ b/queue-3.4/net-ipv4-ipmr_expire_timer-causes-crash-when-removing-net-namespace.patch @@ -0,0 +1,83 @@ +From bac567858178ff4d4aabb1d6b9eba9b32f71d08a Mon Sep 17 00:00:00 2001 +From: Francesco Ruggeri +Date: Fri, 24 Aug 2012 07:38:35 +0000 +Subject: net: ipv4: ipmr_expire_timer causes crash when removing net namespace + + +From: Francesco Ruggeri + +[ Upstream commit acbb219d5f53821b2d0080d047800410c0420ea1 ] + +When tearing down a net namespace, ipv4 mr_table structures are freed +without first deactivating their timers. This can result in a crash in +run_timer_softirq. +This patch mimics the corresponding behaviour in ipv6. +Locking and synchronization seem to be adequate. +We are about to kfree mrt, so existing code should already make sure that +no other references to mrt are pending or can be created by incoming traffic. +The functions invoked here do not cause new references to mrt or other +race conditions to be created. +Invoking del_timer_sync guarantees that ipmr_expire_timer is inactive. 
+Both ipmr_expire_process (whose completion we may have to wait in +del_timer_sync) and mroute_clean_tables internally use mfc_unres_lock +or other synchronizations when needed, and they both only modify mrt. + +Tested in Linux 3.4.8. + +Signed-off-by: Francesco Ruggeri +Signed-off-by: David S. Miller +--- + net/ipv4/ipmr.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +--- a/net/ipv4/ipmr.c ++++ b/net/ipv4/ipmr.c +@@ -124,6 +124,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock); + static struct kmem_cache *mrt_cachep __read_mostly; + + static struct mr_table *ipmr_new_table(struct net *net, u32 id); ++static void ipmr_free_table(struct mr_table *mrt); ++ + static int ip_mr_forward(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, struct mfc_cache *cache, + int local); +@@ -131,6 +133,7 @@ static int ipmr_cache_report(struct mr_t + struct sk_buff *pkt, vifi_t vifi, int assert); + static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + struct mfc_cache *c, struct rtmsg *rtm); ++static void mroute_clean_tables(struct mr_table *mrt); + static void ipmr_expire_process(unsigned long arg); + + #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES +@@ -271,7 +274,7 @@ static void __net_exit ipmr_rules_exit(s + + list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { + list_del(&mrt->list); +- kfree(mrt); ++ ipmr_free_table(mrt); + } + fib_rules_unregister(net->ipv4.mr_rules_ops); + } +@@ -299,7 +302,7 @@ static int __net_init ipmr_rules_init(st + + static void __net_exit ipmr_rules_exit(struct net *net) + { +- kfree(net->ipv4.mrt); ++ ipmr_free_table(net->ipv4.mrt); + } + #endif + +@@ -336,6 +339,13 @@ static struct mr_table *ipmr_new_table(s + return mrt; + } + ++static void ipmr_free_table(struct mr_table *mrt) ++{ ++ del_timer_sync(&mrt->ipmr_expire_timer); ++ mroute_clean_tables(mrt); ++ kfree(mrt); ++} ++ + /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ + + static void 
ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) diff --git a/queue-3.4/net_sched-gact-fix-potential-panic-in-tcf_gact.patch b/queue-3.4/net_sched-gact-fix-potential-panic-in-tcf_gact.patch new file mode 100644 index 00000000000..fb1dbf79eb0 --- /dev/null +++ b/queue-3.4/net_sched-gact-fix-potential-panic-in-tcf_gact.patch @@ -0,0 +1,68 @@ +From b0ac7f146de5bfbfd1fee1ad8c865caa72ffcc70 Mon Sep 17 00:00:00 2001 +From: Hiroaki SHIMODA +Date: Fri, 3 Aug 2012 19:57:52 +0900 +Subject: net_sched: gact: Fix potential panic in tcf_gact(). + + +From: Hiroaki SHIMODA + +[ Upstream commit 696ecdc10622d86541f2e35cc16e15b6b3b1b67e ] + +gact_rand array is accessed by gact->tcfg_ptype whose value +is assumed to less than MAX_RAND, but any range checks are +not performed. + +So add a check in tcf_gact_init(). And in tcf_gact(), we can +reduce a branch. + +Signed-off-by: Hiroaki SHIMODA +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_gact.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/net/sched/act_gact.c ++++ b/net/sched/act_gact.c +@@ -67,6 +67,9 @@ static int tcf_gact_init(struct nlattr * + struct tcf_common *pc; + int ret = 0; + int err; ++#ifdef CONFIG_GACT_PROB ++ struct tc_gact_p *p_parm = NULL; ++#endif + + if (nla == NULL) + return -EINVAL; +@@ -82,6 +85,12 @@ static int tcf_gact_init(struct nlattr * + #ifndef CONFIG_GACT_PROB + if (tb[TCA_GACT_PROB] != NULL) + return -EOPNOTSUPP; ++#else ++ if (tb[TCA_GACT_PROB]) { ++ p_parm = nla_data(tb[TCA_GACT_PROB]); ++ if (p_parm->ptype >= MAX_RAND) ++ return -EINVAL; ++ } + #endif + + pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info); +@@ -103,8 +112,7 @@ static int tcf_gact_init(struct nlattr * + spin_lock_bh(&gact->tcf_lock); + gact->tcf_action = parm->action; + #ifdef CONFIG_GACT_PROB +- if (tb[TCA_GACT_PROB] != NULL) { +- struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]); ++ if (p_parm) { + gact->tcfg_paction = p_parm->paction; + 
gact->tcfg_pval = p_parm->pval; + gact->tcfg_ptype = p_parm->ptype; +@@ -133,7 +141,7 @@ static int tcf_gact(struct sk_buff *skb, + + spin_lock(&gact->tcf_lock); + #ifdef CONFIG_GACT_PROB +- if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL) ++ if (gact->tcfg_ptype) + action = gact_rand[gact->tcfg_ptype](gact); + else + action = gact->tcf_action; diff --git a/queue-3.4/netlink-fix-possible-spoofing-from-non-root-processes.patch b/queue-3.4/netlink-fix-possible-spoofing-from-non-root-processes.patch new file mode 100644 index 00000000000..0869649c06d --- /dev/null +++ b/queue-3.4/netlink-fix-possible-spoofing-from-non-root-processes.patch @@ -0,0 +1,73 @@ +From e419271426a339338348a40d97a5bd1f49eaa2c2 Mon Sep 17 00:00:00 2001 +From: Pablo Neira Ayuso +Date: Thu, 23 Aug 2012 02:09:11 +0000 +Subject: netlink: fix possible spoofing from non-root processes + + +From: Pablo Neira Ayuso + +[ Upstream commit 20e1db19db5d6b9e4e83021595eab0dc8f107bef ] + +Non-root user-space processes can send Netlink messages to other +processes that are well-known for being subscribed to Netlink +asynchronous notifications. This allows ilegitimate non-root +process to send forged messages to Netlink subscribers. + +The userspace process usually verifies the legitimate origin in +two ways: + +a) Socket credentials. If UID != 0, then the message comes from + some ilegitimate process and the message needs to be dropped. + +b) Netlink portID. In general, portID == 0 means that the origin + of the messages comes from the kernel. Thus, discarding any + message not coming from the kernel. + +However, ctnetlink sets the portID in event messages that has +been triggered by some user-space process, eg. conntrack utility. +So other processes subscribed to ctnetlink events, eg. conntrackd, +know that the event was triggered by some user-space action. + +Neither of the two ways to discard ilegitimate messages coming +from non-root processes can help for ctnetlink. 
+ +This patch adds capability validation in case that dst_pid is set +in netlink_sendmsg(). This approach is aggressive since existing +applications using any Netlink bus to deliver messages between +two user-space processes will break. Note that the exception is +NETLINK_USERSOCK, since it is reserved for netlink-to-netlink +userspace communication. + +Still, if anyone wants that his Netlink bus allows netlink-to-netlink +userspace, then they can set NL_NONROOT_SEND. However, by default, +I don't think it makes sense to allow to use NETLINK_ROUTE to +communicate two processes that are sending no matter what information +that is not related to link/neighbouring/routing. They should be using +NETLINK_USERSOCK instead for that. + +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: David S. Miller +--- + net/netlink/af_netlink.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -1340,7 +1340,8 @@ static int netlink_sendmsg(struct kiocb + dst_pid = addr->nl_pid; + dst_group = ffs(addr->nl_groups); + err = -EPERM; +- if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) ++ if ((dst_group || dst_pid) && ++ !netlink_capable(sock, NL_NONROOT_SEND)) + goto out; + } else { + dst_pid = nlk->dst_pid; +@@ -2115,6 +2116,7 @@ static void __init netlink_add_usersock_ + rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); + nl_table[NETLINK_USERSOCK].module = THIS_MODULE; + nl_table[NETLINK_USERSOCK].registered = 1; ++ nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND; + + netlink_table_ungrab(); + } diff --git a/queue-3.4/openvswitch-reset-upper-layer-protocol-info-on-internal-devices.patch b/queue-3.4/openvswitch-reset-upper-layer-protocol-info-on-internal-devices.patch new file mode 100644 index 00000000000..a8a94842fe7 --- /dev/null +++ b/queue-3.4/openvswitch-reset-upper-layer-protocol-info-on-internal-devices.patch @@ -0,0 +1,50 @@ +From 
c7df1099f1ecaae91421cabad25658cbad9e593e Mon Sep 17 00:00:00 2001 +From: Jesse Gross +Date: Fri, 25 May 2012 11:29:30 -0700 +Subject: openvswitch: Reset upper layer protocol info on internal devices. + + +From: Jesse Gross + +[ Upstream commit 7fe99e2d434eafeac0c57b279a77e5de39212636 ] + +It's possible that packets that are sent on internal devices (from +the OVS perspective) have already traversed the local IP stack. +After they go through the internal device, they will again travel +through the IP stack which may get confused by the presence of +existing information in the skb. The problem can be observed +when switching between namespaces. This clears out that information +to avoid problems but deliberately leaves other metadata alone. +This is to provide maximum flexibility in chaining together OVS +and other Linux components. + +Signed-off-by: Jesse Gross +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/vport-internal_dev.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/net/openvswitch/vport-internal_dev.c ++++ b/net/openvswitch/vport-internal_dev.c +@@ -24,6 +24,9 @@ + #include <linux/ethtool.h> + #include <linux/skbuff.h> + ++#include <net/dst.h> ++#include <net/xfrm.h> ++ + #include "datapath.h" + #include "vport-internal_dev.h" + #include "vport-netdev.h" +@@ -209,6 +212,11 @@ static int internal_dev_recv(struct vpor + int len; + + len = skb->len; ++ ++ skb_dst_drop(skb); ++ nf_reset(skb); ++ secpath_reset(skb); ++ + skb->dev = netdev; + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, netdev); diff --git a/queue-3.4/pptp-lookup-route-with-the-proper-net-namespace.patch b/queue-3.4/pptp-lookup-route-with-the-proper-net-namespace.patch new file mode 100644 index 00000000000..75046168009 --- /dev/null +++ b/queue-3.4/pptp-lookup-route-with-the-proper-net-namespace.patch @@ -0,0 +1,44 @@ +From 1870842e24d5d6bb98dd922eddbdbfea6df73326 Mon Sep 17 00:00:00 2001 +From: Gao feng +Date: Tue, 7 Aug 2012 00:23:11 +0000 +Subject: pptp: lookup route with the proper net namespace + + +From:
Gao feng + +[ Upstream commit 08252b32311c3fa84219ad794d640af7399b5485 ] + +pptp always use init_net as the net namespace to lookup +route, this will cause route lookup failed in container. + +because we already set the correct net namespace to struct +sock in pptp_create,so fix this by using sock_net(sk) to +replace &init_net. + +Signed-off-by: Gao feng +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/pptp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ppp/pptp.c ++++ b/drivers/net/ppp/pptp.c +@@ -189,7 +189,7 @@ static int pptp_xmit(struct ppp_channel + if (sk_pppox(po)->sk_state & PPPOX_DEAD) + goto tx_error; + +- rt = ip_route_output_ports(&init_net, &fl4, NULL, ++ rt = ip_route_output_ports(sock_net(sk), &fl4, NULL, + opt->dst_addr.sin_addr.s_addr, + opt->src_addr.sin_addr.s_addr, + 0, 0, IPPROTO_GRE, +@@ -468,7 +468,7 @@ static int pptp_connect(struct socket *s + po->chan.private = sk; + po->chan.ops = &pptp_chan_ops; + +- rt = ip_route_output_ports(&init_net, &fl4, sk, ++ rt = ip_route_output_ports(sock_net(sk), &fl4, sk, + opt->dst_addr.sin_addr.s_addr, + opt->src_addr.sin_addr.s_addr, + 0, 0, diff --git a/queue-3.4/series b/queue-3.4/series new file mode 100644 index 00000000000..f67374d7fd8 --- /dev/null +++ b/queue-3.4/series @@ -0,0 +1,31 @@ +net-allow-driver-to-limit-number-of-gso-segments-per-skb.patch +sfc-fix-maximum-number-of-tso-segments-and-minimum-tx-queue-size.patch +tcp-apply-device-tso-segment-limit-earlier.patch +net_sched-gact-fix-potential-panic-in-tcf_gact.patch +isdnloop-fix-and-simplify-isdnloop_init.patch +pptp-lookup-route-with-the-proper-net-namespace.patch +net-core-fix-potential-memory-leak-in-dev_set_alias.patch +af_packet-remove-bug-statement-in-tpacket_destruct_skb.patch +ipv6-addrconf-avoid-calling-netdevice-notifiers-with-rcu-read-side-lock.patch +atm-fix-info-leak-in-getsockopt-so_atmpvc.patch +atm-fix-info-leak-via-getsockname.patch 
+bluetooth-hci-fix-info-leak-in-getsockopt-hci_filter.patch +bluetooth-hci-fix-info-leak-via-getsockname.patch +bluetooth-rfcomm-fix-info-leak-in-getsockopt-bt_security.patch +bluetooth-rfcomm-fix-info-leak-in-ioctl-rfcommgetdevlist.patch +bluetooth-rfcomm-fix-info-leak-via-getsockname.patch +bluetooth-l2cap-fix-info-leak-via-getsockname.patch +llc-fix-info-leak-via-getsockname.patch +dccp-fix-info-leak-via-getsockopt-dccp_sockopt_ccid_tx_info.patch +ipvs-fix-info-leak-in-getsockopt-ip_vs_so_get_timeout.patch +net-fix-info-leak-in-compat-dev_ifconf.patch +af_packet-don-t-emit-packet-on-orig-fanout-group.patch +af_netlink-force-credentials-passing.patch +netlink-fix-possible-spoofing-from-non-root-processes.patch +tcp-fix-cwnd-reduction-for-non-sack-recovery.patch +sfc-fix-reporting-of-ipv4-full-filters-through-ethtool.patch +gianfar-fix-default-tx-vlan-offload-feature-flag.patch +l2tp-avoid-to-use-synchronize_rcu-in-tunnel-free-function.patch +net-ipv4-ipmr_expire_timer-causes-crash-when-removing-net-namespace.patch +bnx2x-fix-57840_mf-pci-id.patch +openvswitch-reset-upper-layer-protocol-info-on-internal-devices.patch diff --git a/queue-3.4/sfc-fix-maximum-number-of-tso-segments-and-minimum-tx-queue-size.patch b/queue-3.4/sfc-fix-maximum-number-of-tso-segments-and-minimum-tx-queue-size.patch new file mode 100644 index 00000000000..2342a1fd796 --- /dev/null +++ b/queue-3.4/sfc-fix-maximum-number-of-tso-segments-and-minimum-tx-queue-size.patch @@ -0,0 +1,150 @@ +From a74f85023df0d73e9c9dbbe3f99dfd4253115d24 Mon Sep 17 00:00:00 2001 +From: Ben Hutchings +Date: Mon, 30 Jul 2012 15:57:44 +0000 +Subject: sfc: Fix maximum number of TSO segments and minimum TX queue size + + +From: Ben Hutchings + +[ Upstream commit 7e6d06f0de3f74ca929441add094518ae332257c ] + +Currently an skb requiring TSO may not fit within a minimum-size TX +queue. 
The TX queue selected for the skb may stall and trigger the TX +watchdog repeatedly (since the problem skb will be retried after the +TX reset). This issue is designated as CVE-2012-3412. + +Set the maximum number of TSO segments for our devices to 100. This +should make no difference to behaviour unless the actual MSS is less +than about 700. Increase the minimum TX queue size accordingly to +allow for 2 worst-case skbs, so that there will definitely be space +to add an skb after we wake a queue. + +To avoid invalidating existing configurations, change +efx_ethtool_set_ringparam() to fix up values that are too small rather +than returning -EINVAL. + +Signed-off-by: Ben Hutchings +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/sfc/efx.c | 6 ++++++ + drivers/net/ethernet/sfc/efx.h | 14 ++++++++++---- + drivers/net/ethernet/sfc/ethtool.c | 16 +++++++++++----- + drivers/net/ethernet/sfc/tx.c | 19 +++++++++++++++++++ + 4 files changed, 46 insertions(+), 9 deletions(-) + +--- a/drivers/net/ethernet/sfc/efx.c ++++ b/drivers/net/ethernet/sfc/efx.c +@@ -1498,6 +1498,11 @@ static int efx_probe_all(struct efx_nic + goto fail2; + } + ++ BUILD_BUG_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_RXQ_MIN_ENT); ++ if (WARN_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_TXQ_MIN_ENT(efx))) { ++ rc = -EINVAL; ++ goto fail3; ++ } + efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE; + + rc = efx_probe_filters(efx); +@@ -2065,6 +2070,7 @@ static int efx_register_netdev(struct ef + net_dev->irq = efx->pci_dev->irq; + net_dev->netdev_ops = &efx_netdev_ops; + SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops); ++ net_dev->gso_max_segs = EFX_TSO_MAX_SEGS; + + rtnl_lock(); + +--- a/drivers/net/ethernet/sfc/efx.h ++++ b/drivers/net/ethernet/sfc/efx.h +@@ -30,6 +30,7 @@ extern netdev_tx_t + efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb); + extern void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); + extern int efx_setup_tc(struct 
net_device *net_dev, u8 num_tc); ++extern unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); + + /* RX */ + extern int efx_probe_rx_queue(struct efx_rx_queue *rx_queue); +@@ -52,10 +53,15 @@ extern void efx_schedule_slow_fill(struc + #define EFX_MAX_EVQ_SIZE 16384UL + #define EFX_MIN_EVQ_SIZE 512UL + +-/* The smallest [rt]xq_entries that the driver supports. Callers of +- * efx_wake_queue() assume that they can subsequently send at least one +- * skb. Falcon/A1 may require up to three descriptors per skb_frag. */ +-#define EFX_MIN_RING_SIZE (roundup_pow_of_two(2 * 3 * MAX_SKB_FRAGS)) ++/* Maximum number of TCP segments we support for soft-TSO */ ++#define EFX_TSO_MAX_SEGS 100 ++ ++/* The smallest [rt]xq_entries that the driver supports. RX minimum ++ * is a bit arbitrary. For TX, we must have space for at least 2 ++ * TSO skbs. ++ */ ++#define EFX_RXQ_MIN_ENT 128U ++#define EFX_TXQ_MIN_ENT(efx) (2 * efx_tx_max_skb_descs(efx)) + + /* Filters */ + extern int efx_probe_filters(struct efx_nic *efx); +--- a/drivers/net/ethernet/sfc/ethtool.c ++++ b/drivers/net/ethernet/sfc/ethtool.c +@@ -680,21 +680,27 @@ static int efx_ethtool_set_ringparam(str + struct ethtool_ringparam *ring) + { + struct efx_nic *efx = netdev_priv(net_dev); ++ u32 txq_entries; + + if (ring->rx_mini_pending || ring->rx_jumbo_pending || + ring->rx_pending > EFX_MAX_DMAQ_SIZE || + ring->tx_pending > EFX_MAX_DMAQ_SIZE) + return -EINVAL; + +- if (ring->rx_pending < EFX_MIN_RING_SIZE || +- ring->tx_pending < EFX_MIN_RING_SIZE) { ++ if (ring->rx_pending < EFX_RXQ_MIN_ENT) { + netif_err(efx, drv, efx->net_dev, +- "TX and RX queues cannot be smaller than %ld\n", +- EFX_MIN_RING_SIZE); ++ "RX queues cannot be smaller than %u\n", ++ EFX_RXQ_MIN_ENT); + return -EINVAL; + } + +- return efx_realloc_channels(efx, ring->rx_pending, ring->tx_pending); ++ txq_entries = max(ring->tx_pending, EFX_TXQ_MIN_ENT(efx)); ++ if (txq_entries != ring->tx_pending) ++ netif_warn(efx, drv, efx->net_dev, ++ "increasing TX 
queue size to minimum of %u\n", ++ txq_entries); ++ ++ return efx_realloc_channels(efx, ring->rx_pending, txq_entries); + } + + static int efx_ethtool_set_pauseparam(struct net_device *net_dev, +--- a/drivers/net/ethernet/sfc/tx.c ++++ b/drivers/net/ethernet/sfc/tx.c +@@ -119,6 +119,25 @@ efx_max_tx_len(struct efx_nic *efx, dma_ + return len; + } + ++unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) ++{ ++ /* Header and payload descriptor for each output segment, plus ++ * one for every input fragment boundary within a segment ++ */ ++ unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS; ++ ++ /* Possibly one more per segment for the alignment workaround */ ++ if (EFX_WORKAROUND_5391(efx)) ++ max_descs += EFX_TSO_MAX_SEGS; ++ ++ /* Possibly more for PCIe page boundaries within input fragments */ ++ if (PAGE_SIZE > EFX_PAGE_SIZE) ++ max_descs += max_t(unsigned int, MAX_SKB_FRAGS, ++ DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE)); ++ ++ return max_descs; ++} ++ + /* + * Add a socket buffer to a TX queue + * diff --git a/queue-3.4/sfc-fix-reporting-of-ipv4-full-filters-through-ethtool.patch b/queue-3.4/sfc-fix-reporting-of-ipv4-full-filters-through-ethtool.patch new file mode 100644 index 00000000000..d8aaf31b558 --- /dev/null +++ b/queue-3.4/sfc-fix-reporting-of-ipv4-full-filters-through-ethtool.patch @@ -0,0 +1,31 @@ +From 818dbc46fc3a4d8d48534ff1a7647e534aacd71a Mon Sep 17 00:00:00 2001 +From: Ben Hutchings +Date: Wed, 15 Aug 2012 18:09:15 +0100 +Subject: sfc: Fix reporting of IPv4 full filters through ethtool + + +From: Ben Hutchings + +[ Upstream commit ac70b2e9a13423b5efa0178e081936ce6979aea5 ] + +ETHTOOL_GRXCLSRULE returns filters for a TCP/IPv4 or UDP/IPv4 4-tuple +with source and destination swapped. 
+ +Signed-off-by: Ben Hutchings +--- + drivers/net/ethernet/sfc/ethtool.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/sfc/ethtool.c ++++ b/drivers/net/ethernet/sfc/ethtool.c +@@ -863,8 +863,8 @@ static int efx_ethtool_get_class_rule(st + &ip_entry->ip4dst, &ip_entry->pdst); + if (rc != 0) { + rc = efx_filter_get_ipv4_full( +- &spec, &proto, &ip_entry->ip4src, &ip_entry->psrc, +- &ip_entry->ip4dst, &ip_entry->pdst); ++ &spec, &proto, &ip_entry->ip4dst, &ip_entry->pdst, ++ &ip_entry->ip4src, &ip_entry->psrc); + EFX_WARN_ON_PARANOID(rc); + ip_mask->ip4src = ~0; + ip_mask->psrc = ~0; diff --git a/queue-3.4/tcp-apply-device-tso-segment-limit-earlier.patch b/queue-3.4/tcp-apply-device-tso-segment-limit-earlier.patch new file mode 100644 index 00000000000..27bdf85992d --- /dev/null +++ b/queue-3.4/tcp-apply-device-tso-segment-limit-earlier.patch @@ -0,0 +1,130 @@ +From c58dec6070b6a78f9fdb370857fe59324659b3a7 Mon Sep 17 00:00:00 2001 +From: Ben Hutchings +Date: Mon, 30 Jul 2012 16:11:42 +0000 +Subject: tcp: Apply device TSO segment limit earlier + + +From: Ben Hutchings + +[ Upstream commit 1485348d2424e1131ea42efc033cbd9366462b01 ] + +Cache the device gso_max_segs in sock::sk_gso_max_segs and use it to +limit the size of TSO skbs. This avoids the need to fall back to +software GSO for local TCP senders. + +Signed-off-by: Ben Hutchings +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sock.h | 2 ++ + net/core/sock.c | 1 + + net/ipv4/tcp.c | 4 +++- + net/ipv4/tcp_cong.c | 3 ++- + net/ipv4/tcp_output.c | 21 ++++++++++++--------- + 5 files changed, 20 insertions(+), 11 deletions(-) + +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -216,6 +216,7 @@ struct cg_proto; + * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) + * @sk_gso_type: GSO type (e.g. 
%SKB_GSO_TCPV4) + * @sk_gso_max_size: Maximum GSO segment size to build ++ * @sk_gso_max_segs: Maximum number of GSO segments + * @sk_lingertime: %SO_LINGER l_linger setting + * @sk_backlog: always used with the per-socket spinlock held + * @sk_callback_lock: used with the callbacks in the end of this struct +@@ -335,6 +336,7 @@ struct sock { + netdev_features_t sk_route_nocaps; + int sk_gso_type; + unsigned int sk_gso_max_size; ++ u16 sk_gso_max_segs; + int sk_rcvlowat; + unsigned long sk_lingertime; + struct sk_buff_head sk_error_queue; +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1411,6 +1411,7 @@ void sk_setup_caps(struct sock *sk, stru + } else { + sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; + sk->sk_gso_max_size = dst->dev->gso_max_size; ++ sk->sk_gso_max_segs = dst->dev->gso_max_segs; + } + } + } +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -740,7 +740,9 @@ static unsigned int tcp_xmit_size_goal(s + old_size_goal + mss_now > xmit_size_goal)) { + xmit_size_goal = old_size_goal; + } else { +- tp->xmit_size_goal_segs = xmit_size_goal / mss_now; ++ tp->xmit_size_goal_segs = ++ min_t(u16, xmit_size_goal / mss_now, ++ sk->sk_gso_max_segs); + xmit_size_goal = tp->xmit_size_goal_segs * mss_now; + } + } +--- a/net/ipv4/tcp_cong.c ++++ b/net/ipv4/tcp_cong.c +@@ -291,7 +291,8 @@ int tcp_is_cwnd_limited(const struct soc + left = tp->snd_cwnd - in_flight; + if (sk_can_gso(sk) && + left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && +- left * tp->mss_cache < sk->sk_gso_max_size) ++ left * tp->mss_cache < sk->sk_gso_max_size && ++ left < sk->sk_gso_max_segs) + return 1; + return left <= tcp_max_tso_deferred_mss(tp); + } +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1318,21 +1318,21 @@ static void tcp_cwnd_validate(struct soc + * when we would be allowed to send the split-due-to-Nagle skb fully. 
+ */ + static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, +- unsigned int mss_now, unsigned int cwnd) ++ unsigned int mss_now, unsigned int max_segs) + { + const struct tcp_sock *tp = tcp_sk(sk); +- u32 needed, window, cwnd_len; ++ u32 needed, window, max_len; + + window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; +- cwnd_len = mss_now * cwnd; ++ max_len = mss_now * max_segs; + +- if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk))) +- return cwnd_len; ++ if (likely(max_len <= window && skb != tcp_write_queue_tail(sk))) ++ return max_len; + + needed = min(skb->len, window); + +- if (cwnd_len <= needed) +- return cwnd_len; ++ if (max_len <= needed) ++ return max_len; + + return needed - needed % mss_now; + } +@@ -1560,7 +1560,8 @@ static int tcp_tso_should_defer(struct s + limit = min(send_win, cong_win); + + /* If a full-sized TSO skb can be sent, do it. */ +- if (limit >= sk->sk_gso_max_size) ++ if (limit >= min_t(unsigned int, sk->sk_gso_max_size, ++ sk->sk_gso_max_segs * tp->mss_cache)) + goto send_now; + + /* Middle in queue won't get any more data, full sendable already? 
*/ +@@ -1786,7 +1787,9 @@ static int tcp_write_xmit(struct sock *s + limit = mss_now; + if (tso_segs > 1 && !tcp_urg_mode(tp)) + limit = tcp_mss_split_point(sk, skb, mss_now, +- cwnd_quota); ++ min_t(unsigned int, ++ cwnd_quota, ++ sk->sk_gso_max_segs)); + + if (skb->len > limit && + unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) diff --git a/queue-3.4/tcp-fix-cwnd-reduction-for-non-sack-recovery.patch b/queue-3.4/tcp-fix-cwnd-reduction-for-non-sack-recovery.patch new file mode 100644 index 00000000000..4f57b25d571 --- /dev/null +++ b/queue-3.4/tcp-fix-cwnd-reduction-for-non-sack-recovery.patch @@ -0,0 +1,105 @@ +From 9e92914f3062caac3b7ae1c24bc64e6670bff1cd Mon Sep 17 00:00:00 2001 +From: Yuchung Cheng +Date: Thu, 23 Aug 2012 07:05:17 +0000 +Subject: tcp: fix cwnd reduction for non-sack recovery + + +From: Yuchung Cheng + +[ Upstream commit 7c4a56fec379ac0d7754e0d4da6a7361f1a4fe64 ] + +The cwnd reduction in fast recovery is based on the number of packets +newly delivered per ACK. For non-sack connections every DUPACK +signifies a packet has been delivered, but the sender mistakenly +skips counting them for cwnd reduction. + +The fix is to compute newly_acked_sacked after DUPACKs are accounted +in sacked_out for non-sack connections. + +Signed-off-by: Yuchung Cheng +Acked-by: Nandita Dukkipati +Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +--- + net/ipv4/tcp_input.c | 15 +++++++-------- + 1 file changed, 7 insertions(+), 8 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -3037,13 +3037,14 @@ static void tcp_update_cwnd_in_recovery( + * tcp_xmit_retransmit_queue(). 
+ */ + static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, +- int newly_acked_sacked, bool is_dupack, ++ int prior_sacked, bool is_dupack, + int flag) + { + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && + (tcp_fackets_out(tp) > tp->reordering)); ++ int newly_acked_sacked = 0; + int fast_rexmit = 0, mib_idx; + + if (WARN_ON(!tp->packets_out && tp->sacked_out)) +@@ -3103,6 +3104,7 @@ static void tcp_fastretrans_alert(struct + tcp_add_reno_sack(sk); + } else + do_lost = tcp_try_undo_partial(sk, pkts_acked); ++ newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; + break; + case TCP_CA_Loss: + if (flag & FLAG_DATA_ACKED) +@@ -3124,6 +3126,7 @@ static void tcp_fastretrans_alert(struct + if (is_dupack) + tcp_add_reno_sack(sk); + } ++ newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; + + if (icsk->icsk_ca_state <= TCP_CA_Disorder) + tcp_try_undo_dsack(sk); +@@ -3695,7 +3698,6 @@ static int tcp_ack(struct sock *sk, cons + int prior_packets; + int prior_sacked = tp->sacked_out; + int pkts_acked = 0; +- int newly_acked_sacked = 0; + int frto_cwnd = 0; + + /* If the ack is older than previous acks +@@ -3768,8 +3770,6 @@ static int tcp_ack(struct sock *sk, cons + flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); + + pkts_acked = prior_packets - tp->packets_out; +- newly_acked_sacked = (prior_packets - prior_sacked) - +- (tp->packets_out - tp->sacked_out); + + if (tp->frto_counter) + frto_cwnd = tcp_process_frto(sk, flag); +@@ -3783,7 +3783,7 @@ static int tcp_ack(struct sock *sk, cons + tcp_may_raise_cwnd(sk, flag)) + tcp_cong_avoid(sk, ack, prior_in_flight); + is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); +- tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, ++ tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, + is_dupack, flag); + } else { + if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) +@@ -3798,7 
+3798,7 @@ static int tcp_ack(struct sock *sk, cons + no_queue: + /* If data was DSACKed, see if we can undo a cwnd reduction. */ + if (flag & FLAG_DSACKING_ACK) +- tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, ++ tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, + is_dupack, flag); + /* If this ack opens up a zero window, clear backoff. It was + * being used to time the probes, and is probably far higher than +@@ -3818,8 +3818,7 @@ old_ack: + */ + if (TCP_SKB_CB(skb)->sacked) { + flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); +- newly_acked_sacked = tp->sacked_out - prior_sacked; +- tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, ++ tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, + is_dupack, flag); + } +