From: Greg Kroah-Hartman Date: Fri, 6 Dec 2013 00:39:30 +0000 (-0800) Subject: 3.12-stable patches X-Git-Tag: v3.4.73~14 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=bcdb90105d2fb700e8c69919c6b7b6184a592ee8;p=thirdparty%2Fkernel%2Fstable-queue.git 3.12-stable patches added patches: 6lowpan-uncompression-of-traffic-class-field-was.patch af_packet-block-bh-in-prb_shutdown_retire_blk_timer.patch atm-idt77252-fix-dev-refcnt-leak.patch bonding-don-t-permit-to-use-arp-monitoring-in-802.3ad.patch bonding-fix-two-race-conditions-in.patch bonding-rcuify-bond_set_rx_mode.patch bridge-flush-br-s-address-entry-in-fdb-when-remove-the-bridge-dev.patch connector-improved-unaligned-access-error-fix.patch core-dev-do-not-ignore-dmac-in-dev_forward_skb.patch gro-clean-up-tcpx_gro_receive-checksum-verification.patch gro-only-verify-tcp-checksums-for-candidates.patch gso-handle-new-frag_list-of-frags-gro-packets.patch inet-fix-addr_len-msg-msg_namelen-assignment-in-recv_error-and-rxpmtu-functions.patch inet-fix-possible-seqlock-deadlocks.patch inet-prevent-leakage-of-uninitialized-memory-to-user-in-recv-syscalls.patch ip6_output-fragment-outgoing-reassembled-skb-properly.patch ip6tnl-fix-use-after-free-of-fb_tnl_dev.patch ipv4-fix-possible-seqlock-deadlock.patch ipv4-fix-race-in-concurrent-ip_route_input_slow.patch ipv6-fix-headroom-calculation-in-udp6_ufo_fragment.patch ipv6-fix-inet6_init-cleanup-order.patch ipv6-fix-leaking-uninitialized-port-number-of-offender-sockaddr.patch ipv6-fix-possible-seqlock-deadlock-in-ip6_finish_output2.patch ipv6-protect-for_each_sk_fl_rcu-in-mem_check-with.patch ipv6-use-rt6_get_dflt_router-to-get-default-router-in.patch isdnloop-use-strlcpy-instead-of-strcpy.patch macvtap-limit-head-length-of-skb-allocated.patch net-8139cp-fix-a-bug_on-triggered-by-wrong-bytes_compl.patch net-add-bug_on-if-kernel-advertises-msg_namelen-sizeof-struct-sockaddr_storage.patch net-clamp-msg_namelen-instead-of-returning-an-error.patch 
net-core-always-propagate-flag-changes-to-interfaces.patch netfilter-push-reasm-skb-through-instead-of-original-frag-skbs.patch net-fix-ip-rule-delete-table-256.patch net-mlx4_en-fixed-crash-when-port-type-is-changed.patch net-rework-recvmsg-handler-msg_name-and-msg_namelen-logic.patch net-smc91-fix-crash-regression-on-the-versatile.patch net-tcp-fix-panic-in-tcp_fastopen_cache_set.patch net-update-consumers-of-msg_more-to-recognize-msg_sendpage_notlast.patch net-x86-bpf-don-t-forget-to-free-sk_filter-v2.patch packet-fix-use-after-free-race-in-send-path-when-dev-is-released.patch ping-prevent-null-pointer-dereference-on-write-to-msg_name.patch pktgen-xfrm-update-ipv4-header-total-len-and-checksum-after-tranformation.patch pkt_sched-fq-change-classification-of-control.patch pkt_sched-fq-fix-pacing-for-small-frames.patch pkt_sched-fq-warn-users-using-defrate.patch r8169-check-aldps-bit-and-disable-it-if-enabled-for-the-8168g.patch random32-fix-off-by-one-in-seeding-requirement.patch sch_tbf-handle-too-small-burst.patch sit-fix-use-after-free-of-fb_tunnel_dev.patch tcp-don-t-update-snd_nxt-when-a-socket-is-switched-from-repair-mode.patch tcp-tsq-restore-minimal-amount-of-queueing.patch team-fix-master-carrier-set-when-user-linkup-is-enabled.patch tuntap-limit-head-length-of-skb-allocated.patch usbnet-fix-status-interrupt-urb-handling.patch via-velocity-fix-netif_receive_skb-use-in-irq-disabled-section.patch xen-netback-include-definition-of-csum_ipv6_magic.patch xfrm-fix-null-pointer-dereference-when-decoding-sessions.patch xfrm-release-dst-if-this-dst-is-improper-for-vti-tunnel.patch --- diff --git a/queue-3.12/6lowpan-uncompression-of-traffic-class-field-was.patch b/queue-3.12/6lowpan-uncompression-of-traffic-class-field-was.patch new file mode 100644 index 00000000000..d5f5d2cc1e2 --- /dev/null +++ b/queue-3.12/6lowpan-uncompression-of-traffic-class-field-was.patch @@ -0,0 +1,67 @@ +From foo@baz Thu Dec 5 16:16:38 PST 2013 +From: Jukka Rissanen +Date: Wed, 13 Nov 
2013 11:03:39 +0200 +Subject: 6lowpan: Uncompression of traffic class field was + incorrect + +From: Jukka Rissanen + +[ Upstream commit 1188f05497e7bd2f2614b99c54adfbe7413d5749 ] + +If priority/traffic class field in IPv6 header is set (seen when +using ssh), the uncompression sets the TC and Flow fields incorrectly. + +Example: + +This is IPv6 header of a sent packet. Note the priority/TC (=1) in +the first byte. + +00000000: 61 00 00 00 00 2c 06 40 fe 80 00 00 00 00 00 00 +00000010: 02 02 72 ff fe c6 42 10 fe 80 00 00 00 00 00 00 +00000020: 02 1e ab ff fe 4c 52 57 + +This gets compressed like this in the sending side + +00000000: 72 31 04 06 02 1e ab ff fe 4c 52 57 ec c2 00 16 +00000010: aa 2d fe 92 86 4e be c6 .... + +In the receiving end, the packet gets uncompressed to this +IPv6 header + +00000000: 60 06 06 02 00 2a 1e 40 fe 80 00 00 00 00 00 00 +00000010: 02 02 72 ff fe c6 42 10 fe 80 00 00 00 00 00 00 +00000020: ab ff fe 4c 52 57 ec c2 + +First four bytes are set incorrectly and we have also lost +two bytes from destination address. + +The fix is to switch the case values in switch statement +when checking the TC field. + +Signed-off-by: Jukka Rissanen +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ieee802154/6lowpan.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ieee802154/6lowpan.c ++++ b/net/ieee802154/6lowpan.c +@@ -957,7 +957,7 @@ lowpan_process_data(struct sk_buff *skb) + * Traffic class carried in-line + * ECN + DSCP (1 byte), Flow Label is elided + */ +- case 1: /* 10b */ ++ case 2: /* 10b */ + if (lowpan_fetch_skb_u8(skb, &tmp)) + goto drop; + +@@ -968,7 +968,7 @@ lowpan_process_data(struct sk_buff *skb) + * Flow Label carried in-line + * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided + */ +- case 2: /* 01b */ ++ case 1: /* 01b */ + if (lowpan_fetch_skb_u8(skb, &tmp)) + goto drop; + diff --git a/queue-3.12/af_packet-block-bh-in-prb_shutdown_retire_blk_timer.patch b/queue-3.12/af_packet-block-bh-in-prb_shutdown_retire_blk_timer.patch new file mode 100644 index 00000000000..87d8f71555e --- /dev/null +++ b/queue-3.12/af_packet-block-bh-in-prb_shutdown_retire_blk_timer.patch @@ -0,0 +1,45 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Veaceslav Falico +Date: Fri, 29 Nov 2013 09:53:23 +0100 +Subject: af_packet: block BH in prb_shutdown_retire_blk_timer() + +From: Veaceslav Falico + +[ Upstream commit ec6f809ff6f19fafba3212f6aff0dda71dfac8e8 ] + +Currently we're using plain spin_lock() in prb_shutdown_retire_blk_timer(), +however the timer might fire right in the middle and thus try to re-acquire +the same spinlock, leaving us in an endless loop. + +To fix that, use the spin_lock_bh() to block it. + +Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") +CC: "David S. Miller" +CC: Daniel Borkmann +CC: Willem de Bruijn +CC: Phil Sutter +CC: Eric Dumazet +Reported-by: Jan Stancek +Tested-by: Jan Stancek +Signed-off-by: Veaceslav Falico +Acked-by: Daniel Borkmann +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -439,9 +439,9 @@ static void prb_shutdown_retire_blk_time + + pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; + +- spin_lock(&rb_queue->lock); ++ spin_lock_bh(&rb_queue->lock); + pkc->delete_blk_timer = 1; +- spin_unlock(&rb_queue->lock); ++ spin_unlock_bh(&rb_queue->lock); + + prb_del_retire_blk_timer(pkc); + } diff --git a/queue-3.12/atm-idt77252-fix-dev-refcnt-leak.patch b/queue-3.12/atm-idt77252-fix-dev-refcnt-leak.patch new file mode 100644 index 00000000000..c02e119931d --- /dev/null +++ b/queue-3.12/atm-idt77252-fix-dev-refcnt-leak.patch @@ -0,0 +1,31 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Ying Xue +Date: Tue, 19 Nov 2013 18:09:27 +0800 +Subject: atm: idt77252: fix dev refcnt leak + +From: Ying Xue + +[ Upstream commit b5de4a22f157ca345cdb3575207bf46402414bc1 ] + +init_card() calls dev_get_by_name() to get a network device. But it +doesn't decrease network device reference count after the device is +used. + +Signed-off-by: Ying Xue +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/atm/idt77252.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/atm/idt77252.c ++++ b/drivers/atm/idt77252.c +@@ -3511,7 +3511,7 @@ static int init_card(struct atm_dev *dev + tmp = dev_get_by_name(&init_net, tname); /* jhs: was "tmp = dev_get(tname);" */ + if (tmp) { + memcpy(card->atmdev->esi, tmp->dev_addr, 6); +- ++ dev_put(tmp); + printk("%s: ESI %pM\n", card->name, card->atmdev->esi); + } + /* diff --git a/queue-3.12/bonding-don-t-permit-to-use-arp-monitoring-in-802.3ad.patch b/queue-3.12/bonding-don-t-permit-to-use-arp-monitoring-in-802.3ad.patch new file mode 100644 index 00000000000..dcf778f1369 --- /dev/null +++ b/queue-3.12/bonding-don-t-permit-to-use-arp-monitoring-in-802.3ad.patch @@ -0,0 +1,41 @@ +From foo@baz Thu Dec 5 16:16:37 PST 2013 +From: Veaceslav Falico +Date: Tue, 12 Nov 2013 15:37:40 +0100 +Subject: bonding: don't permit to use ARP monitoring in 802.3ad + mode + +From: Veaceslav Falico + +[ Upstream commit ec9f1d15db8185f63a2c3143dc1e90ba18541b08 ] + +Currently the ARP monitoring is not supported with 802.3ad, and it's +prohibited to use it via the module params. + +However we still can set it afterwards via sysfs, cause we only check for +*LB modes there. + +To fix this - add a check for 802.3ad mode in bonding_store_arp_interval. + +CC: Jay Vosburgh +CC: Andy Gospodarek +Signed-off-by: Veaceslav Falico +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_sysfs.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/net/bonding/bond_sysfs.c ++++ b/drivers/net/bonding/bond_sysfs.c +@@ -587,8 +587,9 @@ static ssize_t bonding_store_arp_interva + goto out; + } + if (bond->params.mode == BOND_MODE_ALB || +- bond->params.mode == BOND_MODE_TLB) { +- pr_info("%s: ARP monitoring cannot be used with ALB/TLB. 
Only MII monitoring is supported on %s.\n", ++ bond->params.mode == BOND_MODE_TLB || ++ bond->params.mode == BOND_MODE_8023AD) { ++ pr_info("%s: ARP monitoring cannot be used with ALB/TLB/802.3ad. Only MII monitoring is supported on %s.\n", + bond->dev->name, bond->dev->name); + ret = -EINVAL; + goto out; diff --git a/queue-3.12/bonding-fix-two-race-conditions-in.patch b/queue-3.12/bonding-fix-two-race-conditions-in.patch new file mode 100644 index 00000000000..3710abb573f --- /dev/null +++ b/queue-3.12/bonding-fix-two-race-conditions-in.patch @@ -0,0 +1,64 @@ +From foo@baz Thu Dec 5 16:16:38 PST 2013 +From: Nikolay Aleksandrov +Date: Wed, 13 Nov 2013 17:07:46 +0100 +Subject: bonding: fix two race conditions in + bond_store_updelay/downdelay + +From: Nikolay Aleksandrov + +[ Upstream commit b869ccfab1e324507fa3596e3e1308444fb68227 ] + +This patch fixes two race conditions between bond_store_updelay/downdelay +and bond_store_miimon which could lead to division by zero as miimon can +be set to 0 while either updelay/downdelay are being set and thus miss the +zero check in the beginning, the zero div happens because updelay/downdelay +are stored as new_value / bond->params.miimon. Use rtnl to synchronize with +miimon setting. + +CC: Jay Vosburgh +CC: Andy Gospodarek +CC: Veaceslav Falico +Signed-off-by: Nikolay Aleksandrov +Acked-by: Veaceslav Falico +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_sysfs.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/net/bonding/bond_sysfs.c ++++ b/drivers/net/bonding/bond_sysfs.c +@@ -760,6 +760,8 @@ static ssize_t bonding_store_downdelay(s + int new_value, ret = count; + struct bonding *bond = to_bond(d); + ++ if (!rtnl_trylock()) ++ return restart_syscall(); + if (!(bond->params.miimon)) { + pr_err("%s: Unable to set down delay as MII monitoring is disabled\n", + bond->dev->name); +@@ -793,6 +795,7 @@ static ssize_t bonding_store_downdelay(s + } + + out: ++ rtnl_unlock(); + return ret; + } + static DEVICE_ATTR(downdelay, S_IRUGO | S_IWUSR, +@@ -815,6 +818,8 @@ static ssize_t bonding_store_updelay(str + int new_value, ret = count; + struct bonding *bond = to_bond(d); + ++ if (!rtnl_trylock()) ++ return restart_syscall(); + if (!(bond->params.miimon)) { + pr_err("%s: Unable to set up delay as MII monitoring is disabled\n", + bond->dev->name); +@@ -848,6 +853,7 @@ static ssize_t bonding_store_updelay(str + } + + out: ++ rtnl_unlock(); + return ret; + } + static DEVICE_ATTR(updelay, S_IRUGO | S_IWUSR, diff --git a/queue-3.12/bonding-rcuify-bond_set_rx_mode.patch b/queue-3.12/bonding-rcuify-bond_set_rx_mode.patch new file mode 100644 index 00000000000..f76c828f794 --- /dev/null +++ b/queue-3.12/bonding-rcuify-bond_set_rx_mode.patch @@ -0,0 +1,65 @@ +From foo@baz Thu Dec 5 16:16:37 PST 2013 +From: Veaceslav Falico +Date: Sat, 28 Sep 2013 21:18:56 +0200 +Subject: bonding: RCUify bond_set_rx_mode() + +From: Veaceslav Falico + +[ Upstream commit b32418705107265dfca5edfe2b547643e53a732e ] + +Currently we rely on rtnl locking in bond_set_rx_mode(), however it's not +always the case: + +RTNL: assertion failed at drivers/net/bonding/bond_main.c (3391) +... 
+ [] dump_stack+0x54/0x74 + [] bond_set_rx_mode+0xc7/0xd0 [bonding] + [] __dev_set_rx_mode+0x57/0xa0 + [] __dev_mc_add+0x58/0x70 + [] dev_mc_add+0x10/0x20 + [] igmp6_group_added+0x18e/0x1d0 + [] ? kmem_cache_alloc_trace+0x236/0x260 + [] ipv6_dev_mc_inc+0x29f/0x320 + [] ipv6_sock_mc_join+0x157/0x260 +... + +Fix this by using RCU primitives. + +Reported-by: Joe Lawrence +Tested-by: Joe Lawrence +CC: Jay Vosburgh +CC: Andy Gospodarek +Signed-off-by: Veaceslav Falico +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -3395,20 +3395,20 @@ static void bond_set_rx_mode(struct net_ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave; + +- ASSERT_RTNL(); +- ++ rcu_read_lock(); + if (USES_PRIMARY(bond->params.mode)) { +- slave = rtnl_dereference(bond->curr_active_slave); ++ slave = rcu_dereference(bond->curr_active_slave); + if (slave) { + dev_uc_sync(slave->dev, bond_dev); + dev_mc_sync(slave->dev, bond_dev); + } + } else { +- bond_for_each_slave(bond, slave) { ++ bond_for_each_slave_rcu(bond, slave) { + dev_uc_sync_multiple(slave->dev, bond_dev); + dev_mc_sync_multiple(slave->dev, bond_dev); + } + } ++ rcu_read_unlock(); + } + + static int bond_neigh_init(struct neighbour *n) diff --git a/queue-3.12/bridge-flush-br-s-address-entry-in-fdb-when-remove-the-bridge-dev.patch b/queue-3.12/bridge-flush-br-s-address-entry-in-fdb-when-remove-the-bridge-dev.patch new file mode 100644 index 00000000000..9f6827b3608 --- /dev/null +++ b/queue-3.12/bridge-flush-br-s-address-entry-in-fdb-when-remove-the-bridge-dev.patch @@ -0,0 +1,67 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Ding Tianhong +Date: Sat, 7 Dec 2013 22:12:05 +0800 +Subject: bridge: flush br's address entry in fdb when remove the bridge dev + +From: Ding Tianhong + +[ Upstream commit 
f873042093c0b418d2351fe142222b625c740149 ] + +When the following commands are executed: + +brctl addbr br0 +ifconfig br0 hw ether +rmmod bridge + +The calltrace will occur: + +[ 563.312114] device eth1 left promiscuous mode +[ 563.312188] br0: port 1(eth1) entered disabled state +[ 563.468190] kmem_cache_destroy bridge_fdb_cache: Slab cache still has objects +[ 563.468197] CPU: 6 PID: 6982 Comm: rmmod Tainted: G O 3.12.0-0.7-default+ #9 +[ 563.468199] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 +[ 563.468200] 0000000000000880 ffff88010f111e98 ffffffff814d1c92 ffff88010f111eb8 +[ 563.468204] ffffffff81148efd ffff88010f111eb8 0000000000000000 ffff88010f111ec8 +[ 563.468206] ffffffffa062a270 ffff88010f111ed8 ffffffffa063ac76 ffff88010f111f78 +[ 563.468209] Call Trace: +[ 563.468218] [] dump_stack+0x6a/0x78 +[ 563.468234] [] kmem_cache_destroy+0xfd/0x100 +[ 563.468242] [] br_fdb_fini+0x10/0x20 [bridge] +[ 563.468247] [] br_deinit+0x4e/0x50 [bridge] +[ 563.468254] [] SyS_delete_module+0x199/0x2b0 +[ 563.468259] [] system_call_fastpath+0x16/0x1b +[ 570.377958] Bridge firewalling registered + +--------------------------- cut here ------------------------------- + +The reason is that when the bridge dev's address is changed, the +br_fdb_change_mac_address() will add new address in fdb, but when +the bridge was removed, the address entry in the fdb did not free, +the bridge_fdb_cache still has objects when destroy the cache, Fix +this by flushing the bridge address entry when removing the bridge. + +v2: according to the Toshiaki Makita and Vlad's suggestion, I only + delete the vlan0 entry, it still have a leak here if the vlan id + is other number, so I need to call fdb_delete_by_port(br, NULL, 1) + to flush all entries whose dst is NULL for the bridge. + +Suggested-by: Toshiaki Makita +Suggested-by: Vlad Yasevich +Signed-off-by: Ding Tianhong +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_if.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/bridge/br_if.c ++++ b/net/bridge/br_if.c +@@ -172,6 +172,8 @@ void br_dev_delete(struct net_device *de + del_nbp(p); + } + ++ br_fdb_delete_by_port(br, NULL, 1); ++ + del_timer_sync(&br->gc_timer); + + br_sysfs_delbr(br->dev); diff --git a/queue-3.12/connector-improved-unaligned-access-error-fix.patch b/queue-3.12/connector-improved-unaligned-access-error-fix.patch new file mode 100644 index 00000000000..f58195cc8ba --- /dev/null +++ b/queue-3.12/connector-improved-unaligned-access-error-fix.patch @@ -0,0 +1,260 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Chris Metcalf +Date: Thu, 14 Nov 2013 12:09:21 -0500 +Subject: connector: improved unaligned access error fix + +From: Chris Metcalf + +[ Upstream commit 1ca1a4cf59ea343a1a70084fe7cc96f37f3cf5b1 ] + +In af3e095a1fb4, Erik Jacobsen fixed one type of unaligned access +bug for ia64 by converting a 64-bit write to use put_unaligned(). +Unfortunately, since gcc will convert a short memset() to a series +of appropriately-aligned stores, the problem is now visible again +on tilegx, where the memset that zeros out proc_event is converted +to three 64-bit stores, causing an unaligned access panic. + +A better fix for the original problem is to ensure that proc_event +is aligned to 8 bytes here. We can do that relatively easily by +arranging to start the struct cn_msg aligned to 8 bytes and then +offset by 4 bytes. Doing so means that the immediately following +proc_event structure is then correctly aligned to 8 bytes. + +The result is that the memset() stores are now aligned, and as an +added benefit, we can remove the put_unaligned() calls in the code. + +Signed-off-by: Chris Metcalf +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/connector/cn_proc.c | 72 +++++++++++++++++++++++++------------------- + 1 file changed, 42 insertions(+), 30 deletions(-) + +--- a/drivers/connector/cn_proc.c ++++ b/drivers/connector/cn_proc.c +@@ -32,11 +32,23 @@ + #include + #include + +-#include +- + #include + +-#define CN_PROC_MSG_SIZE (sizeof(struct cn_msg) + sizeof(struct proc_event)) ++/* ++ * Size of a cn_msg followed by a proc_event structure. Since the ++ * sizeof struct cn_msg is a multiple of 4 bytes, but not 8 bytes, we ++ * add one 4-byte word to the size here, and then start the actual ++ * cn_msg structure 4 bytes into the stack buffer. The result is that ++ * the immediately following proc_event structure is aligned to 8 bytes. ++ */ ++#define CN_PROC_MSG_SIZE (sizeof(struct cn_msg) + sizeof(struct proc_event) + 4) ++ ++/* See comment above; we test our assumption about sizeof struct cn_msg here. */ ++static inline struct cn_msg *buffer_to_cn_msg(__u8 *buffer) ++{ ++ BUILD_BUG_ON(sizeof(struct cn_msg) != 20); ++ return (struct cn_msg *)(buffer + 4); ++} + + static atomic_t proc_event_num_listeners = ATOMIC_INIT(0); + static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC }; +@@ -56,19 +68,19 @@ void proc_fork_connector(struct task_str + { + struct cn_msg *msg; + struct proc_event *ev; +- __u8 buffer[CN_PROC_MSG_SIZE]; ++ __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); + struct timespec ts; + struct task_struct *parent; + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + +- msg = (struct cn_msg *)buffer; ++ msg = buffer_to_cn_msg(buffer); + ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ +- put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); ++ ev->timestamp_ns = timespec_to_ns(&ts); + ev->what = PROC_EVENT_FORK; + rcu_read_lock(); + parent = rcu_dereference(task->real_parent); 
+@@ -91,17 +103,17 @@ void proc_exec_connector(struct task_str + struct cn_msg *msg; + struct proc_event *ev; + struct timespec ts; +- __u8 buffer[CN_PROC_MSG_SIZE]; ++ __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + +- msg = (struct cn_msg *)buffer; ++ msg = buffer_to_cn_msg(buffer); + ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ +- put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); ++ ev->timestamp_ns = timespec_to_ns(&ts); + ev->what = PROC_EVENT_EXEC; + ev->event_data.exec.process_pid = task->pid; + ev->event_data.exec.process_tgid = task->tgid; +@@ -117,14 +129,14 @@ void proc_id_connector(struct task_struc + { + struct cn_msg *msg; + struct proc_event *ev; +- __u8 buffer[CN_PROC_MSG_SIZE]; ++ __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); + struct timespec ts; + const struct cred *cred; + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + +- msg = (struct cn_msg *)buffer; ++ msg = buffer_to_cn_msg(buffer); + ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); + ev->what = which_id; +@@ -145,7 +157,7 @@ void proc_id_connector(struct task_struc + rcu_read_unlock(); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ +- put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); ++ ev->timestamp_ns = timespec_to_ns(&ts); + + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ +@@ -159,17 +171,17 @@ void proc_sid_connector(struct task_stru + struct cn_msg *msg; + struct proc_event *ev; + struct timespec ts; +- __u8 buffer[CN_PROC_MSG_SIZE]; ++ __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + +- msg = (struct cn_msg *)buffer; ++ msg = buffer_to_cn_msg(buffer); + ev = (struct 
proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ +- put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); ++ ev->timestamp_ns = timespec_to_ns(&ts); + ev->what = PROC_EVENT_SID; + ev->event_data.sid.process_pid = task->pid; + ev->event_data.sid.process_tgid = task->tgid; +@@ -186,17 +198,17 @@ void proc_ptrace_connector(struct task_s + struct cn_msg *msg; + struct proc_event *ev; + struct timespec ts; +- __u8 buffer[CN_PROC_MSG_SIZE]; ++ __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + +- msg = (struct cn_msg *)buffer; ++ msg = buffer_to_cn_msg(buffer); + ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ +- put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); ++ ev->timestamp_ns = timespec_to_ns(&ts); + ev->what = PROC_EVENT_PTRACE; + ev->event_data.ptrace.process_pid = task->pid; + ev->event_data.ptrace.process_tgid = task->tgid; +@@ -221,17 +233,17 @@ void proc_comm_connector(struct task_str + struct cn_msg *msg; + struct proc_event *ev; + struct timespec ts; +- __u8 buffer[CN_PROC_MSG_SIZE]; ++ __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + +- msg = (struct cn_msg *)buffer; ++ msg = buffer_to_cn_msg(buffer); + ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ +- put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); ++ ev->timestamp_ns = timespec_to_ns(&ts); + ev->what = PROC_EVENT_COMM; + ev->event_data.comm.process_pid = task->pid; + ev->event_data.comm.process_tgid = task->tgid; +@@ -248,18 +260,18 @@ void 
proc_coredump_connector(struct task + { + struct cn_msg *msg; + struct proc_event *ev; +- __u8 buffer[CN_PROC_MSG_SIZE]; ++ __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); + struct timespec ts; + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + +- msg = (struct cn_msg *)buffer; ++ msg = buffer_to_cn_msg(buffer); + ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ +- put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); ++ ev->timestamp_ns = timespec_to_ns(&ts); + ev->what = PROC_EVENT_COREDUMP; + ev->event_data.coredump.process_pid = task->pid; + ev->event_data.coredump.process_tgid = task->tgid; +@@ -275,18 +287,18 @@ void proc_exit_connector(struct task_str + { + struct cn_msg *msg; + struct proc_event *ev; +- __u8 buffer[CN_PROC_MSG_SIZE]; ++ __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); + struct timespec ts; + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + +- msg = (struct cn_msg *)buffer; ++ msg = buffer_to_cn_msg(buffer); + ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ +- put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); ++ ev->timestamp_ns = timespec_to_ns(&ts); + ev->what = PROC_EVENT_EXIT; + ev->event_data.exit.process_pid = task->pid; + ev->event_data.exit.process_tgid = task->tgid; +@@ -312,18 +324,18 @@ static void cn_proc_ack(int err, int rcv + { + struct cn_msg *msg; + struct proc_event *ev; +- __u8 buffer[CN_PROC_MSG_SIZE]; ++ __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); + struct timespec ts; + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + +- msg = (struct cn_msg *)buffer; ++ msg = buffer_to_cn_msg(buffer); + ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); + msg->seq = rcvd_seq; + 
ktime_get_ts(&ts); /* get high res monotonic timestamp */ +- put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); ++ ev->timestamp_ns = timespec_to_ns(&ts); + ev->cpu = -1; + ev->what = PROC_EVENT_NONE; + ev->event_data.ack.err = err; diff --git a/queue-3.12/core-dev-do-not-ignore-dmac-in-dev_forward_skb.patch b/queue-3.12/core-dev-do-not-ignore-dmac-in-dev_forward_skb.patch new file mode 100644 index 00000000000..b849fe710d2 --- /dev/null +++ b/queue-3.12/core-dev-do-not-ignore-dmac-in-dev_forward_skb.patch @@ -0,0 +1,71 @@ +From foo@baz Thu Dec 5 16:16:38 PST 2013 +From: Alexei Starovoitov +Date: Tue, 12 Nov 2013 14:39:13 -0800 +Subject: core/dev: do not ignore dmac in dev_forward_skb() + +From: Alexei Starovoitov + +[ Upstream commit 81b9eab5ebbf0d5d54da4fc168cfb02c2adc76b8 ] + +commit 06a23fe31ca3 +("core/dev: set pkt_type after eth_type_trans() in dev_forward_skb()") +and refactoring 64261f230a91 +("dev: move skb_scrub_packet() after eth_type_trans()") + +are forcing pkt_type to be PACKET_HOST when skb traverses veth. + +which means that ip forwarding will kick in inside netns +even if skb->eth->h_dest != dev->dev_addr + +Fix order of eth_type_trans() and skb_scrub_packet() in dev_forward_skb() +and in ip_tunnel_rcv() + +Fixes: 06a23fe31ca3 ("core/dev: set pkt_type after eth_type_trans() in dev_forward_skb()") +CC: Isaku Yamahata +CC: Maciej Zenczykowski +CC: Nicolas Dichtel +Signed-off-by: Alexei Starovoitov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 6 +----- + net/ipv4/ip_tunnel.c | 4 ++-- + 2 files changed, 3 insertions(+), 7 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1691,13 +1691,9 @@ int dev_forward_skb(struct net_device *d + kfree_skb(skb); + return NET_RX_DROP; + } +- skb->protocol = eth_type_trans(skb, dev); + +- /* eth_type_trans() can set pkt_type. +- * call skb_scrub_packet() after it to clear pkt_type _after_ calling +- * eth_type_trans(). 
+- */ + skb_scrub_packet(skb, true); ++ skb->protocol = eth_type_trans(skb, dev); + + return netif_rx(skb); + } +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -454,6 +454,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunn + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + ++ skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); ++ + if (tunnel->dev->type == ARPHRD_ETHER) { + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); +@@ -461,8 +463,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunn + skb->dev = tunnel->dev; + } + +- skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); +- + gro_cells_receive(&tunnel->gro_cells, skb); + return 0; + diff --git a/queue-3.12/gro-clean-up-tcpx_gro_receive-checksum-verification.patch b/queue-3.12/gro-clean-up-tcpx_gro_receive-checksum-verification.patch new file mode 100644 index 00000000000..556416538d9 --- /dev/null +++ b/queue-3.12/gro-clean-up-tcpx_gro_receive-checksum-verification.patch @@ -0,0 +1,122 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Herbert Xu +Date: Fri, 22 Nov 2013 10:32:11 +0800 +Subject: gro: Clean up tcpX_gro_receive checksum verification + +From: Herbert Xu + +[ Upstream commit b8ee93ba80b5a0b6c3c06b65c34dd1276f16c047 ] + +This patch simplifies the checksum verification in tcpX_gro_receive +by reusing the CHECKSUM_COMPLETE code for CHECKSUM_NONE. All it +does for CHECKSUM_NONE is compute the partial checksum and then +treat it as if it came from the hardware (CHECKSUM_COMPLETE). + +Signed-off-by: Herbert Xu + +Cheers, +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_offload.c | 26 ++++++++++---------------- + net/ipv6/tcpv6_offload.c | 27 ++++++++++----------------- + 2 files changed, 20 insertions(+), 33 deletions(-) + +--- a/net/ipv4/tcp_offload.c ++++ b/net/ipv4/tcp_offload.c +@@ -272,35 +272,29 @@ static struct sk_buff **tcp4_gro_receive + { + const struct iphdr *iph = skb_gro_network_header(skb); + __wsum wsum; +- __sum16 sum; + + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (NAPI_GRO_CB(skb)->flush) + goto skip_csum; + ++ wsum = skb->csum; ++ + switch (skb->ip_summed) { ++ case CHECKSUM_NONE: ++ wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), ++ 0); ++ ++ /* fall through */ ++ + case CHECKSUM_COMPLETE: + if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, +- skb->csum)) { ++ wsum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } +-flush: ++ + NAPI_GRO_CB(skb)->flush = 1; + return NULL; +- +- case CHECKSUM_NONE: +- wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, +- skb_gro_len(skb), IPPROTO_TCP, 0); +- sum = csum_fold(skb_checksum(skb, +- skb_gro_offset(skb), +- skb_gro_len(skb), +- wsum)); +- if (sum) +- goto flush; +- +- skb->ip_summed = CHECKSUM_UNNECESSARY; +- break; + } + + skip_csum: +--- a/net/ipv6/tcpv6_offload.c ++++ b/net/ipv6/tcpv6_offload.c +@@ -37,36 +37,29 @@ static struct sk_buff **tcp6_gro_receive + { + const struct ipv6hdr *iph = skb_gro_network_header(skb); + __wsum wsum; +- __sum16 sum; + + /* Don't bother verifying checksum if we're going to flush anyway. 
*/ + if (NAPI_GRO_CB(skb)->flush) + goto skip_csum; + ++ wsum = skb->csum; ++ + switch (skb->ip_summed) { ++ case CHECKSUM_NONE: ++ wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), ++ wsum); ++ ++ /* fall through */ ++ + case CHECKSUM_COMPLETE: + if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, +- skb->csum)) { ++ wsum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } +-flush: ++ + NAPI_GRO_CB(skb)->flush = 1; + return NULL; +- +- case CHECKSUM_NONE: +- wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, +- skb_gro_len(skb), +- IPPROTO_TCP, 0)); +- sum = csum_fold(skb_checksum(skb, +- skb_gro_offset(skb), +- skb_gro_len(skb), +- wsum)); +- if (sum) +- goto flush; +- +- skb->ip_summed = CHECKSUM_UNNECESSARY; +- break; + } + + skip_csum: diff --git a/queue-3.12/gro-only-verify-tcp-checksums-for-candidates.patch b/queue-3.12/gro-only-verify-tcp-checksums-for-candidates.patch new file mode 100644 index 00000000000..b25cd65454a --- /dev/null +++ b/queue-3.12/gro-only-verify-tcp-checksums-for-candidates.patch @@ -0,0 +1,73 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Herbert Xu +Date: Fri, 22 Nov 2013 10:31:29 +0800 +Subject: gro: Only verify TCP checksums for candidates + +From: Herbert Xu + +[ Upstream commit cc5c00bbb44c5d68b883aa5cb9d01514a2525d94 ] + +In some cases we may receive IP packets that are longer than +their stated lengths. Such packets are never merged in GRO. +However, we may end up computing their checksums incorrectly +and end up allowing packets with a bogus checksum enter our +stack with the checksum status set as verified. + +Since such packets are rare and not performance-critical, this +patch simply skips the checksum verification for them. + +Reported-by: Alexander Duyck +Signed-off-by: Herbert Xu +Acked-by: Alexander Duyck + +Thanks, +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_offload.c | 5 +++++ + net/ipv6/tcpv6_offload.c | 5 +++++ + 2 files changed, 10 insertions(+) + +--- a/net/ipv4/tcp_offload.c ++++ b/net/ipv4/tcp_offload.c +@@ -274,6 +274,10 @@ static struct sk_buff **tcp4_gro_receive + __wsum wsum; + __sum16 sum; + ++ /* Don't bother verifying checksum if we're going to flush anyway. */ ++ if (NAPI_GRO_CB(skb)->flush) ++ goto skip_csum; ++ + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, +@@ -299,6 +303,7 @@ flush: + break; + } + ++skip_csum: + return tcp_gro_receive(head, skb); + } + +--- a/net/ipv6/tcpv6_offload.c ++++ b/net/ipv6/tcpv6_offload.c +@@ -39,6 +39,10 @@ static struct sk_buff **tcp6_gro_receive + __wsum wsum; + __sum16 sum; + ++ /* Don't bother verifying checksum if we're going to flush anyway. */ ++ if (NAPI_GRO_CB(skb)->flush) ++ goto skip_csum; ++ + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, +@@ -65,6 +69,7 @@ flush: + break; + } + ++skip_csum: + return tcp_gro_receive(head, skb); + } + diff --git a/queue-3.12/gso-handle-new-frag_list-of-frags-gro-packets.patch b/queue-3.12/gso-handle-new-frag_list-of-frags-gro-packets.patch new file mode 100644 index 00000000000..976759ad589 --- /dev/null +++ b/queue-3.12/gso-handle-new-frag_list-of-frags-gro-packets.patch @@ -0,0 +1,167 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Herbert Xu +Date: Thu, 21 Nov 2013 11:10:04 -0800 +Subject: gso: handle new frag_list of frags GRO packets + +From: Herbert Xu + +[ Upstream commit 9d8506cc2d7ea1f911c72c100193a3677f6668c3 ] + +Recently GRO started generating packets with frag_lists of frags. +This was not handled by GSO, thus leading to a crash. + +Thankfully these packets are of a regular form and are easy to +handle. This patch handles them in two ways. 
For completely +non-linear frag_list entries, we simply continue to iterate over +the frag_list frags once we exhaust the normal frags. For frag_list +entries with linear parts, we call pskb_trim on the first part +of the frag_list skb, and then process the rest of the frags in +the usual way. + +This patch also kills a chunk of dead frag_list code that has +obviously never ever been run since it ends up generating a bogus +GSO-segmented packet with a frag_list entry. + +Future work is planned to split super big packets into TSO +ones. + +Fixes: 8a29111c7ca6 ("net: gro: allow to build full sized skb") +Reported-by: Christoph Paasch +Reported-by: Jerry Chu +Reported-by: Sander Eikelenboom +Signed-off-by: Herbert Xu +Signed-off-by: Eric Dumazet +Tested-by: Sander Eikelenboom +Tested-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 75 ++++++++++++++++++++++++++++++++++++------------------ + 1 file changed, 50 insertions(+), 25 deletions(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -2755,6 +2755,7 @@ struct sk_buff *skb_segment(struct sk_bu + struct sk_buff *segs = NULL; + struct sk_buff *tail = NULL; + struct sk_buff *fskb = skb_shinfo(skb)->frag_list; ++ skb_frag_t *skb_frag = skb_shinfo(skb)->frags; + unsigned int mss = skb_shinfo(skb)->gso_size; + unsigned int doffset = skb->data - skb_mac_header(skb); + unsigned int offset = doffset; +@@ -2794,16 +2795,38 @@ struct sk_buff *skb_segment(struct sk_bu + if (hsize > len || !sg) + hsize = len; + +- if (!hsize && i >= nfrags) { +- BUG_ON(fskb->len != len); ++ if (!hsize && i >= nfrags && skb_headlen(fskb) && ++ (skb_headlen(fskb) == len || sg)) { ++ BUG_ON(skb_headlen(fskb) > len); ++ ++ i = 0; ++ nfrags = skb_shinfo(fskb)->nr_frags; ++ skb_frag = skb_shinfo(fskb)->frags; ++ pos += skb_headlen(fskb); ++ ++ while (pos < offset + len) { ++ BUG_ON(i >= nfrags); ++ ++ size = skb_frag_size(skb_frag); ++ if (pos + size > offset + len) ++ break; ++ ++ 
i++; ++ pos += size; ++ skb_frag++; ++ } + +- pos += len; + nskb = skb_clone(fskb, GFP_ATOMIC); + fskb = fskb->next; + + if (unlikely(!nskb)) + goto err; + ++ if (unlikely(pskb_trim(nskb, len))) { ++ kfree_skb(nskb); ++ goto err; ++ } ++ + hsize = skb_end_offset(nskb); + if (skb_cow_head(nskb, doffset + headroom)) { + kfree_skb(nskb); +@@ -2847,7 +2870,7 @@ struct sk_buff *skb_segment(struct sk_bu + nskb->data - tnl_hlen, + doffset + tnl_hlen); + +- if (fskb != skb_shinfo(skb)->frag_list) ++ if (nskb->len == len + doffset) + goto perform_csum_check; + + if (!sg) { +@@ -2865,8 +2888,28 @@ struct sk_buff *skb_segment(struct sk_bu + + skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + +- while (pos < offset + len && i < nfrags) { +- *frag = skb_shinfo(skb)->frags[i]; ++ while (pos < offset + len) { ++ if (i >= nfrags) { ++ BUG_ON(skb_headlen(fskb)); ++ ++ i = 0; ++ nfrags = skb_shinfo(fskb)->nr_frags; ++ skb_frag = skb_shinfo(fskb)->frags; ++ ++ BUG_ON(!nfrags); ++ ++ fskb = fskb->next; ++ } ++ ++ if (unlikely(skb_shinfo(nskb)->nr_frags >= ++ MAX_SKB_FRAGS)) { ++ net_warn_ratelimited( ++ "skb_segment: too many frags: %u %u\n", ++ pos, mss); ++ goto err; ++ } ++ ++ *frag = *skb_frag; + __skb_frag_ref(frag); + size = skb_frag_size(frag); + +@@ -2879,6 +2922,7 @@ struct sk_buff *skb_segment(struct sk_bu + + if (pos + size <= offset + len) { + i++; ++ skb_frag++; + pos += size; + } else { + skb_frag_size_sub(frag, pos + size - (offset + len)); +@@ -2888,25 +2932,6 @@ struct sk_buff *skb_segment(struct sk_bu + frag++; + } + +- if (pos < offset + len) { +- struct sk_buff *fskb2 = fskb; +- +- BUG_ON(pos + fskb->len != offset + len); +- +- pos += fskb->len; +- fskb = fskb->next; +- +- if (fskb2->next) { +- fskb2 = skb_clone(fskb2, GFP_ATOMIC); +- if (!fskb2) +- goto err; +- } else +- skb_get(fskb2); +- +- SKB_FRAG_ASSERT(nskb); +- skb_shinfo(nskb)->frag_list = fskb2; +- } +- + skip_fraglist: + nskb->data_len = len - hsize; + nskb->len += 
nskb->data_len; diff --git a/queue-3.12/inet-fix-addr_len-msg-msg_namelen-assignment-in-recv_error-and-rxpmtu-functions.patch b/queue-3.12/inet-fix-addr_len-msg-msg_namelen-assignment-in-recv_error-and-rxpmtu-functions.patch new file mode 100644 index 00000000000..45af2a5f618 --- /dev/null +++ b/queue-3.12/inet-fix-addr_len-msg-msg_namelen-assignment-in-recv_error-and-rxpmtu-functions.patch @@ -0,0 +1,229 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Hannes Frederic Sowa +Date: Sat, 23 Nov 2013 00:46:12 +0100 +Subject: inet: fix addr_len/msg->msg_namelen assignment in recv_error and rxpmtu functions + +From: Hannes Frederic Sowa + +[ Upstream commit 85fbaa75037d0b6b786ff18658ddf0b4014ce2a4 ] + +Commit bceaa90240b6019ed73b49965eac7d167610be69 ("inet: prevent leakage +of uninitialized memory to user in recv syscalls") conditionally updated +addr_len if the msg_name is written to. The recv_error and rxpmtu +functions relied on the recvmsg functions to set up addr_len before. + +As this does not happen any more we have to pass addr_len to those +functions as well and set it to the size of the corresponding sockaddr +length. + +This broke traceroute and such. + +Fixes: bceaa90240b6 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") +Reported-by: Brad Spengler +Reported-by: Tom Labanowski +Cc: mpb +Cc: David S. Miller +Cc: Eric Dumazet +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip.h | 2 +- + include/net/ipv6.h | 6 ++++-- + include/net/ping.h | 3 ++- + net/ipv4/ip_sockglue.c | 3 ++- + net/ipv4/ping.c | 5 +++-- + net/ipv4/raw.c | 2 +- + net/ipv4/udp.c | 2 +- + net/ipv6/datagram.c | 7 +++++-- + net/ipv6/ping.c | 3 ++- + net/ipv6/raw.c | 4 ++-- + net/ipv6/udp.c | 4 ++-- + net/l2tp/l2tp_ip6.c | 2 +- + 12 files changed, 26 insertions(+), 17 deletions(-) + +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -464,7 +464,7 @@ extern int compat_ip_getsockopt(struct s + int optname, char __user *optval, int __user *optlen); + extern int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)); + +-extern int ip_recv_error(struct sock *sk, struct msghdr *msg, int len); ++extern int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); + extern void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload); + extern void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, +--- a/include/net/ipv6.h ++++ b/include/net/ipv6.h +@@ -802,8 +802,10 @@ extern int compat_ipv6_getsockopt(stru + extern int ip6_datagram_connect(struct sock *sk, + struct sockaddr *addr, int addr_len); + +-extern int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len); +-extern int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len); ++extern int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, ++ int *addr_len); ++extern int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, ++ int *addr_len); + extern void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, + u32 info, u8 *payload); + extern void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); +--- a/include/net/ping.h ++++ b/include/net/ping.h +@@ -31,7 +31,8 @@ + + /* Compatibility glue so we can support IPv6 when it's compiled as a module */ + struct pingv6_ops { +- 
int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len); ++ int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len, ++ int *addr_len); + int (*ip6_datagram_recv_ctl)(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb); + int (*icmpv6_err_convert)(u8 type, u8 code, int *err); +--- a/net/ipv4/ip_sockglue.c ++++ b/net/ipv4/ip_sockglue.c +@@ -368,7 +368,7 @@ void ip_local_error(struct sock *sk, int + /* + * Handle MSG_ERRQUEUE + */ +-int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) ++int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) + { + struct sock_exterr_skb *serr; + struct sk_buff *skb, *skb2; +@@ -405,6 +405,7 @@ int ip_recv_error(struct sock *sk, struc + serr->addr_offset); + sin->sin_port = serr->port; + memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); ++ *addr_len = sizeof(*sin); + } + + memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); +--- a/net/ipv4/ping.c ++++ b/net/ipv4/ping.c +@@ -838,10 +838,11 @@ int ping_recvmsg(struct kiocb *iocb, str + + if (flags & MSG_ERRQUEUE) { + if (family == AF_INET) { +- return ip_recv_error(sk, msg, len); ++ return ip_recv_error(sk, msg, len, addr_len); + #if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { +- return pingv6_ops.ipv6_recv_error(sk, msg, len); ++ return pingv6_ops.ipv6_recv_error(sk, msg, len, ++ addr_len); + #endif + } + } +--- a/net/ipv4/raw.c ++++ b/net/ipv4/raw.c +@@ -695,7 +695,7 @@ static int raw_recvmsg(struct kiocb *ioc + goto out; + + if (flags & MSG_ERRQUEUE) { +- err = ip_recv_error(sk, msg, len); ++ err = ip_recv_error(sk, msg, len, addr_len); + goto out; + } + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -1210,7 +1210,7 @@ int udp_recvmsg(struct kiocb *iocb, stru + bool slow; + + if (flags & MSG_ERRQUEUE) +- return ip_recv_error(sk, msg, len); ++ return ip_recv_error(sk, msg, len, addr_len); + + try_again: + skb = __skb_recv_datagram(sk, flags | (noblock ? 
MSG_DONTWAIT : 0), +--- a/net/ipv6/datagram.c ++++ b/net/ipv6/datagram.c +@@ -318,7 +318,7 @@ void ipv6_local_rxpmtu(struct sock *sk, + /* + * Handle MSG_ERRQUEUE + */ +-int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) ++int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) + { + struct ipv6_pinfo *np = inet6_sk(sk); + struct sock_exterr_skb *serr; +@@ -369,6 +369,7 @@ int ipv6_recv_error(struct sock *sk, str + &sin->sin6_addr); + sin->sin6_scope_id = 0; + } ++ *addr_len = sizeof(*sin); + } + + memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); +@@ -423,7 +424,8 @@ EXPORT_SYMBOL_GPL(ipv6_recv_error); + /* + * Handle IPV6_RECVPATHMTU + */ +-int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) ++int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, ++ int *addr_len) + { + struct ipv6_pinfo *np = inet6_sk(sk); + struct sk_buff *skb; +@@ -457,6 +459,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, st + sin->sin6_port = 0; + sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; + sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr; ++ *addr_len = sizeof(*sin); + } + + put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); +--- a/net/ipv6/ping.c ++++ b/net/ipv6/ping.c +@@ -57,7 +57,8 @@ static struct inet_protosw pingv6_protos + + + /* Compatibility glue so we can support IPv6 when it's compiled as a module */ +-static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) ++static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, ++ int *addr_len) + { + return -EAFNOSUPPORT; + } +--- a/net/ipv6/raw.c ++++ b/net/ipv6/raw.c +@@ -467,10 +467,10 @@ static int rawv6_recvmsg(struct kiocb *i + return -EOPNOTSUPP; + + if (flags & MSG_ERRQUEUE) +- return ipv6_recv_error(sk, msg, len); ++ return ipv6_recv_error(sk, msg, len, addr_len); + + if (np->rxpmtu && np->rxopt.bits.rxpmtu) +- return ipv6_recv_rxpmtu(sk, msg, len); ++ return ipv6_recv_rxpmtu(sk, msg, 
len, addr_len); + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -375,10 +375,10 @@ int udpv6_recvmsg(struct kiocb *iocb, st + bool slow; + + if (flags & MSG_ERRQUEUE) +- return ipv6_recv_error(sk, msg, len); ++ return ipv6_recv_error(sk, msg, len, addr_len); + + if (np->rxpmtu && np->rxopt.bits.rxpmtu) +- return ipv6_recv_rxpmtu(sk, msg, len); ++ return ipv6_recv_rxpmtu(sk, msg, len, addr_len); + + try_again: + skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), +--- a/net/l2tp/l2tp_ip6.c ++++ b/net/l2tp/l2tp_ip6.c +@@ -665,7 +665,7 @@ static int l2tp_ip6_recvmsg(struct kiocb + *addr_len = sizeof(*lsa); + + if (flags & MSG_ERRQUEUE) +- return ipv6_recv_error(sk, msg, len); ++ return ipv6_recv_error(sk, msg, len, addr_len); + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) diff --git a/queue-3.12/inet-fix-possible-seqlock-deadlocks.patch b/queue-3.12/inet-fix-possible-seqlock-deadlocks.patch new file mode 100644 index 00000000000..d28c5c79e02 --- /dev/null +++ b/queue-3.12/inet-fix-possible-seqlock-deadlocks.patch @@ -0,0 +1,64 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Eric Dumazet +Date: Thu, 28 Nov 2013 09:51:22 -0800 +Subject: inet: fix possible seqlock deadlocks + +From: Eric Dumazet + +[ Upstream commit f1d8cba61c3c4b1eb88e507249c4cb8d635d9a76 ] + +In commit c9e9042994d3 ("ipv4: fix possible seqlock deadlock") I left +another places where IP_INC_STATS_BH() were improperly used. + +udp_sendmsg(), ping_v4_sendmsg() and tcp_v4_connect() are called from +process context, not from softirq context. + +This was detected by lockdep seqlock support. + +Reported-by: jongman heo +Fixes: 584bdf8cbdf6 ("[IPV4]: Fix "ipOutNoRoutes" counter error for TCP and UDP") +Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") +Signed-off-by: Eric Dumazet +Cc: Hannes Frederic Sowa +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ping.c | 2 +- + net/ipv4/tcp_ipv4.c | 2 +- + net/ipv4/udp.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +--- a/net/ipv4/ping.c ++++ b/net/ipv4/ping.c +@@ -769,7 +769,7 @@ int ping_v4_sendmsg(struct kiocb *iocb, + err = PTR_ERR(rt); + rt = NULL; + if (err == -ENETUNREACH) +- IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); ++ IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); + goto out; + } + +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -177,7 +177,7 @@ int tcp_v4_connect(struct sock *sk, stru + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + if (err == -ENETUNREACH) +- IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); ++ IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + return err; + } + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -973,7 +973,7 @@ int udp_sendmsg(struct kiocb *iocb, stru + err = PTR_ERR(rt); + rt = NULL; + if (err == -ENETUNREACH) +- IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); ++ IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); + goto out; + } + diff --git a/queue-3.12/inet-prevent-leakage-of-uninitialized-memory-to-user-in-recv-syscalls.patch b/queue-3.12/inet-prevent-leakage-of-uninitialized-memory-to-user-in-recv-syscalls.patch new file mode 100644 index 00000000000..a415ccfb144 --- /dev/null +++ b/queue-3.12/inet-prevent-leakage-of-uninitialized-memory-to-user-in-recv-syscalls.patch @@ -0,0 +1,238 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Hannes Frederic Sowa +Date: Mon, 18 Nov 2013 04:20:45 +0100 +Subject: inet: prevent leakage of uninitialized memory to user in recv syscalls + +From: Hannes Frederic Sowa + +[ Upstream commit bceaa90240b6019ed73b49965eac7d167610be69 ] + +Only update *addr_len when we actually fill in sockaddr, otherwise we +can return uninitialized memory from the stack to the caller in the +recvfrom, recvmmsg and recvmsg syscalls. 
Drop the the (addr_len == NULL) +checks because we only get called with a valid addr_len pointer either +from sock_common_recvmsg or inet_recvmsg. + +If a blocking read waits on a socket which is concurrently shut down we +now return zero and set msg_msgnamelen to 0. + +Reported-by: mpb +Suggested-by: Eric Dumazet +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ieee802154/dgram.c | 3 +-- + net/ipv4/ping.c | 19 +++++++------------ + net/ipv4/raw.c | 4 +--- + net/ipv4/udp.c | 7 +------ + net/ipv6/raw.c | 4 +--- + net/ipv6/udp.c | 5 +---- + net/l2tp/l2tp_ip.c | 4 +--- + net/phonet/datagram.c | 9 ++++----- + 8 files changed, 17 insertions(+), 38 deletions(-) + +--- a/net/ieee802154/dgram.c ++++ b/net/ieee802154/dgram.c +@@ -315,9 +315,8 @@ static int dgram_recvmsg(struct kiocb *i + if (saddr) { + saddr->family = AF_IEEE802154; + saddr->addr = mac_cb(skb)->sa; +- } +- if (addr_len) + *addr_len = sizeof(*saddr); ++ } + + if (flags & MSG_TRUNC) + copied = skb->len; +--- a/net/ipv4/ping.c ++++ b/net/ipv4/ping.c +@@ -827,8 +827,6 @@ int ping_recvmsg(struct kiocb *iocb, str + { + struct inet_sock *isk = inet_sk(sk); + int family = sk->sk_family; +- struct sockaddr_in *sin; +- struct sockaddr_in6 *sin6; + struct sk_buff *skb; + int copied, err; + +@@ -838,13 +836,6 @@ int ping_recvmsg(struct kiocb *iocb, str + if (flags & MSG_OOB) + goto out; + +- if (addr_len) { +- if (family == AF_INET) +- *addr_len = sizeof(*sin); +- else if (family == AF_INET6 && addr_len) +- *addr_len = sizeof(*sin6); +- } +- + if (flags & MSG_ERRQUEUE) { + if (family == AF_INET) { + return ip_recv_error(sk, msg, len); +@@ -874,11 +865,13 @@ int ping_recvmsg(struct kiocb *iocb, str + + /* Copy the address and add cmsg data. 
*/ + if (family == AF_INET) { +- sin = (struct sockaddr_in *) msg->msg_name; ++ struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; ++ + sin->sin_family = AF_INET; + sin->sin_port = 0 /* skb->h.uh->source */; + sin->sin_addr.s_addr = ip_hdr(skb)->saddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); ++ *addr_len = sizeof(*sin); + + if (isk->cmsg_flags) + ip_cmsg_recv(msg, skb); +@@ -887,17 +880,19 @@ int ping_recvmsg(struct kiocb *iocb, str + } else if (family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6hdr *ip6 = ipv6_hdr(skb); +- sin6 = (struct sockaddr_in6 *) msg->msg_name; ++ struct sockaddr_in6 *sin6 = ++ (struct sockaddr_in6 *)msg->msg_name; ++ + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_addr = ip6->saddr; +- + sin6->sin6_flowinfo = 0; + if (np->sndflow) + sin6->sin6_flowinfo = ip6_flowinfo(ip6); + + sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); ++ *addr_len = sizeof(*sin6); + + if (inet6_sk(sk)->rxopt.all) + pingv6_ops.ip6_datagram_recv_ctl(sk, msg, skb); +--- a/net/ipv4/raw.c ++++ b/net/ipv4/raw.c +@@ -694,9 +694,6 @@ static int raw_recvmsg(struct kiocb *ioc + if (flags & MSG_OOB) + goto out; + +- if (addr_len) +- *addr_len = sizeof(*sin); +- + if (flags & MSG_ERRQUEUE) { + err = ip_recv_error(sk, msg, len); + goto out; +@@ -724,6 +721,7 @@ static int raw_recvmsg(struct kiocb *ioc + sin->sin_addr.s_addr = ip_hdr(skb)->saddr; + sin->sin_port = 0; + memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); ++ *addr_len = sizeof(*sin); + } + if (inet->cmsg_flags) + ip_cmsg_recv(msg, skb); +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -1209,12 +1209,6 @@ int udp_recvmsg(struct kiocb *iocb, stru + int is_udplite = IS_UDPLITE(sk); + bool slow; + +- /* +- * Check any passed addresses +- */ +- if (addr_len) +- *addr_len = sizeof(*sin); +- + if (flags & MSG_ERRQUEUE) + return ip_recv_error(sk, msg, len); + +@@ -1276,6 +1270,7 @@ try_again: + sin->sin_port = 
udp_hdr(skb)->source; + sin->sin_addr.s_addr = ip_hdr(skb)->saddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); ++ *addr_len = sizeof(*sin); + } + if (inet->cmsg_flags) + ip_cmsg_recv(msg, skb); +--- a/net/ipv6/raw.c ++++ b/net/ipv6/raw.c +@@ -466,9 +466,6 @@ static int rawv6_recvmsg(struct kiocb *i + if (flags & MSG_OOB) + return -EOPNOTSUPP; + +- if (addr_len) +- *addr_len=sizeof(*sin6); +- + if (flags & MSG_ERRQUEUE) + return ipv6_recv_error(sk, msg, len); + +@@ -507,6 +504,7 @@ static int rawv6_recvmsg(struct kiocb *i + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); ++ *addr_len = sizeof(*sin6); + } + + sock_recv_ts_and_drops(msg, sk, skb); +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -374,9 +374,6 @@ int udpv6_recvmsg(struct kiocb *iocb, st + int is_udp4; + bool slow; + +- if (addr_len) +- *addr_len = sizeof(struct sockaddr_in6); +- + if (flags & MSG_ERRQUEUE) + return ipv6_recv_error(sk, msg, len); + +@@ -462,7 +459,7 @@ try_again: + ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); + } +- ++ *addr_len = sizeof(*sin6); + } + if (is_udp4) { + if (inet->cmsg_flags) +--- a/net/l2tp/l2tp_ip.c ++++ b/net/l2tp/l2tp_ip.c +@@ -518,9 +518,6 @@ static int l2tp_ip_recvmsg(struct kiocb + if (flags & MSG_OOB) + goto out; + +- if (addr_len) +- *addr_len = sizeof(*sin); +- + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; +@@ -543,6 +540,7 @@ static int l2tp_ip_recvmsg(struct kiocb + sin->sin_addr.s_addr = ip_hdr(skb)->saddr; + sin->sin_port = 0; + memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); ++ *addr_len = sizeof(*sin); + } + if (inet->cmsg_flags) + ip_cmsg_recv(msg, skb); +--- a/net/phonet/datagram.c ++++ b/net/phonet/datagram.c +@@ -139,9 +139,6 @@ static int pn_recvmsg(struct kiocb *iocb + MSG_CMSG_COMPAT)) + goto out_nofree; + +- if (addr_len) +- *addr_len = sizeof(sa); +- + skb = skb_recv_datagram(sk, flags, noblock, &rval); + if (skb == NULL) + goto 
out_nofree; +@@ -162,8 +159,10 @@ static int pn_recvmsg(struct kiocb *iocb + + rval = (flags & MSG_TRUNC) ? skb->len : copylen; + +- if (msg->msg_name != NULL) +- memcpy(msg->msg_name, &sa, sizeof(struct sockaddr_pn)); ++ if (msg->msg_name != NULL) { ++ memcpy(msg->msg_name, &sa, sizeof(sa)); ++ *addr_len = sizeof(sa); ++ } + + out: + skb_free_datagram(sk, skb); diff --git a/queue-3.12/ip6_output-fragment-outgoing-reassembled-skb-properly.patch b/queue-3.12/ip6_output-fragment-outgoing-reassembled-skb-properly.patch new file mode 100644 index 00000000000..252057eaae5 --- /dev/null +++ b/queue-3.12/ip6_output-fragment-outgoing-reassembled-skb-properly.patch @@ -0,0 +1,36 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Jiri Pirko +Date: Wed, 6 Nov 2013 17:52:19 +0100 +Subject: ip6_output: fragment outgoing reassembled skb properly + +From: Jiri Pirko + +[ Upstream commit 9037c3579a277f3a23ba476664629fda8c35f7c4 ] + +If reassembled packet would fit into outdev MTU, it is not fragmented +according the original frag size and it is send as single big packet. + +The second case is if skb is gso. In that case fragmentation does not happen +according to the original frag size. + +This patch fixes these. + +Signed-off-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_output.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -125,7 +125,8 @@ static int ip6_finish_output2(struct sk_ + static int ip6_finish_output(struct sk_buff *skb) + { + if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || +- dst_allfrag(skb_dst(skb))) ++ dst_allfrag(skb_dst(skb)) || ++ (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) + return ip6_fragment(skb, ip6_finish_output2); + else + return ip6_finish_output2(skb); diff --git a/queue-3.12/ip6tnl-fix-use-after-free-of-fb_tnl_dev.patch b/queue-3.12/ip6tnl-fix-use-after-free-of-fb_tnl_dev.patch new file mode 100644 index 00000000000..3a9c053a752 --- /dev/null +++ b/queue-3.12/ip6tnl-fix-use-after-free-of-fb_tnl_dev.patch @@ -0,0 +1,82 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Nicolas Dichtel +Date: Thu, 14 Nov 2013 15:47:03 +0100 +Subject: ip6tnl: fix use after free of fb_tnl_dev + +From: Nicolas Dichtel + +[ Upstream commit 1e9f3d6f1c403dd2b6270f654b4747147aa2306f ] + +Bug has been introduced by commit bb8140947a24 ("ip6tnl: allow to use rtnl ops +on fb tunnel"). + +When ip6_tunnel.ko is unloaded, FB device is delete by rtnl_link_unregister() +and then we try to use the pointer in ip6_tnl_destroy_tunnels(). + +Let's add an handler for dellink, which will never remove the FB tunnel. With +this patch it will no more be possible to remove it via 'ip link del ip6tnl0', +but it's safer. + +The same fix was already proposed by Willem de Bruijn for +sit interfaces. + +CC: Willem de Bruijn +Reported-by: Steven Rostedt +Signed-off-by: Nicolas Dichtel +Acked-by: Willem de Bruijn +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_tunnel.c | 18 +++++++++++++----- + 1 file changed, 13 insertions(+), 5 deletions(-) + +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1635,6 +1635,15 @@ static int ip6_tnl_changelink(struct net + return ip6_tnl_update(t, &p); + } + ++static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head) ++{ ++ struct net *net = dev_net(dev); ++ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); ++ ++ if (dev != ip6n->fb_tnl_dev) ++ unregister_netdevice_queue(dev, head); ++} ++ + static size_t ip6_tnl_get_size(const struct net_device *dev) + { + return +@@ -1699,6 +1708,7 @@ static struct rtnl_link_ops ip6_link_ops + .validate = ip6_tnl_validate, + .newlink = ip6_tnl_newlink, + .changelink = ip6_tnl_changelink, ++ .dellink = ip6_tnl_dellink, + .get_size = ip6_tnl_get_size, + .fill_info = ip6_tnl_fill_info, + }; +@@ -1715,9 +1725,9 @@ static struct xfrm6_tunnel ip6ip6_handle + .priority = 1, + }; + +-static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) ++static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) + { +- struct net *net = dev_net(ip6n->fb_tnl_dev); ++ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct net_device *dev, *aux; + int h; + struct ip6_tnl *t; +@@ -1785,10 +1795,8 @@ err_alloc_dev: + + static void __net_exit ip6_tnl_exit_net(struct net *net) + { +- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); +- + rtnl_lock(); +- ip6_tnl_destroy_tunnels(ip6n); ++ ip6_tnl_destroy_tunnels(net); + rtnl_unlock(); + } + diff --git a/queue-3.12/ipv4-fix-possible-seqlock-deadlock.patch b/queue-3.12/ipv4-fix-possible-seqlock-deadlock.patch new file mode 100644 index 00000000000..e3241ea1789 --- /dev/null +++ b/queue-3.12/ipv4-fix-possible-seqlock-deadlock.patch @@ -0,0 +1,34 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Eric Dumazet +Date: Thu, 14 Nov 2013 13:37:54 -0800 +Subject: ipv4: fix possible seqlock 
deadlock + +From: Eric Dumazet + +[ Upstream commit c9e9042994d37cbc1ee538c500e9da1bb9d1bcdf ] + +ip4_datagram_connect() being called from process context, +it should use IP_INC_STATS() instead of IP_INC_STATS_BH() +otherwise we can deadlock on 32bit arches, or get corruptions of +SNMP counters. + +Fixes: 584bdf8cbdf6 ("[IPV4]: Fix "ipOutNoRoutes" counter error for TCP and UDP") +Signed-off-by: Eric Dumazet +Reported-by: Dave Jones +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/datagram.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/datagram.c ++++ b/net/ipv4/datagram.c +@@ -57,7 +57,7 @@ int ip4_datagram_connect(struct sock *sk + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + if (err == -ENETUNREACH) +- IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); ++ IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + goto out; + } + diff --git a/queue-3.12/ipv4-fix-race-in-concurrent-ip_route_input_slow.patch b/queue-3.12/ipv4-fix-race-in-concurrent-ip_route_input_slow.patch new file mode 100644 index 00000000000..934425f51e9 --- /dev/null +++ b/queue-3.12/ipv4-fix-race-in-concurrent-ip_route_input_slow.patch @@ -0,0 +1,49 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Alexei Starovoitov +Date: Tue, 19 Nov 2013 19:12:34 -0800 +Subject: ipv4: fix race in concurrent ip_route_input_slow() + +From: Alexei Starovoitov + +[ Upstream commit dcdfdf56b4a6c9437fc37dbc9cee94a788f9b0c4 ] + +CPUs can ask for local route via ip_route_input_noref() concurrently. +if nh_rth_input is not cached yet, CPUs will proceed to allocate +equivalent DSTs on 'lo' and then will try to cache them in nh_rth_input +via rt_cache_route() +Most of the time they succeed, but on occasion the following two lines: + orig = *p; + prev = cmpxchg(p, orig, rt); +in rt_cache_route() do race and one of the cpus fails to complete cmpxchg. +But ip_route_input_slow() doesn't check the return code of rt_cache_route(), +so dst is leaking. 
dst_destroy() is never called and 'lo' device +refcnt doesn't go to zero, which can be seen in the logs as: + unregister_netdevice: waiting for lo to become free. Usage count = 1 +Adding mdelay() between above two lines makes it easily reproducible. +Fix it similar to nh_pcpu_rth_output case. + +Fixes: d2d68ba9fe8b ("ipv4: Cache input routes in fib_info nexthops.") +Signed-off-by: Alexei Starovoitov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1772,8 +1772,12 @@ local_input: + rth->dst.error= -err; + rth->rt_flags &= ~RTCF_LOCAL; + } +- if (do_cache) +- rt_cache_route(&FIB_RES_NH(res), rth); ++ if (do_cache) { ++ if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) { ++ rth->dst.flags |= DST_NOCACHE; ++ rt_add_uncached_list(rth); ++ } ++ } + skb_dst_set(skb, &rth->dst); + err = 0; + goto out; diff --git a/queue-3.12/ipv6-fix-headroom-calculation-in-udp6_ufo_fragment.patch b/queue-3.12/ipv6-fix-headroom-calculation-in-udp6_ufo_fragment.patch new file mode 100644 index 00000000000..4066c576623 --- /dev/null +++ b/queue-3.12/ipv6-fix-headroom-calculation-in-udp6_ufo_fragment.patch @@ -0,0 +1,40 @@ +From foo@baz Thu Dec 5 16:16:37 PST 2013 +From: Hannes Frederic Sowa +Date: Tue, 5 Nov 2013 02:41:27 +0100 +Subject: ipv6: fix headroom calculation in udp6_ufo_fragment + +From: Hannes Frederic Sowa + +[ Upstream commit 0e033e04c2678dbbe74a46b23fffb7bb918c288e ] + +Commit 1e2bd517c108816220f262d7954b697af03b5f9c ("udp6: Fix udp +fragmentation for tunnel traffic.") changed the calculation if +there is enough space to include a fragment header in the skb from a +skb->mac_header dervived one to skb_headroom. Because we already peeled +off the skb to transport_header this is wrong. Change this back to check +if we have enough room before the mac_header. + +This fixes a panic Saran Neti reported. 
He used the tbf scheduler which +skb_gso_segments the skb. The offsets get negative and we panic in memcpy +because the skb was erroneously not expanded at the head. + +Reported-by: Saran Neti +Cc: Pravin B Shelar +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/udp_offload.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/udp_offload.c ++++ b/net/ipv6/udp_offload.c +@@ -88,7 +88,7 @@ static struct sk_buff *udp6_ufo_fragment + + /* Check if there is enough headroom to insert fragment header. */ + tnl_hlen = skb_tnl_header_len(skb); +- if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) { ++ if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) { + if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) + goto out; + } diff --git a/queue-3.12/ipv6-fix-inet6_init-cleanup-order.patch b/queue-3.12/ipv6-fix-inet6_init-cleanup-order.patch new file mode 100644 index 00000000000..aebf140a4eb --- /dev/null +++ b/queue-3.12/ipv6-fix-inet6_init-cleanup-order.patch @@ -0,0 +1,40 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Vlad Yasevich +Date: Sat, 16 Nov 2013 15:17:24 -0500 +Subject: ipv6: Fix inet6_init() cleanup order + +From: Vlad Yasevich + +Commit 6d0bfe22611602f36617bc7aa2ffa1bbb2f54c67 + net: ipv6: Add IPv6 support to the ping socket + +introduced a change in the cleanup logic of inet6_init and +has a bug in that ipv6_packet_cleanup() may not be called. +Fix the cleanup ordering. + +CC: Hannes Frederic Sowa +CC: Lorenzo Colitti +CC: Fabio Estevam +Signed-off-by: Vlad Yasevich +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/af_inet6.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv6/af_inet6.c ++++ b/net/ipv6/af_inet6.c +@@ -965,10 +965,10 @@ out: + + #ifdef CONFIG_SYSCTL + sysctl_fail: +- ipv6_packet_cleanup(); ++ pingv6_exit(); + #endif + pingv6_fail: +- pingv6_exit(); ++ ipv6_packet_cleanup(); + ipv6_packet_fail: + tcpv6_exit(); + tcpv6_fail: diff --git a/queue-3.12/ipv6-fix-leaking-uninitialized-port-number-of-offender-sockaddr.patch b/queue-3.12/ipv6-fix-leaking-uninitialized-port-number-of-offender-sockaddr.patch new file mode 100644 index 00000000000..0f1fa849ed9 --- /dev/null +++ b/queue-3.12/ipv6-fix-leaking-uninitialized-port-number-of-offender-sockaddr.patch @@ -0,0 +1,28 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Hannes Frederic Sowa +Date: Sat, 23 Nov 2013 07:22:33 +0100 +Subject: ipv6: fix leaking uninitialized port number of offender sockaddr + +From: Hannes Frederic Sowa + +[ Upstream commit 1fa4c710b6fe7b0aac9907240291b6fe6aafc3b8 ] + +Offenders don't have port numbers, so set it to 0. + +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/datagram.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv6/datagram.c ++++ b/net/ipv6/datagram.c +@@ -378,6 +378,7 @@ int ipv6_recv_error(struct sock *sk, str + if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { + sin->sin6_family = AF_INET6; + sin->sin6_flowinfo = 0; ++ sin->sin6_port = 0; + if (skb->protocol == htons(ETH_P_IPV6)) { + sin->sin6_addr = ipv6_hdr(skb)->saddr; + if (np->rxopt.all) diff --git a/queue-3.12/ipv6-fix-possible-seqlock-deadlock-in-ip6_finish_output2.patch b/queue-3.12/ipv6-fix-possible-seqlock-deadlock-in-ip6_finish_output2.patch new file mode 100644 index 00000000000..c090dd356dd --- /dev/null +++ b/queue-3.12/ipv6-fix-possible-seqlock-deadlock-in-ip6_finish_output2.patch @@ -0,0 +1,35 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Hannes Frederic Sowa +Date: Fri, 29 Nov 2013 06:39:44 +0100 +Subject: ipv6: fix possible seqlock deadlock in ip6_finish_output2 + +From: Hannes Frederic Sowa + +[ Upstream commit 7f88c6b23afbd31545c676dea77ba9593a1a14bf ] + +IPv6 stats are 64 bits and thus are protected with a seqlock. By not +disabling bottom-half we could deadlock here if we don't disable bh and +a softirq reentrantly updates the same mib. + +Cc: Eric Dumazet +Signed-off-by: Hannes Frederic Sowa +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_output.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -116,8 +116,8 @@ static int ip6_finish_output2(struct sk_ + } + rcu_read_unlock_bh(); + +- IP6_INC_STATS_BH(dev_net(dst->dev), +- ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); ++ IP6_INC_STATS(dev_net(dst->dev), ++ ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + kfree_skb(skb); + return -EINVAL; + } diff --git a/queue-3.12/ipv6-protect-for_each_sk_fl_rcu-in-mem_check-with.patch b/queue-3.12/ipv6-protect-for_each_sk_fl_rcu-in-mem_check-with.patch new file mode 100644 index 00000000000..57edc06a5e4 --- /dev/null +++ b/queue-3.12/ipv6-protect-for_each_sk_fl_rcu-in-mem_check-with.patch @@ -0,0 +1,33 @@ +From foo@baz Thu Dec 5 16:16:37 PST 2013 +From: Hannes Frederic Sowa +Date: Fri, 8 Nov 2013 19:26:21 +0100 +Subject: ipv6: protect for_each_sk_fl_rcu in mem_check with + rcu_read_lock_bh + +From: Hannes Frederic Sowa + +[ Upstream commit f8c31c8f80dd882f7eb49276989a4078d33d67a7 ] + +Fixes a suspicious rcu dereference warning. + +Cc: Florent Fourcot +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_flowlabel.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv6/ip6_flowlabel.c ++++ b/net/ipv6/ip6_flowlabel.c +@@ -453,8 +453,10 @@ static int mem_check(struct sock *sk) + if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) + return 0; + ++ rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) + count++; ++ rcu_read_unlock_bh(); + + if (room <= 0 || + ((count >= FL_MAX_PER_SOCK || diff --git a/queue-3.12/ipv6-use-rt6_get_dflt_router-to-get-default-router-in.patch b/queue-3.12/ipv6-use-rt6_get_dflt_router-to-get-default-router-in.patch new file mode 100644 index 00000000000..57b1b0411cf --- /dev/null +++ b/queue-3.12/ipv6-use-rt6_get_dflt_router-to-get-default-router-in.patch @@ -0,0 +1,44 @@ +From foo@baz Thu Dec 5 16:16:37 PST 2013 +From: Duan Jiong +Date: Fri, 8 Nov 2013 09:56:53 +0800 +Subject: ipv6: use rt6_get_dflt_router to get default router in + rt6_route_rcv + +From: Duan Jiong + +[ Upstream commit f104a567e673f382b09542a8dc3500aa689957b4 ] + +As the rfc 4191 said, the Router Preference and Lifetime values in a +::/0 Route Information Option should override the preference and lifetime +values in the Router Advertisement header. But when the kernel deals with +a ::/0 Route Information Option, the rt6_get_route_info() always return +NULL, that means that overriding will not happen, because those default +routers were added without flag RTF_ROUTEINFO in rt6_add_dflt_router(). + +In order to deal with that condition, we should call rt6_get_dflt_router +when the prefix length is 0. + +Signed-off-by: Duan Jiong +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -731,8 +731,11 @@ int rt6_route_rcv(struct net_device *dev + prefix = &prefix_buf; + } + +- rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, +- dev->ifindex); ++ if (rinfo->prefix_len == 0) ++ rt = rt6_get_dflt_router(gwaddr, dev); ++ else ++ rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, ++ gwaddr, dev->ifindex); + + if (rt && !lifetime) { + ip6_del_rt(rt); diff --git a/queue-3.12/isdnloop-use-strlcpy-instead-of-strcpy.patch b/queue-3.12/isdnloop-use-strlcpy-instead-of-strcpy.patch new file mode 100644 index 00000000000..281d5fefd0b --- /dev/null +++ b/queue-3.12/isdnloop-use-strlcpy-instead-of-strcpy.patch @@ -0,0 +1,43 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Dan Carpenter +Date: Thu, 14 Nov 2013 11:21:10 +0300 +Subject: isdnloop: use strlcpy() instead of strcpy() + +From: Dan Carpenter + +[ Upstream commit f9a23c84486ed350cce7bb1b2828abd1f6658796 ] + +These strings come from a copy_from_user() and there is no way to be +sure they are NUL terminated. + +Signed-off-by: Dan Carpenter +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/isdn/isdnloop/isdnloop.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/isdn/isdnloop/isdnloop.c ++++ b/drivers/isdn/isdnloop/isdnloop.c +@@ -1083,8 +1083,10 @@ isdnloop_start(isdnloop_card *card, isdn + spin_unlock_irqrestore(&card->isdnloop_lock, flags); + return -ENOMEM; + } +- for (i = 0; i < 3; i++) +- strcpy(card->s0num[i], sdef.num[i]); ++ for (i = 0; i < 3; i++) { ++ strlcpy(card->s0num[i], sdef.num[i], ++ sizeof(card->s0num[0])); ++ } + break; + case ISDN_PTYPE_1TR6: + if (isdnloop_fake(card, "DRV1.04TC-1TR6-CAPI-CNS-BASIS-29.11.95", +@@ -1097,7 +1099,7 @@ isdnloop_start(isdnloop_card *card, isdn + spin_unlock_irqrestore(&card->isdnloop_lock, flags); + return -ENOMEM; + } +- strcpy(card->s0num[0], sdef.num[0]); ++ strlcpy(card->s0num[0], sdef.num[0], sizeof(card->s0num[0])); + card->s0num[1][0] = '\0'; + card->s0num[2][0] = '\0'; + break; diff --git a/queue-3.12/macvtap-limit-head-length-of-skb-allocated.patch b/queue-3.12/macvtap-limit-head-length-of-skb-allocated.patch new file mode 100644 index 00000000000..1385898fc78 --- /dev/null +++ b/queue-3.12/macvtap-limit-head-length-of-skb-allocated.patch @@ -0,0 +1,58 @@ +From foo@baz Thu Dec 5 16:16:38 PST 2013 +From: Jason Wang +Date: Wed, 13 Nov 2013 14:00:40 +0800 +Subject: macvtap: limit head length of skb allocated + +From: Jason Wang + +[ Upstream commit 16a3fa28630331e28208872fa5341ce210b901c7 ] + +We currently use hdr_len as a hint of head length which is advertised by +guest. But when guest advertise a very big value, it can lead to an 64K+ +allocating of kmalloc() which has a very high possibility of failure when host +memory is fragmented or under heavy stress. The huge hdr_len also reduce the +effect of zerocopy or even disable if a gso skb is linearized in guest. + +To solves those issues, this patch introduces an upper limit (PAGE_SIZE) of the +head, which guarantees an order 0 allocation each time. 
+ +Cc: Stefan Hajnoczi +Cc: Michael S. Tsirkin +Signed-off-by: Jason Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvtap.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/net/macvtap.c ++++ b/drivers/net/macvtap.c +@@ -628,6 +628,7 @@ static ssize_t macvtap_get_user(struct m + const struct iovec *iv, unsigned long total_len, + size_t count, int noblock) + { ++ int good_linear = SKB_MAX_HEAD(NET_IP_ALIGN); + struct sk_buff *skb; + struct macvlan_dev *vlan; + unsigned long len = total_len; +@@ -670,6 +671,8 @@ static ssize_t macvtap_get_user(struct m + + if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { + copylen = vnet_hdr.hdr_len ? vnet_hdr.hdr_len : GOODCOPY_LEN; ++ if (copylen > good_linear) ++ copylen = good_linear; + linear = copylen; + if (iov_pages(iv, vnet_hdr_len + copylen, count) + <= MAX_SKB_FRAGS) +@@ -678,7 +681,10 @@ static ssize_t macvtap_get_user(struct m + + if (!zerocopy) { + copylen = len; +- linear = vnet_hdr.hdr_len; ++ if (vnet_hdr.hdr_len > good_linear) ++ linear = good_linear; ++ else ++ linear = vnet_hdr.hdr_len; + } + + skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen, diff --git a/queue-3.12/net-8139cp-fix-a-bug_on-triggered-by-wrong-bytes_compl.patch b/queue-3.12/net-8139cp-fix-a-bug_on-triggered-by-wrong-bytes_compl.patch new file mode 100644 index 00000000000..96f9f7180cf --- /dev/null +++ b/queue-3.12/net-8139cp-fix-a-bug_on-triggered-by-wrong-bytes_compl.patch @@ -0,0 +1,107 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Yang Yingliang +Date: Wed, 27 Nov 2013 14:32:52 +0800 +Subject: net: 8139cp: fix a BUG_ON triggered by wrong bytes_compl + +From: Yang Yingliang + +[ Upstream commit 7fe0ee099ad5e3dea88d4ee1b6f20246b1ca57c3 ] + +Using iperf to send packets(GSO mode is on), a bug is triggered: + +[ 212.672781] kernel BUG at lib/dynamic_queue_limits.c:26! 
+[ 212.673396] invalid opcode: 0000 [#1] SMP +[ 212.673882] Modules linked in: 8139cp(O) nls_utf8 edd fuse loop dm_mod ipv6 i2c_piix4 8139too i2c_core intel_agp joydev pcspkr hid_generic intel_gtt floppy sr_mod mii button sg cdrom ext3 jbd mbcache usbhid hid uhci_hcd ehci_hcd usbcore sd_mod usb_common crc_t10dif crct10dif_common processor thermal_sys hwmon scsi_dh_emc scsi_dh_rdac scsi_dh_hp_sw scsi_dh ata_generic ata_piix libata scsi_mod [last unloaded: 8139cp] +[ 212.676084] CPU: 0 PID: 4124 Comm: iperf Tainted: G O 3.12.0-0.7-default+ #16 +[ 212.676084] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 +[ 212.676084] task: ffff8800d83966c0 ti: ffff8800db4c8000 task.ti: ffff8800db4c8000 +[ 212.676084] RIP: 0010:[] [] dql_completed+0x17f/0x190 +[ 212.676084] RSP: 0018:ffff880116e03e30 EFLAGS: 00010083 +[ 212.676084] RAX: 00000000000005ea RBX: 0000000000000f7c RCX: 0000000000000002 +[ 212.676084] RDX: ffff880111dd0dc0 RSI: 0000000000000bd4 RDI: ffff8800db6ffcc0 +[ 212.676084] RBP: ffff880116e03e48 R08: 0000000000000992 R09: 0000000000000000 +[ 212.676084] R10: ffffffff8181e400 R11: 0000000000000004 R12: 000000000000000f +[ 212.676084] R13: ffff8800d94ec840 R14: ffff8800db440c80 R15: 000000000000000e +[ 212.676084] FS: 00007f6685a3c700(0000) GS:ffff880116e00000(0000) knlGS:0000000000000000 +[ 212.676084] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 212.676084] CR2: 00007f6685ad6460 CR3: 00000000db714000 CR4: 00000000000006f0 +[ 212.676084] Stack: +[ 212.676084] ffff8800db6ffc00 000000000000000f ffff8800d94ec840 ffff880116e03eb8 +[ 212.676084] ffffffffa041509f ffff880116e03e88 0000000f16e03e88 ffff8800d94ec000 +[ 212.676084] 00000bd400059858 000000050000000f ffffffff81094c36 ffff880116e03eb8 +[ 212.676084] Call Trace: +[ 212.676084] +[ 212.676084] [] cp_interrupt+0x4ef/0x590 [8139cp] +[ 212.676084] [] ? 
ktime_get+0x56/0xd0 +[ 212.676084] [] handle_irq_event_percpu+0x53/0x170 +[ 212.676084] [] handle_irq_event+0x3c/0x60 +[ 212.676084] [] handle_fasteoi_irq+0x55/0xf0 +[ 212.676084] [] handle_irq+0x1f/0x30 +[ 212.676084] [] do_IRQ+0x5b/0xe0 +[ 212.676084] [] common_interrupt+0x6a/0x6a +[ 212.676084] +[ 212.676084] [] ? cp_start_xmit+0x621/0x97c [8139cp] +[ 212.676084] [] ? cp_start_xmit+0x609/0x97c [8139cp] +[ 212.676084] [] dev_hard_start_xmit+0x2c9/0x550 +[ 212.676084] [] sch_direct_xmit+0x179/0x1d0 +[ 212.676084] [] dev_queue_xmit+0x293/0x440 +[ 212.676084] [] ip_finish_output+0x236/0x450 +[ 212.676084] [] ? __alloc_pages_nodemask+0x187/0xb10 +[ 212.676084] [] ip_output+0x88/0x90 +[ 212.676084] [] ip_local_out+0x24/0x30 +[ 212.676084] [] ip_queue_xmit+0x14d/0x3e0 +[ 212.676084] [] tcp_transmit_skb+0x501/0x840 +[ 212.676084] [] tcp_write_xmit+0x1e3/0xb20 +[ 212.676084] [] ? skb_page_frag_refill+0x87/0xd0 +[ 212.676084] [] tcp_push_one+0x2b/0x40 +[ 212.676084] [] tcp_sendmsg+0x926/0xc90 +[ 212.676084] [] inet_sendmsg+0x61/0xc0 +[ 212.676084] [] sock_aio_write+0x101/0x120 +[ 212.676084] [] ? vma_adjust+0x2e1/0x5d0 +[ 212.676084] [] ? timerqueue_add+0x60/0xb0 +[ 212.676084] [] do_sync_write+0x60/0x90 +[ 212.676084] [] ? rw_verify_area+0x54/0xf0 +[ 212.676084] [] vfs_write+0x186/0x190 +[ 212.676084] [] SyS_write+0x5d/0xa0 +[ 212.676084] [] system_call_fastpath+0x16/0x1b +[ 212.676084] Code: ca 41 89 dc 41 29 cc 45 31 db 29 c2 41 89 c5 89 d0 45 29 c5 f7 d0 c1 e8 1f e9 43 ff ff ff 66 0f 1f 44 00 00 31 c0 e9 7b ff ff ff <0f> 0b eb fe 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 c7 47 40 00 +[ 212.676084] RIP [] dql_completed+0x17f/0x190 +------------[ cut here ]------------ + +When a skb has frags, bytes_compl plus skb->len nr_frags times in cp_tx(). +It's not the correct value(actually, it should plus skb->len once) and it +will trigger the BUG_ON(bytes_compl > num_queued - dql->num_completed). +So only increase bytes_compl when finish sending all frags. 
pkts_compl also +has a wrong value, fix it too. + +It's introduced by commit 871f0d4c ("8139cp: enable bql"). + +Suggested-by: Eric Dumazet +Signed-off-by: Yang Yingliang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/8139cp.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/realtek/8139cp.c ++++ b/drivers/net/ethernet/realtek/8139cp.c +@@ -678,9 +678,6 @@ static void cp_tx (struct cp_private *cp + le32_to_cpu(txd->opts1) & 0xffff, + PCI_DMA_TODEVICE); + +- bytes_compl += skb->len; +- pkts_compl++; +- + if (status & LastFrag) { + if (status & (TxError | TxFIFOUnder)) { + netif_dbg(cp, tx_err, cp->dev, +@@ -702,6 +699,8 @@ static void cp_tx (struct cp_private *cp + netif_dbg(cp, tx_done, cp->dev, + "tx done, slot %d\n", tx_tail); + } ++ bytes_compl += skb->len; ++ pkts_compl++; + dev_kfree_skb_irq(skb); + } + diff --git a/queue-3.12/net-add-bug_on-if-kernel-advertises-msg_namelen-sizeof-struct-sockaddr_storage.patch b/queue-3.12/net-add-bug_on-if-kernel-advertises-msg_namelen-sizeof-struct-sockaddr_storage.patch new file mode 100644 index 00000000000..613c013293c --- /dev/null +++ b/queue-3.12/net-add-bug_on-if-kernel-advertises-msg_namelen-sizeof-struct-sockaddr_storage.patch @@ -0,0 +1,40 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Hannes Frederic Sowa +Date: Thu, 21 Nov 2013 03:14:34 +0100 +Subject: net: add BUG_ON if kernel advertises msg_namelen > sizeof(struct sockaddr_storage) + +From: Hannes Frederic Sowa + +[ Upstream commit 68c6beb373955da0886d8f4f5995b3922ceda4be ] + +In that case it is probable that kernel code overwrote part of the +stack. So we should bail out loudly here. + +The BUG_ON may be removed in future if we are sure all protocols are +conformant. + +Suggested-by: Eric Dumazet +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/socket.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/socket.c ++++ b/net/socket.c +@@ -221,12 +221,13 @@ static int move_addr_to_user(struct sock + int err; + int len; + ++ BUG_ON(klen > sizeof(struct sockaddr_storage)); + err = get_user(len, ulen); + if (err) + return err; + if (len > klen) + len = klen; +- if (len < 0 || len > sizeof(struct sockaddr_storage)) ++ if (len < 0) + return -EINVAL; + if (len) { + if (audit_sockaddr(klen, kaddr)) diff --git a/queue-3.12/net-clamp-msg_namelen-instead-of-returning-an-error.patch b/queue-3.12/net-clamp-msg_namelen-instead-of-returning-an-error.patch new file mode 100644 index 00000000000..e12c185aedb --- /dev/null +++ b/queue-3.12/net-clamp-msg_namelen-instead-of-returning-an-error.patch @@ -0,0 +1,52 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Dan Carpenter +Date: Wed, 27 Nov 2013 15:40:21 +0300 +Subject: net: clamp ->msg_namelen instead of returning an error + +From: Dan Carpenter + +[ Upstream commit db31c55a6fb245fdbb752a2ca4aefec89afabb06 ] + +If kmsg->msg_namelen > sizeof(struct sockaddr_storage) then in the +original code that would lead to memory corruption in the kernel if you +had audit configured. If you didn't have audit configured it was +harmless. + +There are some programs such as beta versions of Ruby which use too +large of a buffer and returning an error code breaks them. We should +clamp the ->msg_namelen value instead. + +Fixes: 1661bf364ae9 ("net: heap overflow in __audit_sockaddr()") +Reported-by: Eric Wong +Signed-off-by: Dan Carpenter +Tested-by: Eric Wong +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/compat.c | 2 +- + net/socket.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/net/compat.c ++++ b/net/compat.c +@@ -72,7 +72,7 @@ int get_compat_msghdr(struct msghdr *kms + __get_user(kmsg->msg_flags, &umsg->msg_flags)) + return -EFAULT; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) +- return -EINVAL; ++ kmsg->msg_namelen = sizeof(struct sockaddr_storage); + kmsg->msg_name = compat_ptr(tmp1); + kmsg->msg_iov = compat_ptr(tmp2); + kmsg->msg_control = compat_ptr(tmp3); +--- a/net/socket.c ++++ b/net/socket.c +@@ -1973,7 +1973,7 @@ static int copy_msghdr_from_user(struct + if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) + return -EFAULT; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) +- return -EINVAL; ++ kmsg->msg_namelen = sizeof(struct sockaddr_storage); + return 0; + } + diff --git a/queue-3.12/net-core-always-propagate-flag-changes-to-interfaces.patch b/queue-3.12/net-core-always-propagate-flag-changes-to-interfaces.patch new file mode 100644 index 00000000000..e62f93ab506 --- /dev/null +++ b/queue-3.12/net-core-always-propagate-flag-changes-to-interfaces.patch @@ -0,0 +1,66 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Vlad Yasevich +Date: Tue, 19 Nov 2013 20:47:15 -0500 +Subject: net: core: Always propagate flag changes to interfaces + +From: Vlad Yasevich + +[ Upstream commit d2615bf450694c1302d86b9cc8a8958edfe4c3a4 ] + +The following commit: + b6c40d68ff6498b7f63ddf97cf0aa818d748dee7 + net: only invoke dev->change_rx_flags when device is UP + +tried to fix a problem with VLAN devices and promiscuouse flag setting. +The issue was that VLAN device was setting a flag on an interface that +was down, thus resulting in bad promiscuity count. +This commit blocked flag propagation to any device that is currently +down. 
+ +A later commit: + deede2fabe24e00bd7e246eb81cd5767dc6fcfc7 + vlan: Don't propagate flag changes on down interfaces + +fixed VLAN code to only propagate flags when the VLAN interface is up, +thus fixing the same issue as above, only localized to VLAN. + +The problem we have now is that if we create a complex stack +involving multiple software devices like bridges, bonds, and vlans, +then it is possible that the flags would not propagate properly to +the physical devices. A simple example of the scenario is the +following: + + eth0----> bond0 ----> bridge0 ---> vlan50 + +If bond0 or eth0 happen to be down at the time bond0 is added to +the bridge, then eth0 will never have promisc mode set which is +currently required for operation as part of the bridge. As a +result, packets with vlan50 will be dropped by the interface. + +The only 2 devices that implement the special flag handling are +VLAN and DSA and they both have required code to prevent incorrect +flag propagation. As a result we can remove the generic solution +introduced in b6c40d68ff6498b7f63ddf97cf0aa818d748dee7 and leave +it to the individual devices to decide whether they will block +flag propagation or not. + +Reported-by: Stefan Priebe +Suggested-by: Veaceslav Falico +Signed-off-by: Vlad Yasevich +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4815,7 +4815,7 @@ static void dev_change_rx_flags(struct n + { + const struct net_device_ops *ops = dev->netdev_ops; + +- if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) ++ if (ops->ndo_change_rx_flags) + ops->ndo_change_rx_flags(dev, flags); + } + diff --git a/queue-3.12/net-fix-ip-rule-delete-table-256.patch b/queue-3.12/net-fix-ip-rule-delete-table-256.patch new file mode 100644 index 00000000000..84c754b1021 --- /dev/null +++ b/queue-3.12/net-fix-ip-rule-delete-table-256.patch @@ -0,0 +1,41 @@ +From foo@baz Thu Dec 5 16:16:37 PST 2013 +From: Andreas Henriksson +Date: Thu, 7 Nov 2013 18:26:38 +0100 +Subject: net: Fix "ip rule delete table 256" + +From: Andreas Henriksson + +[ Upstream commit 13eb2ab2d33c57ebddc57437a7d341995fc9138c ] + +When trying to delete a table >= 256 using iproute2 the local table +will be deleted. +The table id is specified as a netlink attribute when it needs more than +8 bits and iproute2 then sets the table field to RT_TABLE_UNSPEC (0). +Preconditions to matching the table id in the rule delete code +doesn't seem to take the "table id in netlink attribute" into consideration +so the frh_get_table helper function never gets to do its job when +matching against current rule. +Use the helper function twice instead of peeking at the table value directly. + +Originally reported at: http://bugs.debian.org/724783 + +Reported-by: Nicolas HICHER +Signed-off-by: Andreas Henriksson +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/fib_rules.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/core/fib_rules.c ++++ b/net/core/fib_rules.c +@@ -460,7 +460,8 @@ static int fib_nl_delrule(struct sk_buff + if (frh->action && (frh->action != rule->action)) + continue; + +- if (frh->table && (frh_get_table(frh, tb) != rule->table)) ++ if (frh_get_table(frh, tb) && ++ (frh_get_table(frh, tb) != rule->table)) + continue; + + if (tb[FRA_PRIORITY] && diff --git a/queue-3.12/net-mlx4_en-fixed-crash-when-port-type-is-changed.patch b/queue-3.12/net-mlx4_en-fixed-crash-when-port-type-is-changed.patch new file mode 100644 index 00000000000..2c21a3bc5b2 --- /dev/null +++ b/queue-3.12/net-mlx4_en-fixed-crash-when-port-type-is-changed.patch @@ -0,0 +1,44 @@ +From foo@baz Thu Dec 5 16:16:37 PST 2013 +From: Amir Vadai +Date: Thu, 7 Nov 2013 11:08:30 +0200 +Subject: net/mlx4_en: Fixed crash when port type is changed + +From: Amir Vadai + +[ Upstream commit 1ec4864b10171b0691ee196d7006ae56d2c153f2 ] + +timecounter_init() was called only after first potential +timecounter_read(). +Moved mlx4_en_init_timestamp() before mlx4_en_init_netdev() + +Signed-off-by: Amir Vadai +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/en_main.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c +@@ -264,6 +264,10 @@ static void *mlx4_en_add(struct mlx4_dev + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) + mdev->port_cnt++; + ++ /* Initialize time stamp mechanism */ ++ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) ++ mlx4_en_init_timestamp(mdev); ++ + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { + if (!dev->caps.comp_pool) { + mdev->profile.prof[i].rx_ring_num = +@@ -301,10 +305,6 @@ static void *mlx4_en_add(struct mlx4_dev + mdev->pndev[i] = NULL; + } + +- /* Initialize time stamp mechanism */ +- if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) +- mlx4_en_init_timestamp(mdev); +- + return mdev; + + err_mr: diff --git a/queue-3.12/net-mv643xx_eth-potential-null-dereference-in.patch b/queue-3.12/net-mv643xx_eth-potential-null-dereference-in.patch index 25ff158a1f2..be03565c5ce 100644 --- a/queue-3.12/net-mv643xx_eth-potential-null-dereference-in.patch +++ b/queue-3.12/net-mv643xx_eth-potential-null-dereference-in.patch @@ -1,11 +1,12 @@ From foo@baz Thu Dec 5 16:16:37 PST 2013 From: Dan Carpenter Date: Wed, 13 Nov 2013 10:52:47 +0300 -Subject: net: mv643xx_eth: potential NULL dereference in - probe() +Subject: net: mv643xx_eth: potential NULL dereference in probe() From: Dan Carpenter +upstream commit 6115c11fe1a5a636ac99fc823b00df4ff3c0674e + We assume that "mp->phy" can be NULL a couple lines before the dereference. 
diff --git a/queue-3.12/net-rework-recvmsg-handler-msg_name-and-msg_namelen-logic.patch b/queue-3.12/net-rework-recvmsg-handler-msg_name-and-msg_namelen-logic.patch new file mode 100644 index 00000000000..fd0412e9337 --- /dev/null +++ b/queue-3.12/net-rework-recvmsg-handler-msg_name-and-msg_namelen-logic.patch @@ -0,0 +1,700 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Hannes Frederic Sowa +Date: Thu, 21 Nov 2013 03:14:22 +0100 +Subject: net: rework recvmsg handler msg_name and msg_namelen logic + +From: Hannes Frederic Sowa + +[ Upstream commit f3d3342602f8bcbf37d7c46641cb9bca7618eb1c ] + +This patch now always passes msg->msg_namelen as 0. recvmsg handlers must +set msg_namelen to the proper size <= sizeof(struct sockaddr_storage) +to return msg_name to the user. + +This prevents numerous uninitialized memory leaks we had in the +recvmsg handlers and makes it harder for new code to accidentally leak +uninitialized memory. + +Optimize for the case recvfrom is called with NULL as address. We don't +need to copy the address at all, so set it to NULL before invoking the +recvmsg handler. We can do so, because all the recvmsg handlers must +cope with the case a plain read() is called on them. read() also sets +msg_name to NULL. + +Also document these changes in include/linux/net.h as suggested by David +Miller. + +Changes since RFC: + +Set msg->msg_name = NULL if user specified a NULL in msg_name but had a +non-null msg_namelen in verify_iovec/verify_compat_iovec. This doesn't +affect sendto as it would bail out earlier while trying to copy-in the +address. It also more naturally reflects the logic by the callers of +verify_iovec. + +With this change in place I could remove " +if (!uaddr || msg_sys->msg_namelen == 0) + msg->msg_name = NULL +". + +This change does not alter the user visible error logic as we ignore +msg_namelen as long as msg_name is NULL. + +Also remove two unnecessary curly brackets in ___sys_recvmsg and change +comments to netdev style. 
+ +Cc: David Miller +Suggested-by: Eric Dumazet +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + crypto/algif_hash.c | 2 -- + crypto/algif_skcipher.c | 1 - + drivers/isdn/mISDN/socket.c | 13 ++++--------- + drivers/net/ppp/pppoe.c | 2 -- + include/linux/net.h | 8 ++++++++ + net/appletalk/ddp.c | 16 +++++++--------- + net/atm/common.c | 2 -- + net/ax25/af_ax25.c | 4 ++-- + net/bluetooth/af_bluetooth.c | 4 ---- + net/bluetooth/hci_sock.c | 2 -- + net/bluetooth/rfcomm/sock.c | 1 - + net/bluetooth/sco.c | 1 - + net/caif/caif_socket.c | 4 ---- + net/compat.c | 3 ++- + net/core/iovec.c | 3 ++- + net/ipx/af_ipx.c | 3 +-- + net/irda/af_irda.c | 4 ---- + net/iucv/af_iucv.c | 2 -- + net/key/af_key.c | 1 - + net/l2tp/l2tp_ppp.c | 2 -- + net/llc/af_llc.c | 2 -- + net/netlink/af_netlink.c | 2 -- + net/netrom/af_netrom.c | 3 +-- + net/nfc/llcp_sock.c | 2 -- + net/nfc/rawsock.c | 2 -- + net/packet/af_packet.c | 32 +++++++++++++++----------------- + net/rds/recv.c | 2 -- + net/rose/af_rose.c | 8 +++++--- + net/rxrpc/ar-recvmsg.c | 9 ++++++--- + net/socket.c | 19 +++++++++++-------- + net/tipc/socket.c | 6 ------ + net/unix/af_unix.c | 5 ----- + net/vmw_vsock/af_vsock.c | 2 -- + net/vmw_vsock/vmci_transport.c | 2 -- + net/x25/af_x25.c | 3 +-- + 35 files changed, 65 insertions(+), 112 deletions(-) + +--- a/crypto/algif_hash.c ++++ b/crypto/algif_hash.c +@@ -161,8 +161,6 @@ static int hash_recvmsg(struct kiocb *un + else if (len < ds) + msg->msg_flags |= MSG_TRUNC; + +- msg->msg_namelen = 0; +- + lock_sock(sk); + if (ctx->more) { + ctx->more = 0; +--- a/crypto/algif_skcipher.c ++++ b/crypto/algif_skcipher.c +@@ -432,7 +432,6 @@ static int skcipher_recvmsg(struct kiocb + long copied = 0; + + lock_sock(sk); +- msg->msg_namelen = 0; + for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0; + iovlen--, iov++) { + unsigned long seglen = iov->iov_len; +--- a/drivers/isdn/mISDN/socket.c ++++ b/drivers/isdn/mISDN/socket.c 
+@@ -117,7 +117,6 @@ mISDN_sock_recvmsg(struct kiocb *iocb, s + { + struct sk_buff *skb; + struct sock *sk = sock->sk; +- struct sockaddr_mISDN *maddr; + + int copied, err; + +@@ -135,9 +134,9 @@ mISDN_sock_recvmsg(struct kiocb *iocb, s + if (!skb) + return err; + +- if (msg->msg_namelen >= sizeof(struct sockaddr_mISDN)) { +- msg->msg_namelen = sizeof(struct sockaddr_mISDN); +- maddr = (struct sockaddr_mISDN *)msg->msg_name; ++ if (msg->msg_name) { ++ struct sockaddr_mISDN *maddr = msg->msg_name; ++ + maddr->family = AF_ISDN; + maddr->dev = _pms(sk)->dev->id; + if ((sk->sk_protocol == ISDN_P_LAPD_TE) || +@@ -150,11 +149,7 @@ mISDN_sock_recvmsg(struct kiocb *iocb, s + maddr->sapi = _pms(sk)->ch.addr & 0xFF; + maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xFF; + } +- } else { +- if (msg->msg_namelen) +- printk(KERN_WARNING "%s: too small namelen %d\n", +- __func__, msg->msg_namelen); +- msg->msg_namelen = 0; ++ msg->msg_namelen = sizeof(*maddr); + } + + copied = skb->len + MISDN_HEADER_LEN; +--- a/drivers/net/ppp/pppoe.c ++++ b/drivers/net/ppp/pppoe.c +@@ -979,8 +979,6 @@ static int pppoe_recvmsg(struct kiocb *i + if (error < 0) + goto end; + +- m->msg_namelen = 0; +- + if (skb) { + total_len = min_t(size_t, total_len, skb->len); + error = skb_copy_datagram_iovec(skb, 0, m->msg_iov, total_len); +--- a/include/linux/net.h ++++ b/include/linux/net.h +@@ -163,6 +163,14 @@ struct proto_ops { + #endif + int (*sendmsg) (struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len); ++ /* Notes for implementing recvmsg: ++ * =============================== ++ * msg->msg_namelen should get updated by the recvmsg handlers ++ * iff msg_name != NULL. It is by default 0 to prevent ++ * returning uninitialized memory to user space. The recvfrom ++ * handlers can assume that msg.msg_name is either NULL or has ++ * a minimum size of sizeof(struct sockaddr_storage). 
++ */ + int (*recvmsg) (struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len, + int flags); +--- a/net/appletalk/ddp.c ++++ b/net/appletalk/ddp.c +@@ -1735,7 +1735,6 @@ static int atalk_recvmsg(struct kiocb *i + size_t size, int flags) + { + struct sock *sk = sock->sk; +- struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name; + struct ddpehdr *ddp; + int copied = 0; + int offset = 0; +@@ -1764,14 +1763,13 @@ static int atalk_recvmsg(struct kiocb *i + } + err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied); + +- if (!err) { +- if (sat) { +- sat->sat_family = AF_APPLETALK; +- sat->sat_port = ddp->deh_sport; +- sat->sat_addr.s_node = ddp->deh_snode; +- sat->sat_addr.s_net = ddp->deh_snet; +- } +- msg->msg_namelen = sizeof(*sat); ++ if (!err && msg->msg_name) { ++ struct sockaddr_at *sat = msg->msg_name; ++ sat->sat_family = AF_APPLETALK; ++ sat->sat_port = ddp->deh_sport; ++ sat->sat_addr.s_node = ddp->deh_snode; ++ sat->sat_addr.s_net = ddp->deh_snet; ++ msg->msg_namelen = sizeof(*sat); + } + + skb_free_datagram(sk, skb); /* Free the datagram. 
*/ +--- a/net/atm/common.c ++++ b/net/atm/common.c +@@ -531,8 +531,6 @@ int vcc_recvmsg(struct kiocb *iocb, stru + struct sk_buff *skb; + int copied, error = -EINVAL; + +- msg->msg_namelen = 0; +- + if (sock->state != SS_CONNECTED) + return -ENOTCONN; + +--- a/net/ax25/af_ax25.c ++++ b/net/ax25/af_ax25.c +@@ -1636,11 +1636,11 @@ static int ax25_recvmsg(struct kiocb *io + + skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + +- if (msg->msg_namelen != 0) { +- struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name; ++ if (msg->msg_name) { + ax25_digi digi; + ax25_address src; + const unsigned char *mac = skb_mac_header(skb); ++ struct sockaddr_ax25 *sax = msg->msg_name; + + memset(sax, 0, sizeof(struct full_sockaddr_ax25)); + ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, +--- a/net/bluetooth/af_bluetooth.c ++++ b/net/bluetooth/af_bluetooth.c +@@ -221,8 +221,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, + if (flags & (MSG_OOB)) + return -EOPNOTSUPP; + +- msg->msg_namelen = 0; +- + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) { + if (sk->sk_shutdown & RCV_SHUTDOWN) +@@ -287,8 +285,6 @@ int bt_sock_stream_recvmsg(struct kiocb + if (flags & MSG_OOB) + return -EOPNOTSUPP; + +- msg->msg_namelen = 0; +- + BT_DBG("sk %p size %zu", sk, size); + + lock_sock(sk); +--- a/net/bluetooth/hci_sock.c ++++ b/net/bluetooth/hci_sock.c +@@ -752,8 +752,6 @@ static int hci_sock_recvmsg(struct kiocb + if (!skb) + return err; + +- msg->msg_namelen = 0; +- + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; +--- a/net/bluetooth/rfcomm/sock.c ++++ b/net/bluetooth/rfcomm/sock.c +@@ -608,7 +608,6 @@ static int rfcomm_sock_recvmsg(struct ki + + if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { + rfcomm_dlc_accept(d); +- msg->msg_namelen = 0; + return 0; + } + +--- a/net/bluetooth/sco.c ++++ b/net/bluetooth/sco.c +@@ -715,7 +715,6 @@ static int sco_sock_recvmsg(struct kiocb + test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { 
+ sco_conn_defer_accept(pi->conn->hcon, pi->setting); + sk->sk_state = BT_CONFIG; +- msg->msg_namelen = 0; + + release_sock(sk); + return 0; +--- a/net/caif/caif_socket.c ++++ b/net/caif/caif_socket.c +@@ -286,8 +286,6 @@ static int caif_seqpkt_recvmsg(struct ki + if (m->msg_flags&MSG_OOB) + goto read_error; + +- m->msg_namelen = 0; +- + skb = skb_recv_datagram(sk, flags, 0 , &ret); + if (!skb) + goto read_error; +@@ -361,8 +359,6 @@ static int caif_stream_recvmsg(struct ki + if (flags&MSG_OOB) + goto out; + +- msg->msg_namelen = 0; +- + /* + * Lock the socket to prevent queue disordering + * while sleeps in memcpy_tomsg +--- a/net/compat.c ++++ b/net/compat.c +@@ -93,7 +93,8 @@ int verify_compat_iovec(struct msghdr *k + if (err < 0) + return err; + } +- kern_msg->msg_name = kern_address; ++ if (kern_msg->msg_name) ++ kern_msg->msg_name = kern_address; + } else + kern_msg->msg_name = NULL; + +--- a/net/core/iovec.c ++++ b/net/core/iovec.c +@@ -48,7 +48,8 @@ int verify_iovec(struct msghdr *m, struc + if (err < 0) + return err; + } +- m->msg_name = address; ++ if (m->msg_name) ++ m->msg_name = address; + } else { + m->msg_name = NULL; + } +--- a/net/ipx/af_ipx.c ++++ b/net/ipx/af_ipx.c +@@ -1823,8 +1823,6 @@ static int ipx_recvmsg(struct kiocb *ioc + if (skb->tstamp.tv64) + sk->sk_stamp = skb->tstamp; + +- msg->msg_namelen = sizeof(*sipx); +- + if (sipx) { + sipx->sipx_family = AF_IPX; + sipx->sipx_port = ipx->ipx_source.sock; +@@ -1832,6 +1830,7 @@ static int ipx_recvmsg(struct kiocb *ioc + sipx->sipx_network = IPX_SKB_CB(skb)->ipx_source_net; + sipx->sipx_type = ipx->ipx_type; + sipx->sipx_zero = 0; ++ msg->msg_namelen = sizeof(*sipx); + } + rc = copied; + +--- a/net/irda/af_irda.c ++++ b/net/irda/af_irda.c +@@ -1385,8 +1385,6 @@ static int irda_recvmsg_dgram(struct kio + + IRDA_DEBUG(4, "%s()\n", __func__); + +- msg->msg_namelen = 0; +- + skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, + flags & MSG_DONTWAIT, &err); + if (!skb) +@@ -1451,8 +1449,6 @@ static int 
irda_recvmsg_stream(struct ki + target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); + timeo = sock_rcvtimeo(sk, noblock); + +- msg->msg_namelen = 0; +- + do { + int chunk; + struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); +--- a/net/iucv/af_iucv.c ++++ b/net/iucv/af_iucv.c +@@ -1324,8 +1324,6 @@ static int iucv_sock_recvmsg(struct kioc + int err = 0; + u32 offset; + +- msg->msg_namelen = 0; +- + if ((sk->sk_state == IUCV_DISCONN) && + skb_queue_empty(&iucv->backlog_skb_q) && + skb_queue_empty(&sk->sk_receive_queue) && +--- a/net/key/af_key.c ++++ b/net/key/af_key.c +@@ -3616,7 +3616,6 @@ static int pfkey_recvmsg(struct kiocb *k + if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) + goto out; + +- msg->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); + if (skb == NULL) + goto out; +--- a/net/l2tp/l2tp_ppp.c ++++ b/net/l2tp/l2tp_ppp.c +@@ -197,8 +197,6 @@ static int pppol2tp_recvmsg(struct kiocb + if (sk->sk_state & PPPOX_BOUND) + goto end; + +- msg->msg_namelen = 0; +- + err = 0; + skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, + flags & MSG_DONTWAIT, &err); +--- a/net/llc/af_llc.c ++++ b/net/llc/af_llc.c +@@ -720,8 +720,6 @@ static int llc_ui_recvmsg(struct kiocb * + int target; /* Read at least this many bytes */ + long timeo; + +- msg->msg_namelen = 0; +- + lock_sock(sk); + copied = -ENOTCONN; + if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN)) +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -2335,8 +2335,6 @@ static int netlink_recvmsg(struct kiocb + } + #endif + +- msg->msg_namelen = 0; +- + copied = data_skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; +--- a/net/netrom/af_netrom.c ++++ b/net/netrom/af_netrom.c +@@ -1179,10 +1179,9 @@ static int nr_recvmsg(struct kiocb *iocb + sax->sax25_family = AF_NETROM; + skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, + AX25_ADDR_LEN); ++ msg->msg_namelen = sizeof(*sax); + } + +- 
msg->msg_namelen = sizeof(*sax); +- + skb_free_datagram(sk, skb); + + release_sock(sk); +--- a/net/nfc/llcp_sock.c ++++ b/net/nfc/llcp_sock.c +@@ -807,8 +807,6 @@ static int llcp_sock_recvmsg(struct kioc + + pr_debug("%p %zu\n", sk, len); + +- msg->msg_namelen = 0; +- + lock_sock(sk); + + if (sk->sk_state == LLCP_CLOSED && +--- a/net/nfc/rawsock.c ++++ b/net/nfc/rawsock.c +@@ -241,8 +241,6 @@ static int rawsock_recvmsg(struct kiocb + if (!skb) + return rc; + +- msg->msg_namelen = 0; +- + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2660,7 +2660,6 @@ static int packet_recvmsg(struct kiocb * + struct sock *sk = sock->sk; + struct sk_buff *skb; + int copied, err; +- struct sockaddr_ll *sll; + int vnet_hdr_len = 0; + + err = -EINVAL; +@@ -2744,22 +2743,10 @@ static int packet_recvmsg(struct kiocb * + goto out_free; + } + +- /* +- * If the address length field is there to be filled in, we fill +- * it in now. ++ /* You lose any data beyond the buffer you gave. If it worries ++ * a user program they can ask the device for its MTU ++ * anyway. + */ +- +- sll = &PACKET_SKB_CB(skb)->sa.ll; +- if (sock->type == SOCK_PACKET) +- msg->msg_namelen = sizeof(struct sockaddr_pkt); +- else +- msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); +- +- /* +- * You lose any data beyond the buffer you gave. If it worries a +- * user program they can ask the device for its MTU anyway. +- */ +- + copied = skb->len; + if (copied > len) { + copied = len; +@@ -2772,9 +2759,20 @@ static int packet_recvmsg(struct kiocb * + + sock_recv_ts_and_drops(msg, sk, skb); + +- if (msg->msg_name) ++ if (msg->msg_name) { ++ /* If the address length field is there to be filled ++ * in, we fill it in now. 
++ */ ++ if (sock->type == SOCK_PACKET) { ++ msg->msg_namelen = sizeof(struct sockaddr_pkt); ++ } else { ++ struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; ++ msg->msg_namelen = sll->sll_halen + ++ offsetof(struct sockaddr_ll, sll_addr); ++ } + memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, + msg->msg_namelen); ++ } + + if (pkt_sk(sk)->auxdata) { + struct tpacket_auxdata aux; +--- a/net/rds/recv.c ++++ b/net/rds/recv.c +@@ -410,8 +410,6 @@ int rds_recvmsg(struct kiocb *iocb, stru + + rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo); + +- msg->msg_namelen = 0; +- + if (msg_flags & MSG_OOB) + goto out; + +--- a/net/rose/af_rose.c ++++ b/net/rose/af_rose.c +@@ -1216,7 +1216,6 @@ static int rose_recvmsg(struct kiocb *io + { + struct sock *sk = sock->sk; + struct rose_sock *rose = rose_sk(sk); +- struct sockaddr_rose *srose = (struct sockaddr_rose *)msg->msg_name; + size_t copied; + unsigned char *asmptr; + struct sk_buff *skb; +@@ -1252,8 +1251,11 @@ static int rose_recvmsg(struct kiocb *io + + skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + +- if (srose != NULL) { +- memset(srose, 0, msg->msg_namelen); ++ if (msg->msg_name) { ++ struct sockaddr_rose *srose; ++ ++ memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); ++ srose = msg->msg_name; + srose->srose_family = AF_ROSE; + srose->srose_addr = rose->dest_addr; + srose->srose_call = rose->dest_call; +--- a/net/rxrpc/ar-recvmsg.c ++++ b/net/rxrpc/ar-recvmsg.c +@@ -143,10 +143,13 @@ int rxrpc_recvmsg(struct kiocb *iocb, st + + /* copy the peer address and timestamp */ + if (!continue_call) { +- if (msg->msg_name && msg->msg_namelen > 0) ++ if (msg->msg_name) { ++ size_t len = ++ sizeof(call->conn->trans->peer->srx); + memcpy(msg->msg_name, +- &call->conn->trans->peer->srx, +- sizeof(call->conn->trans->peer->srx)); ++ &call->conn->trans->peer->srx, len); ++ msg->msg_namelen = len; ++ } + sock_recv_ts_and_drops(msg, &rx->sk, skb); + } + +--- a/net/socket.c ++++ b/net/socket.c +@@ 
-1840,8 +1840,10 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void + msg.msg_iov = &iov; + iov.iov_len = size; + iov.iov_base = ubuf; +- msg.msg_name = (struct sockaddr *)&address; +- msg.msg_namelen = sizeof(address); ++ /* Save some cycles and don't copy the address if not needed */ ++ msg.msg_name = addr ? (struct sockaddr *)&address : NULL; ++ /* We assume all kernel code knows the size of sockaddr_storage */ ++ msg.msg_namelen = 0; + if (sock->file->f_flags & O_NONBLOCK) + flags |= MSG_DONTWAIT; + err = sock_recvmsg(sock, &msg, size, flags); +@@ -2221,16 +2223,14 @@ static int ___sys_recvmsg(struct socket + goto out; + } + +- /* +- * Save the user-mode address (verify_iovec will change the +- * kernel msghdr to use the kernel address space) ++ /* Save the user-mode address (verify_iovec will change the ++ * kernel msghdr to use the kernel address space) + */ +- + uaddr = (__force void __user *)msg_sys->msg_name; + uaddr_len = COMPAT_NAMELEN(msg); +- if (MSG_CMSG_COMPAT & flags) { ++ if (MSG_CMSG_COMPAT & flags) + err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE); +- } else ++ else + err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE); + if (err < 0) + goto out_freeiov; +@@ -2239,6 +2239,9 @@ static int ___sys_recvmsg(struct socket + cmsg_ptr = (unsigned long)msg_sys->msg_control; + msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); + ++ /* We assume all kernel code knows the size of sockaddr_storage */ ++ msg_sys->msg_namelen = 0; ++ + if (sock->file->f_flags & O_NONBLOCK) + flags |= MSG_DONTWAIT; + err = (nosec ? 
sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, +--- a/net/tipc/socket.c ++++ b/net/tipc/socket.c +@@ -984,9 +984,6 @@ static int recv_msg(struct kiocb *iocb, + goto exit; + } + +- /* will be updated in set_orig_addr() if needed */ +- m->msg_namelen = 0; +- + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + restart: + +@@ -1095,9 +1092,6 @@ static int recv_stream(struct kiocb *ioc + goto exit; + } + +- /* will be updated in set_orig_addr() if needed */ +- m->msg_namelen = 0; +- + target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -1754,7 +1754,6 @@ static void unix_copy_addr(struct msghdr + { + struct unix_sock *u = unix_sk(sk); + +- msg->msg_namelen = 0; + if (u->addr) { + msg->msg_namelen = u->addr->len; + memcpy(msg->msg_name, u->addr->name, u->addr->len); +@@ -1778,8 +1777,6 @@ static int unix_dgram_recvmsg(struct kio + if (flags&MSG_OOB) + goto out; + +- msg->msg_namelen = 0; +- + err = mutex_lock_interruptible(&u->readlock); + if (err) { + err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); +@@ -1924,8 +1921,6 @@ static int unix_stream_recvmsg(struct ki + target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); + timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); + +- msg->msg_namelen = 0; +- + /* Lock the socket to prevent queue disordering + * while sleeps in memcpy_tomsg + */ +--- a/net/vmw_vsock/af_vsock.c ++++ b/net/vmw_vsock/af_vsock.c +@@ -1662,8 +1662,6 @@ vsock_stream_recvmsg(struct kiocb *kiocb + vsk = vsock_sk(sk); + err = 0; + +- msg->msg_namelen = 0; +- + lock_sock(sk); + + if (sk->sk_state != SS_CONNECTED) { +--- a/net/vmw_vsock/vmci_transport.c ++++ b/net/vmw_vsock/vmci_transport.c +@@ -1746,8 +1746,6 @@ static int vmci_transport_dgram_dequeue( + if (flags & MSG_OOB || flags & MSG_ERRQUEUE) + return -EOPNOTSUPP; + +- msg->msg_namelen = 0; +- + /* Retrieve the head sk_buff from the socket's receive queue. 
*/ + err = 0; + skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); +--- a/net/x25/af_x25.c ++++ b/net/x25/af_x25.c +@@ -1340,10 +1340,9 @@ static int x25_recvmsg(struct kiocb *ioc + if (sx25) { + sx25->sx25_family = AF_X25; + sx25->sx25_addr = x25->dest_addr; ++ msg->msg_namelen = sizeof(*sx25); + } + +- msg->msg_namelen = sizeof(struct sockaddr_x25); +- + x25_check_rbuf(sk); + rc = copied; + out_free_dgram: diff --git a/queue-3.12/net-smc91-fix-crash-regression-on-the-versatile.patch b/queue-3.12/net-smc91-fix-crash-regression-on-the-versatile.patch new file mode 100644 index 00000000000..a9af1d2467f --- /dev/null +++ b/queue-3.12/net-smc91-fix-crash-regression-on-the-versatile.patch @@ -0,0 +1,139 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Linus Walleij +Date: Thu, 28 Nov 2013 14:33:52 +0100 +Subject: net: smc91: fix crash regression on the versatile + +From: Linus Walleij + +[ Upstream commit a0c20fb02592d372e744d1d739cda3e1b3defaae ] + +After commit e9e4ea74f06635f2ffc1dffe5ef40c854faa0a90 +"net: smc91x: dont't use SMC_outw for fixing up halfword-aligned data" +The Versatile SMSC LAN91C111 is crashing like this: + +------------[ cut here ]------------ +kernel BUG at /home/linus/linux/drivers/net/ethernet/smsc/smc91x.c:599! 
+Internal error: Oops - BUG: 0 [#1] ARM +Modules linked in: +CPU: 0 PID: 43 Comm: udhcpc Not tainted 3.13.0-rc1+ #24 +task: c6ccfaa0 ti: c6cd0000 task.ti: c6cd0000 +PC is at smc_hardware_send_pkt+0x198/0x22c +LR is at smc_hardware_send_pkt+0x24/0x22c +pc : [] lr : [] psr: 20000013 +sp : c6cd1d08 ip : 00000001 fp : 00000000 +r10: c02adb08 r9 : 00000000 r8 : c6ced802 +r7 : c786fba0 r6 : 00000146 r5 : c8800000 r4 : c78d6000 +r3 : 0000000f r2 : 00000146 r1 : 00000000 r0 : 00000031 +Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user +Control: 0005317f Table: 06cf4000 DAC: 00000015 +Process udhcpc (pid: 43, stack limit = 0xc6cd01c0) +Stack: (0xc6cd1d08 to 0xc6cd2000) +1d00: 00000010 c8800000 c78d6000 c786fba0 c78d6000 c01be868 +1d20: c01be7a4 00004000 00000000 c786fba0 c6c12b80 c0208554 000004d0 c780fc60 +1d40: 00000220 c01fb734 00000000 00000000 00000000 c6c9a440 c6c12b80 c78d6000 +1d60: c786fba0 c6c9a440 00000000 c021d1d8 00000000 00000000 c6c12b80 c78d6000 +1d80: c786fba0 00000001 c6c9a440 c02087f8 c6c9a4a0 00080008 00000000 00000000 +1da0: c78d6000 c786fba0 c78d6000 00000138 00000000 00000000 00000000 00000000 +1dc0: 00000000 c027ba74 00000138 00000138 00000001 00000010 c6cedc00 00000000 +1de0: 00000008 c7404400 c6cd1eec c6cd1f14 c067a73c c065c0b8 00000000 c067a740 +1e00: 01ffffff 002040d0 00000000 00000000 00000000 00000000 00000000 ffffffff +1e20: 43004400 00110022 c6cdef20 c027ae8c c6ccfaa0 be82d65c 00000014 be82d3cc +1e40: 00000000 00000000 00000000 c01f2870 00000000 00000000 00000000 c6cd1e88 +1e60: c6ccfaa0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 +1e80: 00000000 00000000 00000031 c7802310 c7802300 00000138 c7404400 c0771da0 +1ea0: 00000000 c6cd1eec c7800340 00000138 be82d65c 00000014 be82d3cc c6cd1f08 +1ec0: 00000014 00000000 c7404400 c7404400 00000138 c01f4628 c78d6000 00000000 +1ee0: 00000000 be82d3cc 00000138 c6cd1f08 00000014 c6cd1ee4 00000001 00000000 +1f00: 00000000 00000000 00080011 00000002 06000000 ffffffff 0000ffff 
00000002 +1f20: 06000000 ffffffff 0000ffff c00928c8 c065c520 c6cd1f58 00000003 c009299c +1f40: 00000003 c065c520 c7404400 00000000 c7404400 c01f2218 c78106b0 c7441cb0 +1f60: 00000000 00000006 c06799fc 00000000 00000000 00000006 00000000 c01f3ee0 +1f80: 00000000 00000000 be82d678 be82d65c 00000014 00000001 00000122 c00139c8 +1fa0: c6cd0000 c0013840 be82d65c 00000014 00000006 be82d3cc 00000138 00000000 +1fc0: be82d65c 00000014 00000001 00000122 00000000 00000000 00018cb1 00000000 +1fe0: 00003801 be82d3a8 0003a0c7 b6e9af08 60000010 00000006 00000000 00000000 +[] (smc_hardware_send_pkt+0x198/0x22c) from [] (smc_hard_start_xmit+0xc4/0x1e8) +[] (smc_hard_start_xmit+0xc4/0x1e8) from [] (dev_hard_start_xmit+0x460/0x4cc) +[] (dev_hard_start_xmit+0x460/0x4cc) from [] (sch_direct_xmit+0x94/0x18c) +[] (sch_direct_xmit+0x94/0x18c) from [] (dev_queue_xmit+0x238/0x42c) +[] (dev_queue_xmit+0x238/0x42c) from [] (packet_sendmsg+0xbe8/0xd28) +[] (packet_sendmsg+0xbe8/0xd28) from [] (sock_sendmsg+0x84/0xa8) +[] (sock_sendmsg+0x84/0xa8) from [] (SyS_sendto+0xb8/0xdc) +[] (SyS_sendto+0xb8/0xdc) from [] (ret_fast_syscall+0x0/0x2c) +Code: e3130002 1a000001 e3130001 0affffcd (e7f001f2) +---[ end trace 81104fe70e8da7fe ]--- +Kernel panic - not syncing: Fatal exception in interrupt + +This is because the macro operations in smc91x.h defined +for Versatile are missing SMC_outsw() as used in this +commit. + +The Versatile needs and uses the same accessors as the other +platforms in the first if(...) clause, just switch it to using +that and we have one problem less to worry about. + +This includes a hunk of a patch from Will Deacon fixing +the other 32bit platforms as well: Innokom, Ramses, PXA, +PCM027. + +Checkpatch complains about spacing, but I have opted to +follow the style of this .h-file. + +Cc: Russell King +Cc: Nicolas Pitre +Cc: Eric Miao +Cc: Jonathan Cameron +Cc: stable@vger.kernel.org +Signed-off-by: Will Deacon +Signed-off-by: Linus Walleij +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/smsc/smc91x.h | 22 ++++------------------ + 1 file changed, 4 insertions(+), 18 deletions(-) + +--- a/drivers/net/ethernet/smsc/smc91x.h ++++ b/drivers/net/ethernet/smsc/smc91x.h +@@ -46,7 +46,8 @@ + defined(CONFIG_MACH_LITTLETON) ||\ + defined(CONFIG_MACH_ZYLONITE2) ||\ + defined(CONFIG_ARCH_VIPER) ||\ +- defined(CONFIG_MACH_STARGATE2) ++ defined(CONFIG_MACH_STARGATE2) ||\ ++ defined(CONFIG_ARCH_VERSATILE) + + #include + +@@ -154,6 +155,8 @@ static inline void SMC_outw(u16 val, voi + #define SMC_outl(v, a, r) writel(v, (a) + (r)) + #define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) + #define SMC_outsl(a, r, p, l) writesl((a) + (r), p, l) ++#define SMC_insw(a, r, p, l) readsw((a) + (r), p, l) ++#define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) + #define SMC_IRQ_FLAGS (-1) /* from resource */ + + /* We actually can't write halfwords properly if not word aligned */ +@@ -206,23 +209,6 @@ SMC_outw(u16 val, void __iomem *ioaddr, + #define RPC_LSA_DEFAULT RPC_LED_TX_RX + #define RPC_LSB_DEFAULT RPC_LED_100_10 + +-#elif defined(CONFIG_ARCH_VERSATILE) +- +-#define SMC_CAN_USE_8BIT 1 +-#define SMC_CAN_USE_16BIT 1 +-#define SMC_CAN_USE_32BIT 1 +-#define SMC_NOWAIT 1 +- +-#define SMC_inb(a, r) readb((a) + (r)) +-#define SMC_inw(a, r) readw((a) + (r)) +-#define SMC_inl(a, r) readl((a) + (r)) +-#define SMC_outb(v, a, r) writeb(v, (a) + (r)) +-#define SMC_outw(v, a, r) writew(v, (a) + (r)) +-#define SMC_outl(v, a, r) writel(v, (a) + (r)) +-#define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) +-#define SMC_outsl(a, r, p, l) writesl((a) + (r), p, l) +-#define SMC_IRQ_FLAGS (-1) /* from resource */ +- + #elif defined(CONFIG_MN10300) + + /* diff --git a/queue-3.12/net-tcp-fix-panic-in-tcp_fastopen_cache_set.patch b/queue-3.12/net-tcp-fix-panic-in-tcp_fastopen_cache_set.patch new file mode 100644 index 00000000000..fb38704d707 --- /dev/null +++ b/queue-3.12/net-tcp-fix-panic-in-tcp_fastopen_cache_set.patch 
@@ -0,0 +1,43 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Eric Dumazet +Date: Wed, 13 Nov 2013 15:00:46 -0800 +Subject: net-tcp: fix panic in tcp_fastopen_cache_set() + +From: Eric Dumazet + +[ Upstream commit dccf76ca6b626c0c4a4e09bb221adee3270ab0ef ] + +We had some reports of crashes using TCP fastopen, and Dave Jones +gave a nice stack trace pointing to the error. + +Issue is that tcp_get_metrics() should not be called with a NULL dst + +Fixes: 1fe4c481ba637 ("net-tcp: Fast Open client - cookie cache") +Signed-off-by: Eric Dumazet +Reported-by: Dave Jones +Cc: Yuchung Cheng +Acked-by: Yuchung Cheng +Tested-by: Dave Jones +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_metrics.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -659,10 +659,13 @@ void tcp_fastopen_cache_get(struct sock + void tcp_fastopen_cache_set(struct sock *sk, u16 mss, + struct tcp_fastopen_cookie *cookie, bool syn_lost) + { ++ struct dst_entry *dst = __sk_dst_get(sk); + struct tcp_metrics_block *tm; + ++ if (!dst) ++ return; + rcu_read_lock(); +- tm = tcp_get_metrics(sk, __sk_dst_get(sk), true); ++ tm = tcp_get_metrics(sk, dst, true); + if (tm) { + struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen; + diff --git a/queue-3.12/net-update-consumers-of-msg_more-to-recognize-msg_sendpage_notlast.patch b/queue-3.12/net-update-consumers-of-msg_more-to-recognize-msg_sendpage_notlast.patch new file mode 100644 index 00000000000..35ef99d75be --- /dev/null +++ b/queue-3.12/net-update-consumers-of-msg_more-to-recognize-msg_sendpage_notlast.patch @@ -0,0 +1,71 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Shawn Landden +Date: Sun, 24 Nov 2013 22:36:28 -0800 +Subject: net: update consumers of MSG_MORE to recognize MSG_SENDPAGE_NOTLAST + +From: Shawn Landden + +[ Upstream commit d3f7d56a7a4671d395e8af87071068a195257bf6 ] + +Commit 35f9c09fe (tcp: tcp_sendpages() should call 
tcp_push() once) +added an internal flag MSG_SENDPAGE_NOTLAST, similar to +MSG_MORE. + +algif_hash, algif_skcipher, and udp used MSG_MORE from tcp_sendpages() +and need to see the new flag as identical to MSG_MORE. + +This fixes sendfile() on AF_ALG. + +v3: also fix udp + +Cc: Tom Herbert +Cc: Eric Dumazet +Cc: David S. Miller +Cc: # 3.4.x + 3.2.x +Reported-and-tested-by: Shawn Landden +Original-patch: Richard Weinberger +Signed-off-by: Shawn Landden +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + crypto/algif_hash.c | 3 +++ + crypto/algif_skcipher.c | 3 +++ + net/ipv4/udp.c | 3 +++ + 3 files changed, 9 insertions(+) + +--- a/crypto/algif_hash.c ++++ b/crypto/algif_hash.c +@@ -114,6 +114,9 @@ static ssize_t hash_sendpage(struct sock + struct hash_ctx *ctx = ask->private; + int err; + ++ if (flags & MSG_SENDPAGE_NOTLAST) ++ flags |= MSG_MORE; ++ + lock_sock(sk); + sg_init_table(ctx->sgl.sg, 1); + sg_set_page(ctx->sgl.sg, page, size, offset); +--- a/crypto/algif_skcipher.c ++++ b/crypto/algif_skcipher.c +@@ -378,6 +378,9 @@ static ssize_t skcipher_sendpage(struct + struct skcipher_sg_list *sgl; + int err = -EINVAL; + ++ if (flags & MSG_SENDPAGE_NOTLAST) ++ flags |= MSG_MORE; ++ + lock_sock(sk); + if (!ctx->more && ctx->used) + goto unlock; +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -1072,6 +1072,9 @@ int udp_sendpage(struct sock *sk, struct + struct udp_sock *up = udp_sk(sk); + int ret; + ++ if (flags & MSG_SENDPAGE_NOTLAST) ++ flags |= MSG_MORE; ++ + if (!up->pending) { + struct msghdr msg = { .msg_flags = flags|MSG_MORE }; + diff --git a/queue-3.12/net-x86-bpf-don-t-forget-to-free-sk_filter-v2.patch b/queue-3.12/net-x86-bpf-don-t-forget-to-free-sk_filter-v2.patch new file mode 100644 index 00000000000..d5abdc75995 --- /dev/null +++ b/queue-3.12/net-x86-bpf-don-t-forget-to-free-sk_filter-v2.patch @@ -0,0 +1,58 @@ +From foo@baz Thu Dec 5 16:16:37 PST 2013 +From: Andrey Vagin +Date: Thu, 7 Nov 2013 08:35:12 +0400 +Subject: net: x86: bpf: 
don't forget to free sk_filter (v2) + +From: Andrey Vagin + +[ Upstream commit 98bbc06aabac5a2dcc46580d20c59baf8ebe479f ] + +sk_filter isn't freed if bpf_func is equal to sk_run_filter. + +This memory leak was introduced by v3.12-rc3-224-gd45ed4a4 +"net: fix unsafe set_memory_rw from softirq". + +Before this patch sk_filter was freed in sk_filter_release_rcu, +now it should be freed in bpf_jit_free. + +Here is output of kmemleak: +unreferenced object 0xffff8800b774eab0 (size 128): + comm "systemd", pid 1, jiffies 4294669014 (age 124.062s) + hex dump (first 32 bytes): + 00 00 00 00 0b 00 00 00 20 63 7f b7 00 88 ff ff ........ c...... + 60 d4 55 81 ff ff ff ff 30 d9 55 81 ff ff ff ff `.U.....0.U..... + backtrace: + [] kmemleak_alloc+0x4e/0xb0 + [] __kmalloc+0xef/0x260 + [] sock_kmalloc+0x38/0x60 + [] sk_attach_filter+0x5d/0x190 + [] sock_setsockopt+0x991/0x9e0 + [] SyS_setsockopt+0xb6/0xd0 + [] system_call_fastpath+0x16/0x1b + [] 0xffffffffffffffff + +v2: add extra { } after else + +Fixes: d45ed4a4e33a ("net: fix unsafe set_memory_rw from softirq") +Acked-by: Daniel Borkmann +Cc: Alexei Starovoitov +Cc: Eric Dumazet +Cc: "David S. Miller" +Signed-off-by: Andrey Vagin +Acked-by: Alexei Starovoitov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/net/bpf_jit_comp.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -788,5 +788,7 @@ void bpf_jit_free(struct sk_filter *fp) + if (fp->bpf_func != sk_run_filter) { + INIT_WORK(&fp->work, bpf_jit_free_deferred); + schedule_work(&fp->work); ++ } else { ++ kfree(fp); + } + } diff --git a/queue-3.12/netfilter-push-reasm-skb-through-instead-of-original-frag-skbs.patch b/queue-3.12/netfilter-push-reasm-skb-through-instead-of-original-frag-skbs.patch new file mode 100644 index 00000000000..8b7f1b11bfc --- /dev/null +++ b/queue-3.12/netfilter-push-reasm-skb-through-instead-of-original-frag-skbs.patch @@ -0,0 +1,473 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Jiri Pirko +Date: Wed, 6 Nov 2013 17:52:20 +0100 +Subject: netfilter: push reasm skb through instead of original frag skbs + +From: Jiri Pirko + +[ Upstream commit 6aafeef03b9d9ecf255f3a80ed85ee070260e1ae ] + +Pushing original fragments through causes several problems. For example +for matching, frags may not be matched correctly. Take the following +example: + + +On HOSTA do: +ip6tables -I INPUT -p icmpv6 -j DROP +ip6tables -I INPUT -p icmpv6 -m icmp6 --icmpv6-type 128 -j ACCEPT + +and on HOSTB you do: +ping6 HOSTA -s2000 (MTU is 1500) + +Incoming echo requests will be filtered out on HOSTA. This issue does +not occur with smaller packets than MTU (where fragmentation does not happen) + + +As was discussed previously, the only correct solution seems to be to use +reassembled skb instead of separate frags. Doing this has positive side +effects in reducing sk_buff by one pointer (nfct_reasm) and also the reasm +dances in ipvs and conntrack can be removed. + +Future plan is to remove net/ipv6/netfilter/nf_conntrack_reasm.c +entirely and use code in net/ipv6/reassembly.c instead. 
+ +Signed-off-by: Jiri Pirko +Acked-by: Julian Anastasov +Signed-off-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 32 -------------- + include/net/ip_vs.h | 32 -------------- + include/net/netfilter/ipv6/nf_defrag_ipv6.h | 5 -- + net/core/skbuff.c | 3 - + net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 54 ------------------------ + net/ipv6/netfilter/nf_conntrack_reasm.c | 19 -------- + net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 7 ++- + net/netfilter/ipvs/ip_vs_core.c | 55 ------------------------- + net/netfilter/ipvs/ip_vs_pe_sip.c | 8 --- + 9 files changed, 13 insertions(+), 202 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -333,11 +333,6 @@ typedef unsigned int sk_buff_data_t; + typedef unsigned char *sk_buff_data_t; + #endif + +-#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \ +- defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE) +-#define NET_SKBUFF_NF_DEFRAG_NEEDED 1 +-#endif +- + /** + * struct sk_buff - socket buffer + * @next: Next buffer in list +@@ -370,7 +365,6 @@ typedef unsigned char *sk_buff_data_t; + * @protocol: Packet protocol from driver + * @destructor: Destruct function + * @nfct: Associated connection, if any +- * @nfct_reasm: netfilter conntrack re-assembly pointer + * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c + * @skb_iif: ifindex of device we arrived on + * @tc_index: Traffic control index +@@ -459,9 +453,6 @@ struct sk_buff { + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + struct nf_conntrack *nfct; + #endif +-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED +- struct sk_buff *nfct_reasm; +-#endif + #ifdef CONFIG_BRIDGE_NETFILTER + struct nf_bridge_info *nf_bridge; + #endif +@@ -2605,18 +2596,6 @@ static inline void nf_conntrack_get(stru + atomic_inc(&nfct->use); + } + #endif +-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED +-static inline void 
nf_conntrack_get_reasm(struct sk_buff *skb) +-{ +- if (skb) +- atomic_inc(&skb->users); +-} +-static inline void nf_conntrack_put_reasm(struct sk_buff *skb) +-{ +- if (skb) +- kfree_skb(skb); +-} +-#endif + #ifdef CONFIG_BRIDGE_NETFILTER + static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) + { +@@ -2635,10 +2614,6 @@ static inline void nf_reset(struct sk_bu + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; + #endif +-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED +- nf_conntrack_put_reasm(skb->nfct_reasm); +- skb->nfct_reasm = NULL; +-#endif + #ifdef CONFIG_BRIDGE_NETFILTER + nf_bridge_put(skb->nf_bridge); + skb->nf_bridge = NULL; +@@ -2660,10 +2635,6 @@ static inline void __nf_copy(struct sk_b + nf_conntrack_get(src->nfct); + dst->nfctinfo = src->nfctinfo; + #endif +-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED +- dst->nfct_reasm = src->nfct_reasm; +- nf_conntrack_get_reasm(src->nfct_reasm); +-#endif + #ifdef CONFIG_BRIDGE_NETFILTER + dst->nf_bridge = src->nf_bridge; + nf_bridge_get(src->nf_bridge); +@@ -2675,9 +2646,6 @@ static inline void nf_copy(struct sk_buf + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + nf_conntrack_put(dst->nfct); + #endif +-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED +- nf_conntrack_put_reasm(dst->nfct_reasm); +-#endif + #ifdef CONFIG_BRIDGE_NETFILTER + nf_bridge_put(dst->nf_bridge); + #endif +--- a/include/net/ip_vs.h ++++ b/include/net/ip_vs.h +@@ -109,7 +109,6 @@ extern int ip_vs_conn_tab_size; + struct ip_vs_iphdr { + __u32 len; /* IPv4 simply where L4 starts + IPv6 where L4 Transport Header starts */ +- __u32 thoff_reasm; /* Transport Header Offset in nfct_reasm skb */ + __u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/ + __s16 protocol; + __s32 flags; +@@ -117,34 +116,12 @@ struct ip_vs_iphdr { + union nf_inet_addr daddr; + }; + +-/* Dependency to module: nf_defrag_ipv6 */ +-#if defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE) +-static inline struct sk_buff 
*skb_nfct_reasm(const struct sk_buff *skb) +-{ +- return skb->nfct_reasm; +-} + static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, + int len, void *buffer, + const struct ip_vs_iphdr *ipvsh) + { +- if (unlikely(ipvsh->fragoffs && skb_nfct_reasm(skb))) +- return skb_header_pointer(skb_nfct_reasm(skb), +- ipvsh->thoff_reasm, len, buffer); +- + return skb_header_pointer(skb, offset, len, buffer); + } +-#else +-static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb) +-{ +- return NULL; +-} +-static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, +- int len, void *buffer, +- const struct ip_vs_iphdr *ipvsh) +-{ +- return skb_header_pointer(skb, offset, len, buffer); +-} +-#endif + + static inline void + ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr) +@@ -171,19 +148,12 @@ ip_vs_fill_iph_skb(int af, const struct + (struct ipv6hdr *)skb_network_header(skb); + iphdr->saddr.in6 = iph->saddr; + iphdr->daddr.in6 = iph->daddr; +- /* ipv6_find_hdr() updates len, flags, thoff_reasm */ +- iphdr->thoff_reasm = 0; ++ /* ipv6_find_hdr() updates len, flags */ + iphdr->len = 0; + iphdr->flags = 0; + iphdr->protocol = ipv6_find_hdr(skb, &iphdr->len, -1, + &iphdr->fragoffs, + &iphdr->flags); +- /* get proto from re-assembled packet and it's offset */ +- if (skb_nfct_reasm(skb)) +- iphdr->protocol = ipv6_find_hdr(skb_nfct_reasm(skb), +- &iphdr->thoff_reasm, +- -1, NULL, NULL); +- + } else + #endif + { +--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h ++++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h +@@ -6,10 +6,7 @@ extern void nf_defrag_ipv6_enable(void); + extern int nf_ct_frag6_init(void); + extern void nf_ct_frag6_cleanup(void); + extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user); +-extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, +- struct net_device *in, +- struct net_device *out, +- int (*okfn)(struct sk_buff *)); ++extern void 
nf_ct_frag6_consume_orig(struct sk_buff *skb); + + struct inet_frags_ctl; + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -580,9 +580,6 @@ static void skb_release_head_state(struc + #if IS_ENABLED(CONFIG_NF_CONNTRACK) + nf_conntrack_put(skb->nfct); + #endif +-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED +- nf_conntrack_put_reasm(skb->nfct_reasm); +-#endif + #ifdef CONFIG_BRIDGE_NETFILTER + nf_bridge_put(skb->nf_bridge); + #endif +--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c ++++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +@@ -169,63 +169,13 @@ out: + return nf_conntrack_confirm(skb); + } + +-static unsigned int __ipv6_conntrack_in(struct net *net, +- unsigned int hooknum, +- struct sk_buff *skb, +- const struct net_device *in, +- const struct net_device *out, +- int (*okfn)(struct sk_buff *)) +-{ +- struct sk_buff *reasm = skb->nfct_reasm; +- const struct nf_conn_help *help; +- struct nf_conn *ct; +- enum ip_conntrack_info ctinfo; +- +- /* This packet is fragmented and has reassembled packet. */ +- if (reasm) { +- /* Reassembled packet isn't parsed yet ? */ +- if (!reasm->nfct) { +- unsigned int ret; +- +- ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm); +- if (ret != NF_ACCEPT) +- return ret; +- } +- +- /* Conntrack helpers need the entire reassembled packet in the +- * POST_ROUTING hook. In case of unconfirmed connections NAT +- * might reassign a helper, so the entire packet is also +- * required. 
+- */ +- ct = nf_ct_get(reasm, &ctinfo); +- if (ct != NULL && !nf_ct_is_untracked(ct)) { +- help = nfct_help(ct); +- if ((help && help->helper) || !nf_ct_is_confirmed(ct)) { +- nf_conntrack_get_reasm(reasm); +- NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, +- (struct net_device *)in, +- (struct net_device *)out, +- okfn, NF_IP6_PRI_CONNTRACK + 1); +- return NF_DROP_ERR(-ECANCELED); +- } +- } +- +- nf_conntrack_get(reasm->nfct); +- skb->nfct = reasm->nfct; +- skb->nfctinfo = reasm->nfctinfo; +- return NF_ACCEPT; +- } +- +- return nf_conntrack_in(net, PF_INET6, hooknum, skb); +-} +- + static unsigned int ipv6_conntrack_in(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { +- return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); ++ return nf_conntrack_in(dev_net(in), PF_INET6, hooknum, skb); + } + + static unsigned int ipv6_conntrack_local(unsigned int hooknum, +@@ -239,7 +189,7 @@ static unsigned int ipv6_conntrack_local + net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); + return NF_ACCEPT; + } +- return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); ++ return nf_conntrack_in(dev_net(out), PF_INET6, hooknum, skb); + } + + static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { +--- a/net/ipv6/netfilter/nf_conntrack_reasm.c ++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c +@@ -621,31 +621,16 @@ ret_orig: + return skb; + } + +-void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, +- struct net_device *in, struct net_device *out, +- int (*okfn)(struct sk_buff *)) ++void nf_ct_frag6_consume_orig(struct sk_buff *skb) + { + struct sk_buff *s, *s2; +- unsigned int ret = 0; + + for (s = NFCT_FRAG6_CB(skb)->orig; s;) { +- nf_conntrack_put_reasm(s->nfct_reasm); +- nf_conntrack_get_reasm(skb); +- s->nfct_reasm = skb; +- + s2 = s->next; + s->next = NULL; +- +- if (ret != -ECANCELED) +- ret = 
NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, +- in, out, okfn, +- NF_IP6_PRI_CONNTRACK_DEFRAG + 1); +- else +- kfree_skb(s); +- ++ consume_skb(s); + s = s2; + } +- nf_conntrack_put_reasm(skb); + } + + static int nf_ct_net_init(struct net *net) +--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c ++++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +@@ -75,8 +75,11 @@ static unsigned int ipv6_defrag(unsigned + if (reasm == skb) + return NF_ACCEPT; + +- nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, +- (struct net_device *)out, okfn); ++ nf_ct_frag6_consume_orig(reasm); ++ ++ NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, ++ (struct net_device *) in, (struct net_device *) out, ++ okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); + + return NF_STOLEN; + } +--- a/net/netfilter/ipvs/ip_vs_core.c ++++ b/net/netfilter/ipvs/ip_vs_core.c +@@ -1139,12 +1139,6 @@ ip_vs_out(unsigned int hooknum, struct s + ip_vs_fill_iph_skb(af, skb, &iph); + #ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { +- if (!iph.fragoffs && skb_nfct_reasm(skb)) { +- struct sk_buff *reasm = skb_nfct_reasm(skb); +- /* Save fw mark for coming frags */ +- reasm->ipvs_property = 1; +- reasm->mark = skb->mark; +- } + if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { + int related; + int verdict = ip_vs_out_icmp_v6(skb, &related, +@@ -1614,12 +1608,6 @@ ip_vs_in(unsigned int hooknum, struct sk + + #ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { +- if (!iph.fragoffs && skb_nfct_reasm(skb)) { +- struct sk_buff *reasm = skb_nfct_reasm(skb); +- /* Save fw mark for coming frags. 
*/ +- reasm->ipvs_property = 1; +- reasm->mark = skb->mark; +- } + if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { + int related; + int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum, +@@ -1671,9 +1659,8 @@ ip_vs_in(unsigned int hooknum, struct sk + /* sorry, all this trouble for a no-hit :) */ + IP_VS_DBG_PKT(12, af, pp, skb, 0, + "ip_vs_in: packet continues traversal as normal"); +- if (iph.fragoffs && !skb_nfct_reasm(skb)) { ++ if (iph.fragoffs) { + /* Fragment that couldn't be mapped to a conn entry +- * and don't have any pointer to a reasm skb + * is missing module nf_defrag_ipv6 + */ + IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); +@@ -1756,38 +1743,6 @@ ip_vs_local_request4(unsigned int hooknu + #ifdef CONFIG_IP_VS_IPV6 + + /* +- * AF_INET6 fragment handling +- * Copy info from first fragment, to the rest of them. +- */ +-static unsigned int +-ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, +- const struct net_device *in, +- const struct net_device *out, +- int (*okfn)(struct sk_buff *)) +-{ +- struct sk_buff *reasm = skb_nfct_reasm(skb); +- struct net *net; +- +- /* Skip if not a "replay" from nf_ct_frag6_output or first fragment. +- * ipvs_property is set when checking first fragment +- * in ip_vs_in() and ip_vs_out(). 
+- */ +- if (reasm) +- IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property); +- if (!reasm || !reasm->ipvs_property) +- return NF_ACCEPT; +- +- net = skb_net(skb); +- if (!net_ipvs(net)->enable) +- return NF_ACCEPT; +- +- /* Copy stored fw mark, saved in ip_vs_{in,out} */ +- skb->mark = reasm->mark; +- +- return NF_ACCEPT; +-} +- +-/* + * AF_INET6 handler in NF_INET_LOCAL_IN chain + * Schedule and forward packets from remote clients + */ +@@ -1924,14 +1879,6 @@ static struct nf_hook_ops ip_vs_ops[] __ + .priority = 100, + }, + #ifdef CONFIG_IP_VS_IPV6 +- /* After mangle & nat fetch 2:nd fragment and following */ +- { +- .hook = ip_vs_preroute_frag6, +- .owner = THIS_MODULE, +- .pf = NFPROTO_IPV6, +- .hooknum = NF_INET_PRE_ROUTING, +- .priority = NF_IP6_PRI_NAT_DST + 1, +- }, + /* After packet filtering, change source only for VS/NAT */ + { + .hook = ip_vs_reply6, +--- a/net/netfilter/ipvs/ip_vs_pe_sip.c ++++ b/net/netfilter/ipvs/ip_vs_pe_sip.c +@@ -65,7 +65,6 @@ static int get_callid(const char *dptr, + static int + ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) + { +- struct sk_buff *reasm = skb_nfct_reasm(skb); + struct ip_vs_iphdr iph; + unsigned int dataoff, datalen, matchoff, matchlen; + const char *dptr; +@@ -79,15 +78,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_p + /* todo: IPv6 fragments: + * I think this only should be done for the first fragment. /HS + */ +- if (reasm) { +- skb = reasm; +- dataoff = iph.thoff_reasm + sizeof(struct udphdr); +- } else +- dataoff = iph.len + sizeof(struct udphdr); ++ dataoff = iph.len + sizeof(struct udphdr); + + if (dataoff >= skb->len) + return -EINVAL; +- /* todo: Check if this will mess-up the reasm skb !!! 
/HS */ + retc = skb_linearize(skb); + if (retc < 0) + return retc; diff --git a/queue-3.12/packet-fix-use-after-free-race-in-send-path-when-dev-is-released.patch b/queue-3.12/packet-fix-use-after-free-race-in-send-path-when-dev-is-released.patch new file mode 100644 index 00000000000..c36f19e7445 --- /dev/null +++ b/queue-3.12/packet-fix-use-after-free-race-in-send-path-when-dev-is-released.patch @@ -0,0 +1,223 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Daniel Borkmann +Date: Thu, 21 Nov 2013 16:50:58 +0100 +Subject: packet: fix use after free race in send path when dev is released + +From: Daniel Borkmann + +[ Upstream commit e40526cb20b5ee53419452e1f03d97092f144418 ] + +Salam reported a use after free bug in PF_PACKET that occurs when +we're sending out frames on a socket bound device and suddenly the +net device is being unregistered. It appears that commit 827d9780 +introduced a possible race condition between {t,}packet_snd() and +packet_notifier(). In the case of a bound socket, packet_notifier() +can drop the last reference to the net_device and {t,}packet_snd() +might end up suddenly sending a packet over a freed net_device. + +To avoid reverting 827d9780 and thus introducing a performance +regression compared to the current state of things, we decided to +hold a cached RCU protected pointer to the net device and maintain +it on write side via bind spin_lock protected register_prot_hook() +and __unregister_prot_hook() calls. + +In {t,}packet_snd() path, we access this pointer under rcu_read_lock +through packet_cached_dev_get() that holds reference to the device +to prevent it from being freed through packet_notifier() while +we're in send path. This is okay to do as dev_put()/dev_hold() are +per-cpu counters, so this should not be a performance issue. Also, +the code simplifies a bit as we don't need need_rls_dev anymore. 
+ +Fixes: 827d978037d7 ("af-packet: Use existing netdev reference for bound sockets.") +Reported-by: Salam Noureddine +Signed-off-by: Daniel Borkmann +Signed-off-by: Salam Noureddine +Cc: Ben Greear +Cc: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 59 +++++++++++++++++++++++++++++-------------------- + net/packet/internal.h | 1 + 2 files changed, 37 insertions(+), 23 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -244,11 +244,15 @@ static void __fanout_link(struct sock *s + static void register_prot_hook(struct sock *sk) + { + struct packet_sock *po = pkt_sk(sk); ++ + if (!po->running) { +- if (po->fanout) ++ if (po->fanout) { + __fanout_link(sk, po); +- else ++ } else { + dev_add_pack(&po->prot_hook); ++ rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); ++ } ++ + sock_hold(sk); + po->running = 1; + } +@@ -266,10 +270,13 @@ static void __unregister_prot_hook(struc + struct packet_sock *po = pkt_sk(sk); + + po->running = 0; +- if (po->fanout) ++ if (po->fanout) { + __fanout_unlink(sk, po); +- else ++ } else { + __dev_remove_pack(&po->prot_hook); ++ RCU_INIT_POINTER(po->cached_dev, NULL); ++ } ++ + __sock_put(sk); + + if (sync) { +@@ -2052,12 +2059,24 @@ static int tpacket_fill_skb(struct packe + return tp_len; + } + ++static struct net_device *packet_cached_dev_get(struct packet_sock *po) ++{ ++ struct net_device *dev; ++ ++ rcu_read_lock(); ++ dev = rcu_dereference(po->cached_dev); ++ if (dev) ++ dev_hold(dev); ++ rcu_read_unlock(); ++ ++ return dev; ++} ++ + static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) + { + struct sk_buff *skb; + struct net_device *dev; + __be16 proto; +- bool need_rls_dev = false; + int err, reserve = 0; + void *ph; + struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; +@@ -2070,7 +2089,7 @@ static int tpacket_snd(struct packet_soc + mutex_lock(&po->pg_vec_lock); + + if (saddr == NULL) { +- dev = 
po->prot_hook.dev; ++ dev = packet_cached_dev_get(po); + proto = po->num; + addr = NULL; + } else { +@@ -2084,19 +2103,17 @@ static int tpacket_snd(struct packet_soc + proto = saddr->sll_protocol; + addr = saddr->sll_addr; + dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); +- need_rls_dev = true; + } + + err = -ENXIO; + if (unlikely(dev == NULL)) + goto out; +- +- reserve = dev->hard_header_len; +- + err = -ENETDOWN; + if (unlikely(!(dev->flags & IFF_UP))) + goto out_put; + ++ reserve = dev->hard_header_len; ++ + size_max = po->tx_ring.frame_size + - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); + +@@ -2173,8 +2190,7 @@ out_status: + __packet_set_status(po, ph, status); + kfree_skb(skb); + out_put: +- if (need_rls_dev) +- dev_put(dev); ++ dev_put(dev); + out: + mutex_unlock(&po->pg_vec_lock); + return err; +@@ -2212,7 +2228,6 @@ static int packet_snd(struct socket *soc + struct sk_buff *skb; + struct net_device *dev; + __be16 proto; +- bool need_rls_dev = false; + unsigned char *addr; + int err, reserve = 0; + struct virtio_net_hdr vnet_hdr = { 0 }; +@@ -2228,7 +2243,7 @@ static int packet_snd(struct socket *soc + */ + + if (saddr == NULL) { +- dev = po->prot_hook.dev; ++ dev = packet_cached_dev_get(po); + proto = po->num; + addr = NULL; + } else { +@@ -2240,19 +2255,17 @@ static int packet_snd(struct socket *soc + proto = saddr->sll_protocol; + addr = saddr->sll_addr; + dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); +- need_rls_dev = true; + } + + err = -ENXIO; +- if (dev == NULL) ++ if (unlikely(dev == NULL)) + goto out_unlock; +- if (sock->type == SOCK_RAW) +- reserve = dev->hard_header_len; +- + err = -ENETDOWN; +- if (!(dev->flags & IFF_UP)) ++ if (unlikely(!(dev->flags & IFF_UP))) + goto out_unlock; + ++ if (sock->type == SOCK_RAW) ++ reserve = dev->hard_header_len; + if (po->has_vnet_hdr) { + vnet_hdr_len = sizeof(vnet_hdr); + +@@ -2386,15 +2399,14 @@ static int packet_snd(struct socket *soc + if (err > 0 && (err = 
net_xmit_errno(err)) != 0) + goto out_unlock; + +- if (need_rls_dev) +- dev_put(dev); ++ dev_put(dev); + + return len; + + out_free: + kfree_skb(skb); + out_unlock: +- if (dev && need_rls_dev) ++ if (dev) + dev_put(dev); + out: + return err; +@@ -2614,6 +2626,7 @@ static int packet_create(struct net *net + po = pkt_sk(sk); + sk->sk_family = PF_PACKET; + po->num = proto; ++ RCU_INIT_POINTER(po->cached_dev, NULL); + + sk->sk_destruct = packet_sock_destruct; + sk_refcnt_debug_inc(sk); +--- a/net/packet/internal.h ++++ b/net/packet/internal.h +@@ -113,6 +113,7 @@ struct packet_sock { + unsigned int tp_loss:1; + unsigned int tp_tx_has_off:1; + unsigned int tp_tstamp; ++ struct net_device __rcu *cached_dev; + struct packet_type prot_hook ____cacheline_aligned_in_smp; + }; + diff --git a/queue-3.12/ping-prevent-null-pointer-dereference-on-write-to-msg_name.patch b/queue-3.12/ping-prevent-null-pointer-dereference-on-write-to-msg_name.patch new file mode 100644 index 00000000000..3c6c7e98a9b --- /dev/null +++ b/queue-3.12/ping-prevent-null-pointer-dereference-on-write-to-msg_name.patch @@ -0,0 +1,69 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Hannes Frederic Sowa +Date: Mon, 18 Nov 2013 07:07:45 +0100 +Subject: ping: prevent NULL pointer dereference on write to msg_name + +From: Hannes Frederic Sowa + +[ Upstream commit cf970c002d270c36202bd5b9c2804d3097a52da0 ] + +A plain read() on a socket does set msg->msg_name to NULL. So check for +NULL pointer first. + +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ping.c | 34 +++++++++++++++++++--------------- + 1 file changed, 19 insertions(+), 15 deletions(-) + +--- a/net/ipv4/ping.c ++++ b/net/ipv4/ping.c +@@ -867,11 +867,13 @@ int ping_recvmsg(struct kiocb *iocb, str + if (family == AF_INET) { + struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + +- sin->sin_family = AF_INET; +- sin->sin_port = 0 /* skb->h.uh->source */; +- sin->sin_addr.s_addr = ip_hdr(skb)->saddr; +- memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); +- *addr_len = sizeof(*sin); ++ if (sin) { ++ sin->sin_family = AF_INET; ++ sin->sin_port = 0 /* skb->h.uh->source */; ++ sin->sin_addr.s_addr = ip_hdr(skb)->saddr; ++ memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); ++ *addr_len = sizeof(*sin); ++ } + + if (isk->cmsg_flags) + ip_cmsg_recv(msg, skb); +@@ -883,16 +885,18 @@ int ping_recvmsg(struct kiocb *iocb, str + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *)msg->msg_name; + +- sin6->sin6_family = AF_INET6; +- sin6->sin6_port = 0; +- sin6->sin6_addr = ip6->saddr; +- sin6->sin6_flowinfo = 0; +- if (np->sndflow) +- sin6->sin6_flowinfo = ip6_flowinfo(ip6); +- +- sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, +- IP6CB(skb)->iif); +- *addr_len = sizeof(*sin6); ++ if (sin6) { ++ sin6->sin6_family = AF_INET6; ++ sin6->sin6_port = 0; ++ sin6->sin6_addr = ip6->saddr; ++ sin6->sin6_flowinfo = 0; ++ if (np->sndflow) ++ sin6->sin6_flowinfo = ip6_flowinfo(ip6); ++ sin6->sin6_scope_id = ++ ipv6_iface_scope_id(&sin6->sin6_addr, ++ IP6CB(skb)->iif); ++ *addr_len = sizeof(*sin6); ++ } + + if (inet6_sk(sk)->rxopt.all) + pingv6_ops.ip6_datagram_recv_ctl(sk, msg, skb); diff --git a/queue-3.12/pkt_sched-fq-change-classification-of-control.patch b/queue-3.12/pkt_sched-fq-change-classification-of-control.patch new file mode 100644 index 00000000000..ef16e10add9 --- /dev/null +++ b/queue-3.12/pkt_sched-fq-change-classification-of-control.patch @@ -0,0 +1,55 @@ +From foo@baz Thu Dec 5 
16:16:39 PST 2013 +From: Maciej Żenczykowski +Date: Thu, 14 Nov 2013 08:50:43 -0800 +Subject: pkt_sched: fq: change classification of control packets + +From: Maciej Żenczykowski + +[ Upstream commit 2abc2f070eb30ac8421554a5c32229f8332c6206 ] + +Initial sch_fq implementation copied code from pfifo_fast to classify +a packet as a high prio packet. + +This clashes with setups using PRIO with say 7 bands, as one of the +band could be incorrectly (mis)classified by FQ. + +Packets would be queued in the 'internal' queue, and no pacing ever +happen for this special queue. + +Fixes: afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler") +Signed-off-by: Maciej Żenczykowski +Signed-off-by: Eric Dumazet +Cc: Stephen Hemminger +Cc: Willem de Bruijn +Cc: Yuchung Cheng +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_fq.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +--- a/net/sched/sch_fq.c ++++ b/net/sched/sch_fq.c +@@ -209,21 +209,15 @@ static void fq_gc(struct fq_sched_data * + } + } + +-static const u8 prio2band[TC_PRIO_MAX + 1] = { +- 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 +-}; +- + static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) + { + struct rb_node **p, *parent; + struct sock *sk = skb->sk; + struct rb_root *root; + struct fq_flow *f; +- int band; + + /* warning: no starvation prevention... 
*/ +- band = prio2band[skb->priority & TC_PRIO_MAX]; +- if (unlikely(band == 0)) ++ if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) + return &q->internal; + + if (unlikely(!sk)) { diff --git a/queue-3.12/pkt_sched-fq-fix-pacing-for-small-frames.patch b/queue-3.12/pkt_sched-fq-fix-pacing-for-small-frames.patch new file mode 100644 index 00000000000..b4b18cbf893 --- /dev/null +++ b/queue-3.12/pkt_sched-fq-fix-pacing-for-small-frames.patch @@ -0,0 +1,135 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Eric Dumazet +Date: Fri, 15 Nov 2013 08:58:14 -0800 +Subject: pkt_sched: fq: fix pacing for small frames + +From: Eric Dumazet + +[ Upstream commit f52ed89971adbe79b6438c459814034707b8ab91 ] + +For performance reasons, sch_fq tried hard to not setup timers for every +sent packet, using a quantum based heuristic : A delay is setup only if +the flow exhausted its credit. + +Problem is that application limited flows can refill their credit +for every queued packet, and they can evade pacing. + +This problem can also be triggered when TCP flows use small MSS values, +as TSO auto sizing builds packets that are smaller than the default fq +quantum (3028 bytes) + +This patch adds a 40 ms delay to guard flow credit refill. + +Fixes: afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler") +Signed-off-by: Eric Dumazet +Cc: Maciej Żenczykowski +Cc: Willem de Bruijn +Cc: Yuchung Cheng +Cc: Neal Cardwell +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/uapi/linux/pkt_sched.h | 3 +++ + net/sched/sch_fq.c | 22 ++++++++++++++++++---- + 2 files changed, 21 insertions(+), 4 deletions(-) + +--- a/include/uapi/linux/pkt_sched.h ++++ b/include/uapi/linux/pkt_sched.h +@@ -764,6 +764,9 @@ enum { + TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ + + TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */ ++ ++ TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */ ++ + __TCA_FQ_MAX + }; + +--- a/net/sched/sch_fq.c ++++ b/net/sched/sch_fq.c +@@ -88,6 +88,7 @@ struct fq_sched_data { + struct fq_flow internal; /* for non classified or high prio packets */ + u32 quantum; + u32 initial_quantum; ++ u32 flow_refill_delay; + u32 flow_max_rate; /* optional max rate per flow */ + u32 flow_plimit; /* max packets per flow */ + struct rb_root *fq_root; +@@ -114,6 +115,7 @@ static struct fq_flow detached, throttle + static void fq_flow_set_detached(struct fq_flow *f) + { + f->next = &detached; ++ f->age = jiffies; + } + + static bool fq_flow_is_detached(const struct fq_flow *f) +@@ -365,17 +367,20 @@ static int fq_enqueue(struct sk_buff *sk + } + + f->qlen++; +- flow_queue_add(f, skb); + if (skb_is_retransmit(skb)) + q->stat_tcp_retrans++; + sch->qstats.backlog += qdisc_pkt_len(skb); + if (fq_flow_is_detached(f)) { + fq_flow_add_tail(&q->new_flows, f); +- if (q->quantum > f->credit) +- f->credit = q->quantum; ++ if (time_after(jiffies, f->age + q->flow_refill_delay)) ++ f->credit = max_t(u32, f->credit, q->quantum); + q->inactive_flows--; + qdisc_unthrottled(sch); + } ++ ++ /* Note: this overwrites f->age */ ++ flow_queue_add(f, skb); ++ + if (unlikely(f == &q->internal)) { + q->stat_internal_packets++; + qdisc_unthrottled(sch); +@@ -453,7 +458,6 @@ begin: + fq_flow_add_tail(&q->old_flows, f); + } else { + fq_flow_set_detached(f); +- f->age = jiffies; + q->inactive_flows++; + } + goto begin; +@@ -607,6 +611,7 @@ static const struct nla_policy fq_policy + 
[TCA_FQ_FLOW_DEFAULT_RATE] = { .type = NLA_U32 }, + [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 }, + [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, ++ [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, + }; + + static int fq_change(struct Qdisc *sch, struct nlattr *opt) +@@ -663,6 +668,12 @@ static int fq_change(struct Qdisc *sch, + err = -EINVAL; + } + ++ if (tb[TCA_FQ_FLOW_REFILL_DELAY]) { ++ u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ; ++ ++ q->flow_refill_delay = usecs_to_jiffies(usecs_delay); ++ } ++ + if (!err) + err = fq_resize(q, fq_log); + +@@ -698,6 +709,7 @@ static int fq_init(struct Qdisc *sch, st + q->flow_plimit = 100; + q->quantum = 2 * psched_mtu(qdisc_dev(sch)); + q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch)); ++ q->flow_refill_delay = msecs_to_jiffies(40); + q->flow_max_rate = ~0U; + q->rate_enable = 1; + q->new_flows.first = NULL; +@@ -732,6 +744,8 @@ static int fq_dump(struct Qdisc *sch, st + nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) || + nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) || + nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) || ++ nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, ++ jiffies_to_usecs(q->flow_refill_delay)) || + nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) + goto nla_put_failure; + diff --git a/queue-3.12/pkt_sched-fq-warn-users-using-defrate.patch b/queue-3.12/pkt_sched-fq-warn-users-using-defrate.patch new file mode 100644 index 00000000000..f1a2753a633 --- /dev/null +++ b/queue-3.12/pkt_sched-fq-warn-users-using-defrate.patch @@ -0,0 +1,75 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Eric Dumazet +Date: Fri, 15 Nov 2013 08:57:26 -0800 +Subject: pkt_sched: fq: warn users using defrate + +From: Eric Dumazet + +[ Upstream commit 65c5189a2b57b9aa1d89e4b79da39928257c9505 ] + +Commit 7eec4174ff29 ("pkt_sched: fq: fix non TCP flows pacing") +obsoleted TCA_FQ_FLOW_DEFAULT_RATE without notice for the users. 
+ +Suggested by David Miller + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/uapi/linux/pkt_sched.h | 4 +--- + net/sched/sch_fq.c | 10 ++++------ + 2 files changed, 5 insertions(+), 9 deletions(-) + +--- a/include/uapi/linux/pkt_sched.h ++++ b/include/uapi/linux/pkt_sched.h +@@ -759,9 +759,7 @@ enum { + + TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */ + +- TCA_FQ_FLOW_DEFAULT_RATE,/* for sockets with unspecified sk_rate, +- * use the following rate +- */ ++ TCA_FQ_FLOW_DEFAULT_RATE,/* obsolete, do not use */ + + TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ + +--- a/net/sched/sch_fq.c ++++ b/net/sched/sch_fq.c +@@ -88,7 +88,6 @@ struct fq_sched_data { + struct fq_flow internal; /* for non classified or high prio packets */ + u32 quantum; + u32 initial_quantum; +- u32 flow_default_rate;/* rate per flow : bytes per second */ + u32 flow_max_rate; /* optional max rate per flow */ + u32 flow_plimit; /* max packets per flow */ + struct rb_root *fq_root; +@@ -649,7 +648,8 @@ static int fq_change(struct Qdisc *sch, + q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); + + if (tb[TCA_FQ_FLOW_DEFAULT_RATE]) +- q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]); ++ pr_warn_ratelimited("sch_fq: defrate %u ignored.\n", ++ nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE])); + + if (tb[TCA_FQ_FLOW_MAX_RATE]) + q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]); +@@ -698,7 +698,6 @@ static int fq_init(struct Qdisc *sch, st + q->flow_plimit = 100; + q->quantum = 2 * psched_mtu(qdisc_dev(sch)); + q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch)); +- q->flow_default_rate = 0; + q->flow_max_rate = ~0U; + q->rate_enable = 1; + q->new_flows.first = NULL; +@@ -725,9 +724,8 @@ static int fq_dump(struct Qdisc *sch, st + if (opts == NULL) + goto nla_put_failure; + +- /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore, +- * do not bother giving its value +- */ ++ /* TCA_FQ_FLOW_DEFAULT_RATE is not 
used anymore */ ++ + if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) || + nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) || + nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) || diff --git a/queue-3.12/pktgen-xfrm-update-ipv4-header-total-len-and-checksum-after-tranformation.patch b/queue-3.12/pktgen-xfrm-update-ipv4-header-total-len-and-checksum-after-tranformation.patch new file mode 100644 index 00000000000..d3af3d9d25f --- /dev/null +++ b/queue-3.12/pktgen-xfrm-update-ipv4-header-total-len-and-checksum-after-tranformation.patch @@ -0,0 +1,57 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: "fan.du" +Date: Sun, 1 Dec 2013 16:28:48 +0800 +Subject: {pktgen, xfrm} Update IPv4 header total len and checksum after tranformation + +From: "fan.du" + +[ Upstream commit 3868204d6b89ea373a273e760609cb08020beb1a ] + +commit a553e4a6317b2cfc7659542c10fe43184ffe53da ("[PKTGEN]: IPSEC support") +tried to support IPsec ESP transport transformation for pktgen, but actually +this doesn't work at all for two reasons (The original transformed packet has +bad IPv4 checksum value, as well as wrong auth value, reported by wireshark) + +- After transformation, IPv4 header total length needs update, + because encrypted payload's length is NOT same as that of plain text. + +- After transformation, IPv4 checksum needs to be recalculated because the + payload has been changed. + +With this patch, and pktgen armed with the configuration below, Wireshark is +able to decrypt ESP packets generated by pktgen without any IPv4 checksum +error or auth value error. + +pgset "flag IPSEC" +pgset "flows 1" + +Signed-off-by: Fan Du +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/pktgen.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/net/core/pktgen.c ++++ b/net/core/pktgen.c +@@ -2527,6 +2527,8 @@ static int process_ipsec(struct pktgen_d + if (x) { + int ret; + __u8 *eth; ++ struct iphdr *iph; ++ + nhead = x->props.header_len - skb_headroom(skb); + if (nhead > 0) { + ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); +@@ -2548,6 +2550,11 @@ static int process_ipsec(struct pktgen_d + eth = (__u8 *) skb_push(skb, ETH_HLEN); + memcpy(eth, pkt_dev->hh, 12); + *(u16 *) &eth[12] = protocol; ++ ++ /* Update IPv4 header len as well as checksum value */ ++ iph = ip_hdr(skb); ++ iph->tot_len = htons(skb->len - ETH_HLEN); ++ ip_send_check(iph); + } + } + return 1; diff --git a/queue-3.12/r8169-check-aldps-bit-and-disable-it-if-enabled-for-the-8168g.patch b/queue-3.12/r8169-check-aldps-bit-and-disable-it-if-enabled-for-the-8168g.patch new file mode 100644 index 00000000000..6949657b81c --- /dev/null +++ b/queue-3.12/r8169-check-aldps-bit-and-disable-it-if-enabled-for-the-8168g.patch @@ -0,0 +1,42 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: David Chang +Date: Wed, 27 Nov 2013 15:48:36 +0800 +Subject: r8169: check ALDPS bit and disable it if enabled for the 8168g + +From: David Chang + +[ Upstream commit 1bac1072425c86f1ac85bd5967910706677ef8b3 ] + +Windows driver will enable ALDPS function, but linux driver and firmware +do not have any configuration related to ALDPS function for 8168g. +So restart system to linux and remove the NIC cable, LAN enter ALDPS, +then LAN RX will be disabled. + +This issue can be easily reproduced on dual boot windows and linux +system with RTL_GIGA_MAC_VER_40 chip. + +Realtek said, ALDPS function can be disabled by configuring to PHY, +switch to page 0x0A43, reg0x10 bit2=0. + +Signed-off-by: David Chang +Acked-by: Hayes Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/r8169.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -3465,6 +3465,11 @@ static void rtl8168g_1_hw_phy_config(str + rtl_writephy(tp, 0x14, 0x9065); + rtl_writephy(tp, 0x14, 0x1065); + ++ /* Check ALDPS bit, disable it if enabled */ ++ rtl_writephy(tp, 0x1f, 0x0a43); ++ if (rtl_readphy(tp, 0x10) & 0x0004) ++ rtl_w1w0_phy(tp, 0x10, 0x0000, 0x0004); ++ + rtl_writephy(tp, 0x1f, 0x0000); + } + diff --git a/queue-3.12/random32-fix-off-by-one-in-seeding-requirement.patch b/queue-3.12/random32-fix-off-by-one-in-seeding-requirement.patch new file mode 100644 index 00000000000..46c43061d3f --- /dev/null +++ b/queue-3.12/random32-fix-off-by-one-in-seeding-requirement.patch @@ -0,0 +1,96 @@ +From foo@baz Thu Dec 5 16:16:37 PST 2013 +From: Daniel Borkmann +Date: Mon, 11 Nov 2013 12:20:32 +0100 +Subject: random32: fix off-by-one in seeding requirement + +From: Daniel Borkmann + +[ Upstream commit 51c37a70aaa3f95773af560e6db3073520513912 ] + +For properly initialising the Tausworthe generator [1], we have +a strict seeding requirement, that is, s1 > 1, s2 > 7, s3 > 15. + +Commit 697f8d0348 ("random32: seeding improvement") introduced +a __seed() function that imposes boundary checks proposed by the +errata paper [2] to properly ensure above conditions. + +However, we're off by one, as the function is implemented as: +"return (x < m) ? x + m : x;", and called with __seed(X, 1), +__seed(X, 7), __seed(X, 15). Thus, an unwanted seed of 1, 7, 15 +would be possible, whereas the lower boundary should actually +be of at least 2, 8, 16, just as GSL does. Fix this, as otherwise +an initialization with an unwanted seed could have the effect +that Tausworthe's PRNG properties cannot be ensured. + +Note that this PRNG is *not* used for cryptography in the kernel. 
+ + [1] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps + [2] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps + +Joint work with Hannes Frederic Sowa. + +Fixes: 697f8d0348a6 ("random32: seeding improvement") +Cc: Stephen Hemminger +Cc: Florian Weimer +Cc: Theodore Ts'o +Signed-off-by: Daniel Borkmann +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/random.h | 6 +++--- + lib/random32.c | 14 +++++++------- + 2 files changed, 10 insertions(+), 10 deletions(-) + +--- a/include/linux/random.h ++++ b/include/linux/random.h +@@ -50,9 +50,9 @@ static inline void prandom_seed_state(st + { + u32 i = (seed >> 32) ^ (seed << 10) ^ seed; + +- state->s1 = __seed(i, 1); +- state->s2 = __seed(i, 7); +- state->s3 = __seed(i, 15); ++ state->s1 = __seed(i, 2); ++ state->s2 = __seed(i, 8); ++ state->s3 = __seed(i, 16); + } + + #ifdef CONFIG_ARCH_RANDOM +--- a/lib/random32.c ++++ b/lib/random32.c +@@ -141,7 +141,7 @@ void prandom_seed(u32 entropy) + */ + for_each_possible_cpu (i) { + struct rnd_state *state = &per_cpu(net_rand_state, i); +- state->s1 = __seed(state->s1 ^ entropy, 1); ++ state->s1 = __seed(state->s1 ^ entropy, 2); + } + } + EXPORT_SYMBOL(prandom_seed); +@@ -158,9 +158,9 @@ static int __init prandom_init(void) + struct rnd_state *state = &per_cpu(net_rand_state,i); + + #define LCG(x) ((x) * 69069) /* super-duper LCG */ +- state->s1 = __seed(LCG(i + jiffies), 1); +- state->s2 = __seed(LCG(state->s1), 7); +- state->s3 = __seed(LCG(state->s2), 15); ++ state->s1 = __seed(LCG(i + jiffies), 2); ++ state->s2 = __seed(LCG(state->s1), 8); ++ state->s3 = __seed(LCG(state->s2), 16); + + /* "warm it up" */ + prandom_u32_state(state); +@@ -187,9 +187,9 @@ static int __init prandom_reseed(void) + u32 seeds[3]; + + get_random_bytes(&seeds, sizeof(seeds)); +- state->s1 = __seed(seeds[0], 1); +- state->s2 = __seed(seeds[1], 7); +- state->s3 = __seed(seeds[2], 15); ++ state->s1 = 
__seed(seeds[0], 2); ++ state->s2 = __seed(seeds[1], 8); ++ state->s3 = __seed(seeds[2], 16); + + /* mix it in */ + prandom_u32_state(state); diff --git a/queue-3.12/sch_tbf-handle-too-small-burst.patch b/queue-3.12/sch_tbf-handle-too-small-burst.patch new file mode 100644 index 00000000000..b27da8cf722 --- /dev/null +++ b/queue-3.12/sch_tbf-handle-too-small-burst.patch @@ -0,0 +1,103 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Eric Dumazet +Date: Sat, 23 Nov 2013 12:59:20 -0800 +Subject: sch_tbf: handle too small burst + +From: Eric Dumazet + +[ Upstream commit 4d0820cf6a55d72350cb2d24a4504f62fbde95d9 ] + +If a too small burst is inadvertently set on TBF, we might trigger +a bug in tbf_segment(), as 'skb' instead of 'segs' was used in a +qdisc_reshape_fail() call. + +tc qdisc add dev eth0 root handle 1: tbf latency 50ms burst 1KB rate +50mbit + +Fix the bug, and add a warning, as such configuration is not +going to work anyway for non GSO packets. + +(For some reason, one has to use a burst >= 1520 to get a working +configuration, even with old kernels. This is a probable iproute2/tc +bug) + +Based on a report and initial patch from Yang Yingliang + +Fixes: e43ac79a4bc6 ("sch_tbf: segment too big GSO packets") +Signed-off-by: Eric Dumazet +Reported-by: Yang Yingliang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_tbf.c | 32 +++++++++++++++++++++++++------- + 1 file changed, 25 insertions(+), 7 deletions(-) + +--- a/net/sched/sch_tbf.c ++++ b/net/sched/sch_tbf.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + + /* Simple Token Bucket Filter. 
+@@ -117,6 +118,22 @@ struct tbf_sched_data { + }; + + ++/* ++ * Return length of individual segments of a gso packet, ++ * including all headers (MAC, IP, TCP/UDP) ++ */ ++static unsigned int skb_gso_seglen(const struct sk_buff *skb) ++{ ++ unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); ++ const struct skb_shared_info *shinfo = skb_shinfo(skb); ++ ++ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) ++ hdr_len += tcp_hdrlen(skb); ++ else ++ hdr_len += sizeof(struct udphdr); ++ return hdr_len + shinfo->gso_size; ++} ++ + /* GSO packet is too big, segment it so that tbf can transmit + * each segment in time + */ +@@ -136,12 +153,8 @@ static int tbf_segment(struct sk_buff *s + while (segs) { + nskb = segs->next; + segs->next = NULL; +- if (likely(segs->len <= q->max_size)) { +- qdisc_skb_cb(segs)->pkt_len = segs->len; +- ret = qdisc_enqueue(segs, q->qdisc); +- } else { +- ret = qdisc_reshape_fail(skb, sch); +- } ++ qdisc_skb_cb(segs)->pkt_len = segs->len; ++ ret = qdisc_enqueue(segs, q->qdisc); + if (ret != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; +@@ -163,7 +176,7 @@ static int tbf_enqueue(struct sk_buff *s + int ret; + + if (qdisc_pkt_len(skb) > q->max_size) { +- if (skb_is_gso(skb)) ++ if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size) + return tbf_segment(skb, sch); + return qdisc_reshape_fail(skb, sch); + } +@@ -316,6 +329,11 @@ static int tbf_change(struct Qdisc *sch, + if (max_size < 0) + goto done; + ++ if (max_size < psched_mtu(qdisc_dev(sch))) ++ pr_warn_ratelimited("sch_tbf: burst %u is lower than device %s mtu (%u) !\n", ++ max_size, qdisc_dev(sch)->name, ++ psched_mtu(qdisc_dev(sch))); ++ + if (q->qdisc != &noop_qdisc) { + err = fifo_set_limit(q->qdisc, qopt->limit); + if (err) diff --git a/queue-3.12/series b/queue-3.12/series index ef29cfcca4c..355976c2afb 100644 --- a/queue-3.12/series +++ b/queue-3.12/series @@ -1,2 +1,60 @@ 
net-mv643xx_eth-add-missing-phy_addr_set-in-dt-mode.patch net-mv643xx_eth-potential-null-dereference-in.patch +ipv6-fix-headroom-calculation-in-udp6_ufo_fragment.patch +bonding-rcuify-bond_set_rx_mode.patch +net-x86-bpf-don-t-forget-to-free-sk_filter-v2.patch +net-mlx4_en-fixed-crash-when-port-type-is-changed.patch +net-fix-ip-rule-delete-table-256.patch +ipv6-use-rt6_get_dflt_router-to-get-default-router-in.patch +ipv6-protect-for_each_sk_fl_rcu-in-mem_check-with.patch +random32-fix-off-by-one-in-seeding-requirement.patch +bonding-don-t-permit-to-use-arp-monitoring-in-802.3ad.patch +usbnet-fix-status-interrupt-urb-handling.patch +core-dev-do-not-ignore-dmac-in-dev_forward_skb.patch +6lowpan-uncompression-of-traffic-class-field-was.patch +tuntap-limit-head-length-of-skb-allocated.patch +macvtap-limit-head-length-of-skb-allocated.patch +tcp-tsq-restore-minimal-amount-of-queueing.patch +bonding-fix-two-race-conditions-in.patch +net-tcp-fix-panic-in-tcp_fastopen_cache_set.patch +sit-fix-use-after-free-of-fb_tunnel_dev.patch +isdnloop-use-strlcpy-instead-of-strcpy.patch +ip6tnl-fix-use-after-free-of-fb_tnl_dev.patch +pkt_sched-fq-change-classification-of-control.patch +connector-improved-unaligned-access-error-fix.patch +ipv4-fix-possible-seqlock-deadlock.patch +pkt_sched-fq-warn-users-using-defrate.patch +pkt_sched-fq-fix-pacing-for-small-frames.patch +inet-prevent-leakage-of-uninitialized-memory-to-user-in-recv-syscalls.patch +ping-prevent-null-pointer-dereference-on-write-to-msg_name.patch +net-rework-recvmsg-handler-msg_name-and-msg_namelen-logic.patch +net-add-bug_on-if-kernel-advertises-msg_namelen-sizeof-struct-sockaddr_storage.patch +inet-fix-addr_len-msg-msg_namelen-assignment-in-recv_error-and-rxpmtu-functions.patch +net-clamp-msg_namelen-instead-of-returning-an-error.patch +ipv6-fix-leaking-uninitialized-port-number-of-offender-sockaddr.patch +ipv6-fix-inet6_init-cleanup-order.patch +ip6_output-fragment-outgoing-reassembled-skb-properly.patch 
+netfilter-push-reasm-skb-through-instead-of-original-frag-skbs.patch +xfrm-release-dst-if-this-dst-is-improper-for-vti-tunnel.patch +atm-idt77252-fix-dev-refcnt-leak.patch +tcp-don-t-update-snd_nxt-when-a-socket-is-switched-from-repair-mode.patch +ipv4-fix-race-in-concurrent-ip_route_input_slow.patch +net-core-always-propagate-flag-changes-to-interfaces.patch +bridge-flush-br-s-address-entry-in-fdb-when-remove-the-bridge-dev.patch +packet-fix-use-after-free-race-in-send-path-when-dev-is-released.patch +af_packet-block-bh-in-prb_shutdown_retire_blk_timer.patch +gso-handle-new-frag_list-of-frags-gro-packets.patch +gro-only-verify-tcp-checksums-for-candidates.patch +gro-clean-up-tcpx_gro_receive-checksum-verification.patch +sch_tbf-handle-too-small-burst.patch +xen-netback-include-definition-of-csum_ipv6_magic.patch +via-velocity-fix-netif_receive_skb-use-in-irq-disabled-section.patch +r8169-check-aldps-bit-and-disable-it-if-enabled-for-the-8168g.patch +net-8139cp-fix-a-bug_on-triggered-by-wrong-bytes_compl.patch +net-smc91-fix-crash-regression-on-the-versatile.patch +net-update-consumers-of-msg_more-to-recognize-msg_sendpage_notlast.patch +team-fix-master-carrier-set-when-user-linkup-is-enabled.patch +inet-fix-possible-seqlock-deadlocks.patch +ipv6-fix-possible-seqlock-deadlock-in-ip6_finish_output2.patch +pktgen-xfrm-update-ipv4-header-total-len-and-checksum-after-tranformation.patch +xfrm-fix-null-pointer-dereference-when-decoding-sessions.patch diff --git a/queue-3.12/sit-fix-use-after-free-of-fb_tunnel_dev.patch b/queue-3.12/sit-fix-use-after-free-of-fb_tunnel_dev.patch new file mode 100644 index 00000000000..d56aec1c207 --- /dev/null +++ b/queue-3.12/sit-fix-use-after-free-of-fb_tunnel_dev.patch @@ -0,0 +1,101 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Willem de Bruijn +Date: Wed, 13 Nov 2013 21:27:38 -0500 +Subject: sit: fix use after free of fb_tunnel_dev + +From: Willem de Bruijn + +[ Upstream commit 9434266f2c645d4fcf62a03a8e36ad8075e37943 ] + 
+Bug: The fallback device is created in sit_init_net and assumed to be +freed in sit_exit_net. First, it is dereferenced in that function, in +sit_destroy_tunnels: + + struct net *net = dev_net(sitn->fb_tunnel_dev); + +Prior to this, rtnl_unlink_register has removed all devices that match +rtnl_link_ops == sit_link_ops. + +Commit 205983c43700 added the line + ++ sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops; + +which causes the fallback device to match here and be freed before it +is last dereferenced. + +Fix: This commit adds an explicit .dellink callback to sit_link_ops +that skips deallocation at rtnl_unlink_register for the fallback +device. This mechanism is comparable to the one in ip_tunnel. + +It also modifies sit_destroy_tunnels and its only caller sit_exit_net +to avoid the offending dereference in the first place. That double +lookup is more complicated than required. + +Test: The bug is only triggered when CONFIG_NET_NS is enabled. It +causes a GPF only when CONFIG_DEBUG_SLAB is enabled. Verified that +this bug exists at the mentioned commit, at davem-net HEAD and at +3.11.y HEAD. Verified that it went away after applying this patch. + +Fixes: 205983c43700 ("sit: allow to use rtnl ops on fb tunnel") + +Signed-off-by: Willem de Bruijn +Acked-by: Nicolas Dichtel +Acked-by: Eric Dumazet +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/sit.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +--- a/net/ipv6/sit.c ++++ b/net/ipv6/sit.c +@@ -1594,6 +1594,15 @@ static const struct nla_policy ipip6_pol + #endif + }; + ++static void ipip6_dellink(struct net_device *dev, struct list_head *head) ++{ ++ struct net *net = dev_net(dev); ++ struct sit_net *sitn = net_generic(net, sit_net_id); ++ ++ if (dev != sitn->fb_tunnel_dev) ++ unregister_netdevice_queue(dev, head); ++} ++ + static struct rtnl_link_ops sit_link_ops __read_mostly = { + .kind = "sit", + .maxtype = IFLA_IPTUN_MAX, +@@ -1605,6 +1614,7 @@ static struct rtnl_link_ops sit_link_ops + .changelink = ipip6_changelink, + .get_size = ipip6_get_size, + .fill_info = ipip6_fill_info, ++ .dellink = ipip6_dellink, + }; + + static struct xfrm_tunnel sit_handler __read_mostly = { +@@ -1619,9 +1629,10 @@ static struct xfrm_tunnel ipip_handler _ + .priority = 2, + }; + +-static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) ++static void __net_exit sit_destroy_tunnels(struct net *net, ++ struct list_head *head) + { +- struct net *net = dev_net(sitn->fb_tunnel_dev); ++ struct sit_net *sitn = net_generic(net, sit_net_id); + struct net_device *dev, *aux; + int prio; + +@@ -1696,11 +1707,10 @@ err_alloc_dev: + + static void __net_exit sit_exit_net(struct net *net) + { +- struct sit_net *sitn = net_generic(net, sit_net_id); + LIST_HEAD(list); + + rtnl_lock(); +- sit_destroy_tunnels(sitn, &list); ++ sit_destroy_tunnels(net, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); + } diff --git a/queue-3.12/tcp-don-t-update-snd_nxt-when-a-socket-is-switched-from-repair-mode.patch b/queue-3.12/tcp-don-t-update-snd_nxt-when-a-socket-is-switched-from-repair-mode.patch new file mode 100644 index 00000000000..ff38fb742ac --- /dev/null +++ b/queue-3.12/tcp-don-t-update-snd_nxt-when-a-socket-is-switched-from-repair-mode.patch @@ -0,0 +1,88 @@ +From 
foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Andrey Vagin +Date: Tue, 19 Nov 2013 22:10:06 +0400 +Subject: tcp: don't update snd_nxt, when a socket is switched from repair mode + +From: Andrey Vagin + +[ Upstream commit dbde497966804e63a38fdedc1e3815e77097efc2 ] + +snd_nxt must be updated synchronously with sk_send_head. Otherwise +tp->packets_out may be updated incorrectly, what may bring a kernel panic. + +Here is a kernel panic from my host. +[ 103.043194] BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 +[ 103.044025] IP: [] tcp_rearm_rto+0xcf/0x150 +... +[ 146.301158] Call Trace: +[ 146.301158] [] tcp_ack+0xcc0/0x12c0 + +Before this panic a tcp socket was restored. This socket had sent and +unsent data in the write queue. Sent data was restored in repair mode, +then the socket was switched from repair mode and unsent data was +restored. After that the socket was switched back into repair mode. + +In that moment we had a socket where write queue looks like this: +snd_una snd_nxt write_seq + |_________|________| + | + sk_send_head + +After a second switching from repair mode the state of socket was +changed: + +snd_una snd_nxt, write_seq + |_________ ________| + | + sk_send_head + +This state is inconsistent, because snd_nxt and sk_send_head are not +synchronized. + +Below you can find a call trace, how packets_out can be incremented +twice for one skb, if snd_nxt and sk_send_head are not synchronized. +In this case packets_out will be always positive, even when +sk_write_queue is empty. + +tcp_write_wakeup + skb = tcp_send_head(sk); + tcp_fragment + if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) + tcp_adjust_pcount(sk, skb, diff); + tcp_event_new_data_sent + tp->packets_out += tcp_skb_pcount(skb); + +I think update of snd_nxt isn't required, when a socket is switched from +repair mode. Because it's initialized in tcp_connect_init.
Then when a +write queue is restored, snd_nxt is incremented in tcp_event_new_data_sent, +so it's always is in consistent state. + +I have checked, that the bug is not reproduced with this patch and +all tests about restoring tcp connections work fine. + +Cc: Pavel Emelyanov +Cc: Eric Dumazet +Cc: "David S. Miller" +Cc: Alexey Kuznetsov +Cc: James Morris +Cc: Hideaki YOSHIFUJI +Cc: Patrick McHardy +Signed-off-by: Andrey Vagin +Acked-by: Pavel Emelyanov +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -3112,7 +3112,6 @@ void tcp_send_window_probe(struct sock * + { + if (sk->sk_state == TCP_ESTABLISHED) { + tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; +- tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq; + tcp_xmit_probe_skb(sk, 0); + } + } diff --git a/queue-3.12/tcp-tsq-restore-minimal-amount-of-queueing.patch b/queue-3.12/tcp-tsq-restore-minimal-amount-of-queueing.patch new file mode 100644 index 00000000000..6e4c3e03330 --- /dev/null +++ b/queue-3.12/tcp-tsq-restore-minimal-amount-of-queueing.patch @@ -0,0 +1,90 @@ +From foo@baz Thu Dec 5 16:16:38 PST 2013 +From: Eric Dumazet +Date: Wed, 13 Nov 2013 06:32:54 -0800 +Subject: tcp: tsq: restore minimal amount of queueing + +From: Eric Dumazet + +[ Upstream commit 98e09386c0ef4dfd48af7ba60ff908f0d525cdee ] + +After commit c9eeec26e32e ("tcp: TSQ can use a dynamic limit"), several +users reported throughput regressions, notably on mvneta and wifi +adapters. + +802.11 AMPDU requires a fair amount of queueing to be effective. + +This patch partially reverts the change done in tcp_write_xmit() +so that the minimal amount is sysctl_tcp_limit_output_bytes. + +It also remove the use of this sysctl while building skb stored +in write queue, as TSO autosizing does the right thing anyway. 
+ +Users with well behaving NICS and correct qdisc (like sch_fq), +can then lower the default sysctl_tcp_limit_output_bytes value from +128KB to 8KB. + +This new usage of sysctl_tcp_limit_output_bytes permits each driver +authors to check how their driver performs when/if the value is set +to a minimum of 4KB. + +Normally, line rate for a single TCP flow should be possible, +but some drivers rely on timers to perform TX completion and +too long TX completion delays prevent reaching full throughput. + +Fixes: c9eeec26e32e ("tcp: TSQ can use a dynamic limit") +Signed-off-by: Eric Dumazet +Reported-by: Sujith Manoharan +Reported-by: Arnaud Ebalard +Tested-by: Sujith Manoharan +Cc: Felix Fietkau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/networking/ip-sysctl.txt | 3 --- + net/ipv4/tcp.c | 6 ------ + net/ipv4/tcp_output.c | 6 +++++- + 3 files changed, 5 insertions(+), 10 deletions(-) + +--- a/Documentation/networking/ip-sysctl.txt ++++ b/Documentation/networking/ip-sysctl.txt +@@ -588,9 +588,6 @@ tcp_limit_output_bytes - INTEGER + typical pfifo_fast qdiscs. + tcp_limit_output_bytes limits the number of bytes on qdisc + or device to reduce artificial RTT/cwnd and reduce bufferbloat. +- Note: For GSO/TSO enabled flows, we try to have at least two +- packets in flight. 
Reducing tcp_limit_output_bytes might also +- reduce the size of individual GSO packet (64KB being the max) + Default: 131072 + + tcp_challenge_ack_limit - INTEGER +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -806,12 +806,6 @@ static unsigned int tcp_xmit_size_goal(s + xmit_size_goal = min_t(u32, gso_size, + sk->sk_gso_max_size - 1 - hlen); + +- /* TSQ : try to have at least two segments in flight +- * (one in NIC TX ring, another in Qdisc) +- */ +- xmit_size_goal = min_t(u32, xmit_size_goal, +- sysctl_tcp_limit_output_bytes >> 1); +- + xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); + + /* We try hard to avoid divides here */ +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1875,8 +1875,12 @@ static bool tcp_write_xmit(struct sock * + * - better RTT estimation and ACK scheduling + * - faster recovery + * - high rates ++ * Alas, some drivers / subsystems require a fair amount ++ * of queued bytes to ensure line rate. ++ * One example is wifi aggregation (802.11 AMPDU) + */ +- limit = max(skb->truesize, sk->sk_pacing_rate >> 10); ++ limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes, ++ sk->sk_pacing_rate >> 10); + + if (atomic_read(&sk->sk_wmem_alloc) > limit) { + set_bit(TSQ_THROTTLED, &tp->tsq_flags); diff --git a/queue-3.12/team-fix-master-carrier-set-when-user-linkup-is-enabled.patch b/queue-3.12/team-fix-master-carrier-set-when-user-linkup-is-enabled.patch new file mode 100644 index 00000000000..ddde7e3bcad --- /dev/null +++ b/queue-3.12/team-fix-master-carrier-set-when-user-linkup-is-enabled.patch @@ -0,0 +1,51 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Jiri Pirko +Date: Thu, 28 Nov 2013 18:01:38 +0100 +Subject: team: fix master carrier set when user linkup is enabled + +From: Jiri Pirko + +[ Upstream commit f5e0d34382e18f396d7673a84df8e3342bea7eb6 ] + +When user linkup is enabled and user sets linkup of individual port, +we need to recompute linkup (carrier) of master interface so the change +is reflected. 
Fix this by calling __team_carrier_check() which does the +needed work. + +Please apply to all stable kernels as well. Thanks. + +Reported-by: Jan Tluka +Signed-off-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/team/team.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -1366,6 +1366,8 @@ static int team_user_linkup_option_get(s + return 0; + } + ++static void __team_carrier_check(struct team *team); ++ + static int team_user_linkup_option_set(struct team *team, + struct team_gsetter_ctx *ctx) + { +@@ -1373,6 +1375,7 @@ static int team_user_linkup_option_set(s + + port->user.linkup = ctx->data.bool_val; + team_refresh_port_linkup(port); ++ __team_carrier_check(port->team); + return 0; + } + +@@ -1392,6 +1395,7 @@ static int team_user_linkup_en_option_se + + port->user.linkup_enabled = ctx->data.bool_val; + team_refresh_port_linkup(port); ++ __team_carrier_check(port->team); + return 0; + } + diff --git a/queue-3.12/tuntap-limit-head-length-of-skb-allocated.patch b/queue-3.12/tuntap-limit-head-length-of-skb-allocated.patch new file mode 100644 index 00000000000..5f1335ed66a --- /dev/null +++ b/queue-3.12/tuntap-limit-head-length-of-skb-allocated.patch @@ -0,0 +1,66 @@ +From foo@baz Thu Dec 5 16:16:38 PST 2013 +From: Jason Wang +Date: Wed, 13 Nov 2013 14:00:39 +0800 +Subject: tuntap: limit head length of skb allocated + +From: Jason Wang + +[ Upstream commit 96f8d9ecf227638c89f98ccdcdd50b569891976c ] + +We currently use hdr_len as a hint of head length which is advertised by +guest. But when guest advertise a very big value, it can lead to an 64K+ +allocating of kmalloc() which has a very high possibility of failure when host +memory is fragmented or under heavy stress. The huge hdr_len also reduce the +effect of zerocopy or even disable if a gso skb is linearized in guest. 
+ +To solve those issues, this patch introduces an upper limit (PAGE_SIZE) of the +head, which guarantees an order 0 allocation each time. + +Cc: Stefan Hajnoczi +Cc: Michael S. Tsirkin +Signed-off-by: Jason Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -981,6 +981,7 @@ static ssize_t tun_get_user(struct tun_s + struct sk_buff *skb; + size_t len = total_len, align = NET_SKB_PAD, linear; + struct virtio_net_hdr gso = { 0 }; ++ int good_linear; + int offset = 0; + int copylen; + bool zerocopy = false; +@@ -1021,12 +1022,16 @@ static ssize_t tun_get_user(struct tun_s + return -EINVAL; + } + ++ good_linear = SKB_MAX_HEAD(align); ++ + if (msg_control) { + /* There are 256 bytes to be copied in skb, so there is + * enough room for skb expand head in case it is used. + * The rest of the buffer is mapped from userspace. + */ + copylen = gso.hdr_len ?
gso.hdr_len : GOODCOPY_LEN; ++ if (copylen > good_linear) ++ copylen = good_linear; + linear = copylen; + if (iov_pages(iv, offset + copylen, count) <= MAX_SKB_FRAGS) + zerocopy = true; +@@ -1034,7 +1039,10 @@ static ssize_t tun_get_user(struct tun_s + + if (!zerocopy) { + copylen = len; +- linear = gso.hdr_len; ++ if (gso.hdr_len > good_linear) ++ linear = good_linear; ++ else ++ linear = gso.hdr_len; + } + + skb = tun_alloc_skb(tfile, align, copylen, linear, noblock); diff --git a/queue-3.12/usbnet-fix-status-interrupt-urb-handling.patch b/queue-3.12/usbnet-fix-status-interrupt-urb-handling.patch new file mode 100644 index 00000000000..83963332a6a --- /dev/null +++ b/queue-3.12/usbnet-fix-status-interrupt-urb-handling.patch @@ -0,0 +1,37 @@ +From foo@baz Thu Dec 5 16:16:37 PST 2013 +From: Felix Fietkau +Date: Tue, 12 Nov 2013 16:34:41 +0100 +Subject: usbnet: fix status interrupt urb handling + +From: Felix Fietkau + +[ Upstream commit 52f48d0d9aaa621ffa5e08d79da99a3f8c93b848 ] + +Since commit 7b0c5f21f348a66de495868b8df0284e8dfd6bbf +"sierra_net: keep status interrupt URB active", sierra_net triggers +status interrupt polling before the net_device is opened (in order to +properly receive the sync message response). + +To be able to receive further interrupts, the interrupt urb needs to be +re-submitted, so this patch removes the bogus check for netif_running(). + +Signed-off-by: Felix Fietkau +Tested-by: Dan Williams +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/usbnet.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/drivers/net/usb/usbnet.c ++++ b/drivers/net/usb/usbnet.c +@@ -204,9 +204,6 @@ static void intr_complete (struct urb *u + break; + } + +- if (!netif_running (dev->net)) +- return; +- + status = usb_submit_urb (urb, GFP_ATOMIC); + if (status != 0) + netif_err(dev, timer, dev->net, diff --git a/queue-3.12/via-velocity-fix-netif_receive_skb-use-in-irq-disabled-section.patch b/queue-3.12/via-velocity-fix-netif_receive_skb-use-in-irq-disabled-section.patch new file mode 100644 index 00000000000..74b1bd0bb37 --- /dev/null +++ b/queue-3.12/via-velocity-fix-netif_receive_skb-use-in-irq-disabled-section.patch @@ -0,0 +1,71 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Francois Romieu +Date: Tue, 26 Nov 2013 00:40:58 +0100 +Subject: via-velocity: fix netif_receive_skb use in irq disabled section. + +From: Francois Romieu + +[ Upstream commit bc9627e7e918a85e906c1a3f6d01d9b8ef911a96 ] + +2fdac010bdcf10a30711b6924612dfc40daf19b8 ("via-velocity.c: update napi +implementation") overlooked an irq disabling spinlock when the Rx part +of the NAPI poll handler was converted from netif_rx to netif_receive_skb. + +NAPI Rx processing can be taken out of the locked section with a pair of +napi_{disable / enable} since it only races with the MTU change function. + +An heavier rework of the NAPI locking would be able to perform NAPI Tx +before Rx where I simply removed one of velocity_tx_srv calls. + +References: https://bugzilla.redhat.com/show_bug.cgi?id=1022733 +Fixes: 2fdac010bdcf (via-velocity.c: update napi implementation) +Signed-off-by: Francois Romieu +Tested-by: Alex A. Schmidt +Cc: Jamie Heilman +Cc: Michele Baldessari +Cc: Julia Lawall +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/via/via-velocity.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/via/via-velocity.c ++++ b/drivers/net/ethernet/via/via-velocity.c +@@ -2172,16 +2172,13 @@ static int velocity_poll(struct napi_str + unsigned int rx_done; + unsigned long flags; + +- spin_lock_irqsave(&vptr->lock, flags); + /* + * Do rx and tx twice for performance (taken from the VIA + * out-of-tree driver). + */ +- rx_done = velocity_rx_srv(vptr, budget / 2); +- velocity_tx_srv(vptr); +- rx_done += velocity_rx_srv(vptr, budget - rx_done); ++ rx_done = velocity_rx_srv(vptr, budget); ++ spin_lock_irqsave(&vptr->lock, flags); + velocity_tx_srv(vptr); +- + /* If budget not fully consumed, exit the polling mode */ + if (rx_done < budget) { + napi_complete(napi); +@@ -2342,6 +2339,8 @@ static int velocity_change_mtu(struct ne + if (ret < 0) + goto out_free_tmp_vptr_1; + ++ napi_disable(&vptr->napi); ++ + spin_lock_irqsave(&vptr->lock, flags); + + netif_stop_queue(dev); +@@ -2362,6 +2361,8 @@ static int velocity_change_mtu(struct ne + + velocity_give_many_rx_descs(vptr); + ++ napi_enable(&vptr->napi); ++ + mac_enable_int(vptr->mac_regs); + netif_start_queue(dev); + diff --git a/queue-3.12/xen-netback-include-definition-of-csum_ipv6_magic.patch b/queue-3.12/xen-netback-include-definition-of-csum_ipv6_magic.patch new file mode 100644 index 00000000000..98ab3e1ed5b --- /dev/null +++ b/queue-3.12/xen-netback-include-definition-of-csum_ipv6_magic.patch @@ -0,0 +1,33 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Andy Whitcroft +Date: Mon, 25 Nov 2013 16:52:34 +0000 +Subject: xen-netback: include definition of csum_ipv6_magic + +From: Andy Whitcroft + +[ Upstream commit ae5e8127b712313ec1b99356019ce9226fea8b88 ] + +We are now using csum_ipv6_magic, include the appropriate header. 
+Avoids the following error: + + drivers/net/xen-netback/netback.c:1313:4: error: implicit declaration of function 'csum_ipv6_magic' [-Werror=implicit-function-declaration] + tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, + +Signed-off-by: Andy Whitcroft +Acked-by: Ian Campbell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netback/netback.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -39,6 +39,7 @@ + #include + + #include ++#include + + #include + #include diff --git a/queue-3.12/xfrm-fix-null-pointer-dereference-when-decoding-sessions.patch b/queue-3.12/xfrm-fix-null-pointer-dereference-when-decoding-sessions.patch new file mode 100644 index 00000000000..c1d6e4834a3 --- /dev/null +++ b/queue-3.12/xfrm-fix-null-pointer-dereference-when-decoding-sessions.patch @@ -0,0 +1,64 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: Steffen Klassert +Date: Wed, 30 Oct 2013 11:16:28 +0100 +Subject: xfrm: Fix null pointer dereference when decoding sessions + +From: Steffen Klassert + +[ Upstream commit 84502b5ef9849a9694673b15c31bd3ac693010ae ] + +On some codepaths the skb does not have a dst entry +when xfrm_decode_session() is called. So check for +a valid skb_dst() before dereferencing the device +interface index. We use 0 as the device index if +there is no valid skb_dst(), or at reverse decoding +we use skb_iif as device interface index. + +Bug was introduced with git commit bafd4bd4dc +("xfrm: Decode sessions with output interface."). 
+ +Reported-by: Meelis Roos +Tested-by: Meelis Roos +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/xfrm4_policy.c | 6 +++++- + net/ipv6/xfrm6_policy.c | 6 +++++- + 2 files changed, 10 insertions(+), 2 deletions(-) + +--- a/net/ipv4/xfrm4_policy.c ++++ b/net/ipv4/xfrm4_policy.c +@@ -104,10 +104,14 @@ _decode_session4(struct sk_buff *skb, st + const struct iphdr *iph = ip_hdr(skb); + u8 *xprth = skb_network_header(skb) + iph->ihl * 4; + struct flowi4 *fl4 = &fl->u.ip4; ++ int oif = 0; ++ ++ if (skb_dst(skb)) ++ oif = skb_dst(skb)->dev->ifindex; + + memset(fl4, 0, sizeof(struct flowi4)); + fl4->flowi4_mark = skb->mark; +- fl4->flowi4_oif = skb_dst(skb)->dev->ifindex; ++ fl4->flowi4_oif = reverse ? skb->skb_iif : oif; + + if (!ip_is_fragment(iph)) { + switch (iph->protocol) { +--- a/net/ipv6/xfrm6_policy.c ++++ b/net/ipv6/xfrm6_policy.c +@@ -135,10 +135,14 @@ _decode_session6(struct sk_buff *skb, st + struct ipv6_opt_hdr *exthdr; + const unsigned char *nh = skb_network_header(skb); + u8 nexthdr = nh[IP6CB(skb)->nhoff]; ++ int oif = 0; ++ ++ if (skb_dst(skb)) ++ oif = skb_dst(skb)->dev->ifindex; + + memset(fl6, 0, sizeof(struct flowi6)); + fl6->flowi6_mark = skb->mark; +- fl6->flowi6_oif = skb_dst(skb)->dev->ifindex; ++ fl6->flowi6_oif = reverse ? skb->skb_iif : oif; + + fl6->daddr = reverse ? hdr->saddr : hdr->daddr; + fl6->saddr = reverse ? 
hdr->daddr : hdr->saddr; diff --git a/queue-3.12/xfrm-release-dst-if-this-dst-is-improper-for-vti-tunnel.patch b/queue-3.12/xfrm-release-dst-if-this-dst-is-improper-for-vti-tunnel.patch new file mode 100644 index 00000000000..bbdf3070073 --- /dev/null +++ b/queue-3.12/xfrm-release-dst-if-this-dst-is-improper-for-vti-tunnel.patch @@ -0,0 +1,33 @@ +From foo@baz Thu Dec 5 16:16:39 PST 2013 +From: "fan.du" +Date: Tue, 19 Nov 2013 16:53:28 +0800 +Subject: xfrm: Release dst if this dst is improper for vti tunnel + +From: "fan.du" + +[ Upstream commit 236c9f84868534c718b6889aa624de64763281f9 ] + +After searching rt by the vti tunnel dst/src parameter, +if this rt has neither attached to any transformation +nor the transformation is not tunnel oriented, this rt +should be released back to ip layer. + +otherwise causing dst memory leakage. + +Signed-off-by: Fan Du +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_vti.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/ip_vti.c ++++ b/net/ipv4/ip_vti.c +@@ -190,6 +190,7 @@ static netdev_tx_t vti_tunnel_xmit(struc + if (!rt->dst.xfrm || + rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) { + dev->stats.tx_carrier_errors++; ++ ip_rt_put(rt); + goto tx_error_icmp; + } + tdev = rt->dst.dev;