From: Greg Kroah-Hartman Date: Sat, 26 Sep 2015 19:21:16 +0000 (-0700) Subject: 3.14-stable patches X-Git-Tag: v4.1.9~11 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0bc44bdd10c256619604ef3c3f27e88e64d55250;p=thirdparty%2Fkernel%2Fstable-queue.git 3.14-stable patches added patches: bna-fix-interrupts-storm-caused-by-erroneous-packets.patch bonding-correct-the-mac-address-for-follow-fail_over_mac-policy.patch bonding-fix-destruction-of-bond-with-devices-different-from-arphrd_ether.patch bridge-mdb-fix-double-add-notification.patch bridge-mdb-zero-out-the-local-br_ip-variable-before-use.patch inet-frags-fix-defragmented-packet-s-ip-header-for-af_packet.patch ip_tunnel-fix-ipv4-pmtu-check-to-honor-inner-ip-header-df.patch ipv6-lock-socket-in-ip6_datagram_connect.patch ipv6-make-mld-packets-to-only-be-processed-locally.patch isdn-gigaset-reset-tty-receive_room-when-attaching-ser_gigaset.patch net-call-rcu_read_lock-early-in-process_backlog.patch net-clone-skb-before-setting-peeked-flag.patch net-do-not-process-device-backlog-during-unregistration.patch net-fix-skb-csum-races-when-peeking.patch net-fix-skb_set_peeked-use-after-free-bug.patch net-graceful-exit-from-netif_alloc_netdev_queues.patch net-mlx4_core-fix-wrong-index-in-propagating-port-change-event-to-vfs.patch net-pktgen-fix-race-between-pktgen_thread_worker-and-kthread_stop.patch net-tipc-initialize-security-state-for-new-connection-socket.patch netlink-don-t-hold-mutex-in-rcu-callback-when-releasing-mmapd-ring.patch packet-missing-dev_put-in-packet_do_bind.patch rds-fix-an-integer-overflow-test-in-rds_info_getsockopt.patch rtnetlink-verify-ifla_vf_info-attributes-before-passing-them-to-driver.patch subject-net-gso-use-feature-flag-argument-in-all-protocol-gso-handlers.patch udp-fix-dst-races-with-multicast-early-demux.patch --- diff --git a/queue-3.14/bna-fix-interrupts-storm-caused-by-erroneous-packets.patch b/queue-3.14/bna-fix-interrupts-storm-caused-by-erroneous-packets.patch new file mode 
100644 index 00000000000..f5d76667906 --- /dev/null +++ b/queue-3.14/bna-fix-interrupts-storm-caused-by-erroneous-packets.patch @@ -0,0 +1,43 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Ivan Vecera +Date: Thu, 6 Aug 2015 22:48:23 +0200 +Subject: bna: fix interrupts storm caused by erroneous packets + +From: Ivan Vecera + +[ Upstream commit ade4dc3e616e33c80d7e62855fe1b6f9895bc7c3 ] + +The commit "e29aa33 bna: Enable Multi Buffer RX" moved the packet counter +increment from the beginning of the NAPI processing loop to after the check +for erroneous packets, so such packets are never accounted. This counter is used +to inform the firmware about the number of processed completions (packets). +As these packets are never acked, the firmware fires IRQs for them again +and again. + +Fixes: e29aa33 ("bna: Enable Multi Buffer RX") +Signed-off-by: Ivan Vecera +Acked-by: Rasesh Mody +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/brocade/bna/bnad.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/brocade/bna/bnad.c ++++ b/drivers/net/ethernet/brocade/bna/bnad.c +@@ -674,6 +674,7 @@ bnad_cq_process(struct bnad *bnad, struc + if (!next_cmpl->valid) + break; + } ++ packets++; + + /* TODO: BNA_CQ_EF_LOCAL ? 
*/ + if (unlikely(flags & (BNA_CQ_EF_MAC_ERROR | +@@ -690,7 +691,6 @@ bnad_cq_process(struct bnad *bnad, struc + else + bnad_cq_setup_skb_frags(rcb, skb, sop_ci, nvecs, len); + +- packets++; + rcb->rxq->rx_packets++; + rcb->rxq->rx_bytes += totlen; + ccb->bytes_per_intr += totlen; diff --git a/queue-3.14/bonding-correct-the-mac-address-for-follow-fail_over_mac-policy.patch b/queue-3.14/bonding-correct-the-mac-address-for-follow-fail_over_mac-policy.patch new file mode 100644 index 00000000000..8bb2ba6a2dc --- /dev/null +++ b/queue-3.14/bonding-correct-the-mac-address-for-follow-fail_over_mac-policy.patch @@ -0,0 +1,80 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: dingtianhong +Date: Thu, 16 Jul 2015 16:30:02 +0800 +Subject: bonding: correct the MAC address for "follow" fail_over_mac policy + +From: dingtianhong + +[ Upstream commit a951bc1e6ba58f11df5ed5ddc41311e10f5fd20b ] + +The "follow" fail_over_mac policy is useful for multiport devices that +either become confused or incur a performance penalty when multiple +ports are programmed with the same MAC address, but duplicate MAC +addresses may still occur under this policy through the following steps: + +1) echo +eth0 > /sys/class/net/bond0/bonding/slaves + bond0 has the same MAC address as eth0, it is MAC1. + +2) echo +eth1 > /sys/class/net/bond0/bonding/slaves + eth1 is backup, eth1 has MAC2. + +3) ifconfig eth0 down + eth1 becomes the active slave, the bond swaps the MACs of eth0 and eth1, + so eth1 has MAC1, and eth0 has MAC2. + +4) ifconfig eth1 down + there is no active slave, and eth1 still has MAC1, eth0 has MAC2. + +5) ifconfig eth0 up + eth0 becomes the active slave again, and the bond sets eth0 to MAC1. + +Something is wrong here: if you then set eth1 up, eth0 and eth1 will have the same +MAC address, which breaks this policy for ACTIVE_BACKUP mode. + +This patch fixes the problem by finding the old active slave and +swapping the MAC addresses before changing the active slave. 
+ +Signed-off-by: Ding Tianhong +Tested-by: Nikolay Aleksandrov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -636,6 +636,23 @@ static void bond_set_dev_addr(struct net + call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev); + } + ++static struct slave *bond_get_old_active(struct bonding *bond, ++ struct slave *new_active) ++{ ++ struct slave *slave; ++ struct list_head *iter; ++ ++ bond_for_each_slave(bond, slave, iter) { ++ if (slave == new_active) ++ continue; ++ ++ if (ether_addr_equal(bond->dev->dev_addr, slave->dev->dev_addr)) ++ return slave; ++ } ++ ++ return NULL; ++} ++ + /* + * bond_do_fail_over_mac + * +@@ -672,6 +689,9 @@ static void bond_do_fail_over_mac(struct + + write_unlock_bh(&bond->curr_slave_lock); + ++ if (!old_active) ++ old_active = bond_get_old_active(bond, new_active); ++ + if (old_active) { + memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN); + memcpy(saddr.sa_data, old_active->dev->dev_addr, diff --git a/queue-3.14/bonding-fix-destruction-of-bond-with-devices-different-from-arphrd_ether.patch b/queue-3.14/bonding-fix-destruction-of-bond-with-devices-different-from-arphrd_ether.patch new file mode 100644 index 00000000000..1c93b4a64c0 --- /dev/null +++ b/queue-3.14/bonding-fix-destruction-of-bond-with-devices-different-from-arphrd_ether.patch @@ -0,0 +1,101 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Nikolay Aleksandrov +Date: Wed, 15 Jul 2015 21:52:51 +0200 +Subject: bonding: fix destruction of bond with devices different from arphrd_ether + +From: Nikolay Aleksandrov + +[ Upstream commit 06f6d1094aa0992432b1e2a0920b0ee86ccd83bf ] + +When the bonding is being unloaded and the netdevice notifier is +unregistered it executes NETDEV_UNREGISTER for each device which should +remove the bond's proc entry but if the device 
enslaved is not of +ARPHRD_ETHER type and is in front of the bonding, it may execute +bond_release_and_destroy() first which would release the last slave and +destroy the bond device leaving the proc entry and thus we will get the +following error (with dynamic debug on for bond_netdev_event to see the +events order): +[ 908.963051] eql: event: 9 +[ 908.963052] eql: IFF_SLAVE +[ 908.963054] eql: event: 2 +[ 908.963056] eql: IFF_SLAVE +[ 908.963058] eql: event: 6 +[ 908.963059] eql: IFF_SLAVE +[ 908.963110] bond0: Releasing active interface eql +[ 908.976168] bond0: Destroying bond bond0 +[ 908.976266] bond0 (unregistering): Released all slaves +[ 908.984097] ------------[ cut here ]------------ +[ 908.984107] WARNING: CPU: 0 PID: 1787 at fs/proc/generic.c:575 +remove_proc_entry+0x112/0x160() +[ 908.984110] remove_proc_entry: removing non-empty directory +'net/bonding', leaking at least 'bond0' +[ 908.984111] Modules linked in: bonding(-) eql(O) 9p nfsd auth_rpcgss +oid_registry nfs_acl nfs lockd grace fscache sunrpc crct10dif_pclmul +crc32_pclmul crc32c_intel ghash_clmulni_intel ppdev qxl drm_kms_helper +snd_hda_codec_generic aesni_intel ttm aes_x86_64 glue_helper pcspkr lrw +gf128mul ablk_helper cryptd snd_hda_intel virtio_console snd_hda_codec +psmouse serio_raw snd_hwdep snd_hda_core 9pnet_virtio 9pnet evdev joydev +drm virtio_balloon snd_pcm snd_timer snd soundcore i2c_piix4 i2c_core +pvpanic acpi_cpufreq parport_pc parport processor thermal_sys button +autofs4 ext4 crc16 mbcache jbd2 hid_generic usbhid hid sg sr_mod cdrom +ata_generic virtio_blk virtio_net floppy ata_piix e1000 libata ehci_pci +virtio_pci scsi_mod uhci_hcd ehci_hcd virtio_ring virtio usbcore +usb_common [last unloaded: bonding] + +[ 908.984168] CPU: 0 PID: 1787 Comm: rmmod Tainted: G W O +4.2.0-rc2+ #8 +[ 908.984170] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 +[ 908.984172] 0000000000000000 ffffffff81732d41 ffffffff81525b34 +ffff8800358dfda8 +[ 908.984175] ffffffff8106c521 
ffff88003595af78 ffff88003595af40 +ffff88003e3a4280 +[ 908.984178] ffffffffa058d040 0000000000000000 ffffffff8106c59a +ffffffff8172ebd0 +[ 908.984181] Call Trace: +[ 908.984188] [] ? dump_stack+0x40/0x50 +[ 908.984193] [] ? warn_slowpath_common+0x81/0xb0 +[ 908.984196] [] ? warn_slowpath_fmt+0x4a/0x50 +[ 908.984199] [] ? remove_proc_entry+0x112/0x160 +[ 908.984205] [] ? bond_destroy_proc_dir+0x26/0x30 +[bonding] +[ 908.984208] [] ? bond_net_exit+0x8e/0xa0 [bonding] +[ 908.984217] [] ? ops_exit_list.isra.4+0x37/0x70 +[ 908.984225] [] ? +unregister_pernet_operations+0x8d/0xd0 +[ 908.984228] [] ? +unregister_pernet_subsys+0x1d/0x30 +[ 908.984232] [] ? bonding_exit+0x23/0xdba [bonding] +[ 908.984236] [] ? SyS_delete_module+0x18a/0x250 +[ 908.984241] [] ? task_work_run+0x89/0xc0 +[ 908.984244] [] ? +entry_SYSCALL_64_fastpath+0x16/0x75 +[ 908.984247] ---[ end trace 7c006ed4abbef24b ]--- + +Thus remove the proc entry manually if bond_release_and_destroy() is +used. Because of the checks in bond_remove_proc_entry() it's not a +problem for a bond device to change namespaces (the bug fixed by the +Fixes commit) but since commit +f9399814927ad ("bonding: Don't allow bond devices to change network +namespaces.") that can't happen anyway. + +Reported-by: Carol Soto +Signed-off-by: Nikolay Aleksandrov +Fixes: a64d49c3dd50 ("bonding: Manage /proc/net/bonding/ entries from + the netdev events") +Tested-by: Carol L Soto +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1825,6 +1825,7 @@ static int bond_release_and_destroy(str + bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; + pr_info("%s: destroying bond %s.\n", + bond_dev->name, bond_dev->name); ++ bond_remove_proc_entry(bond); + unregister_netdevice(bond_dev); + } + return ret; diff --git a/queue-3.14/bridge-mdb-fix-double-add-notification.patch b/queue-3.14/bridge-mdb-fix-double-add-notification.patch new file mode 100644 index 00000000000..7556c0e5090 --- /dev/null +++ b/queue-3.14/bridge-mdb-fix-double-add-notification.patch @@ -0,0 +1,41 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Nikolay Aleksandrov +Date: Mon, 13 Jul 2015 06:36:19 -0700 +Subject: bridge: mdb: fix double add notification + +From: Nikolay Aleksandrov + +[ Upstream commit 5ebc784625ea68a9570d1f70557e7932988cd1b4 ] + +Since the mdb add/del code was introduced there have been 2 br_mdb_notify +calls when doing br_mdb_add() resulting in 2 notifications on each add. + +Example: + Command: bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent + Before patch: + root@debian:~# bridge monitor all + [MDB]dev br0 port eth1 grp 239.0.0.1 permanent + [MDB]dev br0 port eth1 grp 239.0.0.1 permanent + + After patch: + root@debian:~# bridge monitor all + [MDB]dev br0 port eth1 grp 239.0.0.1 permanent + +Signed-off-by: Nikolay Aleksandrov +Fixes: cfd567543590 ("bridge: add support of adding and deleting mdb entries") +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_mdb.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/net/bridge/br_mdb.c ++++ b/net/bridge/br_mdb.c +@@ -347,7 +347,6 @@ static int br_mdb_add_group(struct net_b + return -ENOMEM; + rcu_assign_pointer(*pp, p); + +- br_mdb_notify(br->dev, port, group, RTM_NEWMDB); + return 0; + } + diff --git a/queue-3.14/bridge-mdb-zero-out-the-local-br_ip-variable-before-use.patch b/queue-3.14/bridge-mdb-zero-out-the-local-br_ip-variable-before-use.patch new file mode 100644 index 00000000000..4e540873a32 --- /dev/null +++ b/queue-3.14/bridge-mdb-zero-out-the-local-br_ip-variable-before-use.patch @@ -0,0 +1,57 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Nikolay Aleksandrov +Date: Tue, 7 Jul 2015 15:55:56 +0200 +Subject: bridge: mdb: zero out the local br_ip variable before use + +From: Nikolay Aleksandrov + +[ Upstream commit f1158b74e54f2e2462ba5e2f45a118246d9d5b43 ] + +Since commit b0e9a30dd669 ("bridge: Add vlan id to multicast groups") +there's a check in br_ip_equal() for a matching vlan id, but the mdb +functions were not modified to use (or at least zero it) so when an +entry was added it would have a garbage vlan id (from the local br_ip +variable in __br_mdb_add/del) and this would prevent it from being +matched and also deleted. So zero out the whole local ip var to protect +ourselves from future changes and also to fix the current bug, since +there's no vlan id support in the mdb uapi - use always vlan id 0. 
+Example before patch: +root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent +root@debian:~# bridge mdb +dev br0 port eth1 grp 239.0.0.1 permanent +root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent +RTNETLINK answers: Invalid argument + +After patch: +root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent +root@debian:~# bridge mdb +dev br0 port eth1 grp 239.0.0.1 permanent +root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent +root@debian:~# bridge mdb + +Signed-off-by: Nikolay Aleksandrov +Fixes: b0e9a30dd669 ("bridge: Add vlan id to multicast groups") +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_mdb.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/bridge/br_mdb.c ++++ b/net/bridge/br_mdb.c +@@ -370,6 +370,7 @@ static int __br_mdb_add(struct net *net, + if (!p || p->br != br || p->state == BR_STATE_DISABLED) + return -EINVAL; + ++ memset(&ip, 0, sizeof(ip)); + ip.proto = entry->addr.proto; + if (ip.proto == htons(ETH_P_IP)) + ip.u.ip4 = entry->addr.u.ip4; +@@ -416,6 +417,7 @@ static int __br_mdb_del(struct net_bridg + if (!netif_running(br->dev) || br->multicast_disabled) + return -EINVAL; + ++ memset(&ip, 0, sizeof(ip)); + ip.proto = entry->addr.proto; + if (ip.proto == htons(ETH_P_IP)) { + if (timer_pending(&br->ip4_querier.timer)) diff --git a/queue-3.14/inet-frags-fix-defragmented-packet-s-ip-header-for-af_packet.patch b/queue-3.14/inet-frags-fix-defragmented-packet-s-ip-header-for-af_packet.patch new file mode 100644 index 00000000000..cf7493c0553 --- /dev/null +++ b/queue-3.14/inet-frags-fix-defragmented-packet-s-ip-header-for-af_packet.patch @@ -0,0 +1,58 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Edward Hyunkoo Jee +Date: Tue, 21 Jul 2015 09:43:59 +0200 +Subject: inet: frags: fix defragmented packet's IP header for af_packet + +From: Edward Hyunkoo Jee + +[ Upstream commit 0848f6428ba3a2e42db124d41ac6f548655735bf ] + 
+When ip_frag_queue() computes positions, it assumes that the passed +sk_buff does not contain L2 headers. + +However, when PACKET_FANOUT_FLAG_DEFRAG is used, IP reassembly +functions can be called on outgoing packets that contain L2 headers. + +Also, IPv4 checksum is not corrected after reassembly. + +Fixes: 7736d33f4262 ("packet: Add pre-defragmentation support for ipv4 fanouts.") +Signed-off-by: Edward Hyunkoo Jee +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Cc: Jerry Chu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_fragment.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -357,7 +357,7 @@ static int ip_frag_queue(struct ipq *qp, + ihl = ip_hdrlen(skb); + + /* Determine the position of this fragment. */ +- end = offset + skb->len - ihl; ++ end = offset + skb->len - skb_network_offset(skb) - ihl; + err = -EINVAL; + + /* Is this the final fragment? */ +@@ -387,7 +387,7 @@ static int ip_frag_queue(struct ipq *qp, + goto err; + + err = -ENOMEM; +- if (pskb_pull(skb, ihl) == NULL) ++ if (!pskb_pull(skb, skb_network_offset(skb) + ihl)) + goto err; + + err = pskb_trim_rcsum(skb, end - offset); +@@ -628,6 +628,9 @@ static int ip_frag_reasm(struct ipq *qp, + iph->frag_off = qp->q.max_size ? 
htons(IP_DF) : 0; + iph->tot_len = htons(len); + iph->tos |= ecn; ++ ++ ip_send_check(iph); ++ + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); + qp->q.fragments = NULL; + qp->q.fragments_tail = NULL; diff --git a/queue-3.14/ip_tunnel-fix-ipv4-pmtu-check-to-honor-inner-ip-header-df.patch b/queue-3.14/ip_tunnel-fix-ipv4-pmtu-check-to-honor-inner-ip-header-df.patch new file mode 100644 index 00000000000..643ace743a7 --- /dev/null +++ b/queue-3.14/ip_tunnel-fix-ipv4-pmtu-check-to-honor-inner-ip-header-df.patch @@ -0,0 +1,62 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: =?UTF-8?q?Timo=20Ter=C3=A4s?= +Date: Tue, 7 Jul 2015 08:34:13 +0300 +Subject: ip_tunnel: fix ipv4 pmtu check to honor inner ip header df +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: =?UTF-8?q?Timo=20Ter=C3=A4s?= + +[ Upstream commit fc24f2b2094366da8786f59f2606307e934cea17 ] + +Frag needed should be sent only if the inner header asked +to not fragment. Currently fragmentation is broken if the +tunnel has df set, but df was not asked in the original +packet. The tunnel's df needs to be still checked to update +internally the pmtu cache. + +Commit 23a3647bc4f93bac broke it, and this commit fixes +the ipv4 df check back to the way it was. + +Fixes: 23a3647bc4f93bac ("ip_tunnels: Use skb-len to PMTU check.") +Cc: Pravin B Shelar +Signed-off-by: Timo Teräs +Acked-by: Pravin B Shelar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_tunnel.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -484,7 +484,8 @@ drop: + EXPORT_SYMBOL_GPL(ip_tunnel_rcv); + + static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, +- struct rtable *rt, __be16 df) ++ struct rtable *rt, __be16 df, ++ const struct iphdr *inner_iph) + { + struct ip_tunnel *tunnel = netdev_priv(dev); + int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; +@@ -501,7 +502,8 @@ static int tnl_update_pmtu(struct net_de + + if (skb->protocol == htons(ETH_P_IP)) { + if (!skb_is_gso(skb) && +- (df & htons(IP_DF)) && mtu < pkt_size) { ++ (inner_iph->frag_off & htons(IP_DF)) && ++ mtu < pkt_size) { + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + return -E2BIG; +@@ -632,7 +634,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, + goto tx_error; + } + +- if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { ++ if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { + ip_rt_put(rt); + goto tx_error; + } diff --git a/queue-3.14/ipv6-lock-socket-in-ip6_datagram_connect.patch b/queue-3.14/ipv6-lock-socket-in-ip6_datagram_connect.patch new file mode 100644 index 00000000000..759ecafc815 --- /dev/null +++ b/queue-3.14/ipv6-lock-socket-in-ip6_datagram_connect.patch @@ -0,0 +1,126 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Eric Dumazet +Date: Tue, 14 Jul 2015 08:10:22 +0200 +Subject: ipv6: lock socket in ip6_datagram_connect() + +From: Eric Dumazet + +[ Upstream commit 03645a11a570d52e70631838cb786eb4253eb463 ] + +ip6_datagram_connect() is doing a lot of socket changes without +socket being locked. + +This looks wrong, at least for udp_lib_rehash() which could corrupt +lists because of concurrent udp_sk(sk)->udp_portaddr_hash accesses. + +Signed-off-by: Eric Dumazet +Acked-by: Herbert Xu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip.h | 1 + + net/ipv4/datagram.c | 16 ++++++++++++---- + net/ipv6/datagram.c | 20 +++++++++++++++----- + 3 files changed, 28 insertions(+), 9 deletions(-) + +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -154,6 +154,7 @@ static inline __u8 get_rtconn_flags(stru + } + + /* datagram.c */ ++int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); + int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); + + void ip4_datagram_release_cb(struct sock *sk); +--- a/net/ipv4/datagram.c ++++ b/net/ipv4/datagram.c +@@ -20,7 +20,7 @@ + #include + #include + +-int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ++int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) + { + struct inet_sock *inet = inet_sk(sk); + struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; +@@ -39,8 +39,6 @@ int ip4_datagram_connect(struct sock *sk + + sk_dst_reset(sk); + +- lock_sock(sk); +- + oif = sk->sk_bound_dev_if; + saddr = inet->inet_saddr; + if (ipv4_is_multicast(usin->sin_addr.s_addr)) { +@@ -81,9 +79,19 @@ int ip4_datagram_connect(struct sock *sk + sk_dst_set(sk, &rt->dst); + err = 0; + out: +- release_sock(sk); + return err; + } ++EXPORT_SYMBOL(__ip4_datagram_connect); ++ ++int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ++{ ++ int res; ++ ++ lock_sock(sk); ++ res = __ip4_datagram_connect(sk, uaddr, addr_len); ++ release_sock(sk); ++ return res; ++} + EXPORT_SYMBOL(ip4_datagram_connect); + + /* Because UDP xmit path can manipulate sk_dst_cache without holding +--- a/net/ipv6/datagram.c ++++ b/net/ipv6/datagram.c +@@ -40,7 +40,7 @@ static bool ipv6_mapped_addr_any(const s + return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0); + } + +-int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ++static int __ip6_datagram_connect(struct sock *sk, struct sockaddr 
*uaddr, int addr_len) + { + struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; + struct inet_sock *inet = inet_sk(sk); +@@ -56,7 +56,7 @@ int ip6_datagram_connect(struct sock *sk + if (usin->sin6_family == AF_INET) { + if (__ipv6_only_sock(sk)) + return -EAFNOSUPPORT; +- err = ip4_datagram_connect(sk, uaddr, addr_len); ++ err = __ip4_datagram_connect(sk, uaddr, addr_len); + goto ipv4_connected; + } + +@@ -98,9 +98,9 @@ int ip6_datagram_connect(struct sock *sk + sin.sin_addr.s_addr = daddr->s6_addr32[3]; + sin.sin_port = usin->sin6_port; + +- err = ip4_datagram_connect(sk, +- (struct sockaddr *) &sin, +- sizeof(sin)); ++ err = __ip4_datagram_connect(sk, ++ (struct sockaddr *) &sin, ++ sizeof(sin)); + + ipv4_connected: + if (err) +@@ -203,6 +203,16 @@ out: + fl6_sock_release(flowlabel); + return err; + } ++ ++int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ++{ ++ int res; ++ ++ lock_sock(sk); ++ res = __ip6_datagram_connect(sk, uaddr, addr_len); ++ release_sock(sk); ++ return res; ++} + EXPORT_SYMBOL_GPL(ip6_datagram_connect); + + int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr, diff --git a/queue-3.14/ipv6-make-mld-packets-to-only-be-processed-locally.patch b/queue-3.14/ipv6-make-mld-packets-to-only-be-processed-locally.patch new file mode 100644 index 00000000000..26384faa1b2 --- /dev/null +++ b/queue-3.14/ipv6-make-mld-packets-to-only-be-processed-locally.patch @@ -0,0 +1,40 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Angga +Date: Fri, 3 Jul 2015 14:40:52 +1200 +Subject: ipv6: Make MLD packets to only be processed locally + +From: Angga + +[ Upstream commit 4c938d22c88a9ddccc8c55a85e0430e9c62b1ac5 ] + +Before commit daad151263cf ("ipv6: Make ipv6_is_mld() inline and use it +from ip6_mc_input().") MLD packets were only processed locally. After the +change, a copy of MLD packet goes through ip6_mr_input, causing +MRT6MSG_NOCACHE message to be generated to user space. 
+ +Make MLD packet only processed locally. + +Fixes: daad151263cf ("ipv6: Make ipv6_is_mld() inline and use it from ip6_mc_input().") +Signed-off-by: Hermin Anggawijaya +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_input.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/ipv6/ip6_input.c ++++ b/net/ipv6/ip6_input.c +@@ -329,10 +329,10 @@ int ip6_mc_input(struct sk_buff *skb) + if (offset < 0) + goto out; + +- if (!ipv6_is_mld(skb, nexthdr, offset)) +- goto out; ++ if (ipv6_is_mld(skb, nexthdr, offset)) ++ deliver = true; + +- deliver = true; ++ goto out; + } + /* unknown RA - process it normally */ + } diff --git a/queue-3.14/isdn-gigaset-reset-tty-receive_room-when-attaching-ser_gigaset.patch b/queue-3.14/isdn-gigaset-reset-tty-receive_room-when-attaching-ser_gigaset.patch new file mode 100644 index 00000000000..060e02174a6 --- /dev/null +++ b/queue-3.14/isdn-gigaset-reset-tty-receive_room-when-attaching-ser_gigaset.patch @@ -0,0 +1,52 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Tilman Schmidt +Date: Tue, 14 Jul 2015 00:37:13 +0200 +Subject: isdn/gigaset: reset tty->receive_room when attaching ser_gigaset + +From: Tilman Schmidt + +[ Upstream commit fd98e9419d8d622a4de91f76b306af6aa627aa9c ] + +Commit 79901317ce80 ("n_tty: Don't flush buffer when closing ldisc"), +first merged in kernel release 3.10, caused the following regression +in the Gigaset M101 driver: + +Before that commit, when closing the N_TTY line discipline in +preparation to switching to N_GIGASET_M101, receive_room would be +reset to a non-zero value by the call to n_tty_flush_buffer() in +n_tty's close method. With the removal of that call, receive_room +might be left at zero, blocking data reception on the serial line. + +The present patch fixes that regression by setting receive_room +to an appropriate value in the ldisc open method. 
+ +Fixes: 79901317ce80 ("n_tty: Don't flush buffer when closing ldisc") +Signed-off-by: Tilman Schmidt +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/isdn/gigaset/ser-gigaset.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/drivers/isdn/gigaset/ser-gigaset.c ++++ b/drivers/isdn/gigaset/ser-gigaset.c +@@ -524,9 +524,18 @@ gigaset_tty_open(struct tty_struct *tty) + cs->hw.ser->tty = tty; + atomic_set(&cs->hw.ser->refcnt, 1); + init_completion(&cs->hw.ser->dead_cmp); +- + tty->disc_data = cs; + ++ /* Set the amount of data we're willing to receive per call ++ * from the hardware driver to half of the input buffer size ++ * to leave some reserve. ++ * Note: We don't do flow control towards the hardware driver. ++ * If more data is received than will fit into the input buffer, ++ * it will be dropped and an error will be logged. This should ++ * never happen as the device is slow and the buffer size ample. ++ */ ++ tty->receive_room = RBUFSIZE/2; ++ + /* OK.. Initialization of the datastructures and the HW is done.. Now + * startup system and notify the LL that we are ready to run + */ diff --git a/queue-3.14/net-call-rcu_read_lock-early-in-process_backlog.patch b/queue-3.14/net-call-rcu_read_lock-early-in-process_backlog.patch new file mode 100644 index 00000000000..c205f310194 --- /dev/null +++ b/queue-3.14/net-call-rcu_read_lock-early-in-process_backlog.patch @@ -0,0 +1,151 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Julian Anastasov +Date: Thu, 9 Jul 2015 09:59:10 +0300 +Subject: net: call rcu_read_lock early in process_backlog + +From: Julian Anastasov + +[ Upstream commit 2c17d27c36dcce2b6bf689f41a46b9e909877c21 ] + +Incoming packet should be either in backlog queue or +in RCU read-side section. 
Otherwise, the final sequence of +flush_backlog() and synchronize_net() may miss packets +that can run without device reference: + +CPU 1 CPU 2 + skb->dev: no reference + process_backlog:__skb_dequeue + process_backlog:local_irq_enable + +on_each_cpu for +flush_backlog => IPI(hardirq): flush_backlog + - packet not found in backlog + + CPU delayed ... +synchronize_net +- no ongoing RCU +read-side sections + +netdev_run_todo, +rcu_barrier: no +ongoing callbacks + __netif_receive_skb_core:rcu_read_lock + - too late +free dev + process packet for freed dev + +Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue") +Cc: Eric W. Biederman +Cc: Stephen Hemminger +Signed-off-by: Julian Anastasov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 29 ++++++++++++++--------------- + 1 file changed, 14 insertions(+), 15 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3554,8 +3554,6 @@ static int __netif_receive_skb_core(stru + + pt_prev = NULL; + +- rcu_read_lock(); +- + another_round: + skb->skb_iif = skb->dev->ifindex; + +@@ -3565,7 +3563,7 @@ another_round: + skb->protocol == cpu_to_be16(ETH_P_8021AD)) { + skb = skb_vlan_untag(skb); + if (unlikely(!skb)) +- goto unlock; ++ goto out; + } + + #ifdef CONFIG_NET_CLS_ACT +@@ -3590,7 +3588,7 @@ skip_taps: + #ifdef CONFIG_NET_CLS_ACT + skb = handle_ing(skb, &pt_prev, &ret, orig_dev); + if (!skb) +- goto unlock; ++ goto out; + ncls: + #endif + +@@ -3605,7 +3603,7 @@ ncls: + if (vlan_do_receive(&skb)) + goto another_round; + else if (unlikely(!skb)) +- goto unlock; ++ goto out; + } + + rx_handler = rcu_dereference(skb->dev->rx_handler); +@@ -3617,7 +3615,7 @@ ncls: + switch (rx_handler(&skb)) { + case RX_HANDLER_CONSUMED: + ret = NET_RX_SUCCESS; +- goto unlock; ++ goto out; + case RX_HANDLER_ANOTHER: + goto another_round; + case RX_HANDLER_EXACT: +@@ -3669,8 +3667,6 @@ drop: + ret = NET_RX_DROP; + } + +-unlock: +- rcu_read_unlock(); + out: + return ret; + } +@@ 
-3702,29 +3698,30 @@ static int __netif_receive_skb(struct sk + + static int netif_receive_skb_internal(struct sk_buff *skb) + { ++ int ret; ++ + net_timestamp_check(netdev_tstamp_prequeue, skb); + + if (skb_defer_rx_timestamp(skb)) + return NET_RX_SUCCESS; + ++ rcu_read_lock(); ++ + #ifdef CONFIG_RPS + if (static_key_false(&rps_needed)) { + struct rps_dev_flow voidflow, *rflow = &voidflow; +- int cpu, ret; +- +- rcu_read_lock(); +- +- cpu = get_rps_cpu(skb->dev, skb, &rflow); ++ int cpu = get_rps_cpu(skb->dev, skb, &rflow); + + if (cpu >= 0) { + ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); + rcu_read_unlock(); + return ret; + } +- rcu_read_unlock(); + } + #endif +- return __netif_receive_skb(skb); ++ ret = __netif_receive_skb(skb); ++ rcu_read_unlock(); ++ return ret; + } + + /** +@@ -4185,8 +4182,10 @@ static int process_backlog(struct napi_s + unsigned int qlen; + + while ((skb = __skb_dequeue(&sd->process_queue))) { ++ rcu_read_lock(); + local_irq_enable(); + __netif_receive_skb(skb); ++ rcu_read_unlock(); + local_irq_disable(); + input_queue_head_incr(sd); + if (++work >= quota) { diff --git a/queue-3.14/net-clone-skb-before-setting-peeked-flag.patch b/queue-3.14/net-clone-skb-before-setting-peeked-flag.patch new file mode 100644 index 00000000000..e42fc3c4f1a --- /dev/null +++ b/queue-3.14/net-clone-skb-before-setting-peeked-flag.patch @@ -0,0 +1,108 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Herbert Xu +Date: Mon, 13 Jul 2015 16:04:13 +0800 +Subject: net: Clone skb before setting peeked flag + +From: Herbert Xu + +[ Upstream commit 738ac1ebb96d02e0d23bc320302a6ea94c612dec ] + +Shared skbs must not be modified and this is crucial for broadcast +and/or multicast paths where we use it as an optimisation to avoid +unnecessary cloning. + +The function skb_recv_datagram breaks this rule by setting peeked +without cloning the skb first. This causes funky races which leads +to double-free. 
+ +This patch fixes this by cloning the skb and replacing the skb +in the list when setting skb->peeked. + +Fixes: a59322be07c9 ("[UDP]: Only increment counter on first peek/recv") +Reported-by: Konstantin Khlebnikov +Signed-off-by: Herbert Xu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/datagram.c | 41 ++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 38 insertions(+), 3 deletions(-) + +--- a/net/core/datagram.c ++++ b/net/core/datagram.c +@@ -130,6 +130,35 @@ out_noerr: + goto out; + } + ++static int skb_set_peeked(struct sk_buff *skb) ++{ ++ struct sk_buff *nskb; ++ ++ if (skb->peeked) ++ return 0; ++ ++ /* We have to unshare an skb before modifying it. */ ++ if (!skb_shared(skb)) ++ goto done; ++ ++ nskb = skb_clone(skb, GFP_ATOMIC); ++ if (!nskb) ++ return -ENOMEM; ++ ++ skb->prev->next = nskb; ++ skb->next->prev = nskb; ++ nskb->prev = skb->prev; ++ nskb->next = skb->next; ++ ++ consume_skb(skb); ++ skb = nskb; ++ ++done: ++ skb->peeked = 1; ++ ++ return 0; ++} ++ + /** + * __skb_recv_datagram - Receive a datagram skbuff + * @sk: socket +@@ -164,7 +193,9 @@ out_noerr: + struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, + int *peeked, int *off, int *err) + { ++ struct sk_buff_head *queue = &sk->sk_receive_queue; + struct sk_buff *skb, *last; ++ unsigned long cpu_flags; + long timeo; + /* + * Caller is allowed not to check sk->sk_err before skb_recv_datagram() +@@ -183,8 +214,6 @@ struct sk_buff *__skb_recv_datagram(stru + * Look at current nfs client by the way... + * However, this function was correct in any case. 
8) + */ +- unsigned long cpu_flags; +- struct sk_buff_head *queue = &sk->sk_receive_queue; + int _off = *off; + + last = (struct sk_buff *)queue; +@@ -198,7 +227,11 @@ struct sk_buff *__skb_recv_datagram(stru + _off -= skb->len; + continue; + } +- skb->peeked = 1; ++ ++ error = skb_set_peeked(skb); ++ if (error) ++ goto unlock_err; ++ + atomic_inc(&skb->users); + } else + __skb_unlink(skb, queue); +@@ -222,6 +255,8 @@ struct sk_buff *__skb_recv_datagram(stru + + return NULL; + ++unlock_err: ++ spin_unlock_irqrestore(&queue->lock, cpu_flags); + no_packet: + *err = error; + return NULL; diff --git a/queue-3.14/net-do-not-process-device-backlog-during-unregistration.patch b/queue-3.14/net-do-not-process-device-backlog-during-unregistration.patch new file mode 100644 index 00000000000..7a9e56bd510 --- /dev/null +++ b/queue-3.14/net-do-not-process-device-backlog-during-unregistration.patch @@ -0,0 +1,85 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Julian Anastasov +Date: Thu, 9 Jul 2015 09:59:09 +0300 +Subject: net: do not process device backlog during unregistration + +From: Julian Anastasov + +[ Upstream commit e9e4dd3267d0c5234c5c0f47440456b10875dec9 ] + +commit 381c759d9916 ("ipv4: Avoid crashing in ip_error") +fixes a problem where processed packet comes from device +with destroyed inetdev (dev->ip_ptr). This is not expected +because inetdev_destroy is called in NETDEV_UNREGISTER +phase and packets should not be processed after +dev_close_many() and synchronize_net(). Above fix is still +required because inetdev_destroy can be called for other +reasons. But it shows the real problem: backlog can keep +packets for long time and they do not hold reference to +device. Such packets are then delivered to upper levels +at the same time when device is unregistered. 
+Calling flush_backlog after NETDEV_UNREGISTER_FINAL still +accounts all packets from backlog but before that some packets +continue to be delivered to upper levels long after the +synchronize_net call which is supposed to wait the last +ones. Also, as Eric pointed out, processed packets, mostly +from other devices, can continue to add new packets to backlog. + +Fix the problem by moving flush_backlog early, after the +device driver is stopped and before the synchronize_net() call. +Then use netif_running check to make sure we do not add more +packets to backlog. We have to do it in enqueue_to_backlog +context when the local IRQ is disabled. As result, after the +flush_backlog and synchronize_net sequence all packets +should be accounted. + +Thanks to Eric W. Biederman for the test script and his +valuable feedback! + +Reported-by: Vittorio Gambaletta +Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue") +Cc: Eric W. Biederman +Cc: Stephen Hemminger +Signed-off-by: Julian Anastasov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3214,6 +3214,8 @@ static int enqueue_to_backlog(struct sk_ + local_irq_save(flags); + + rps_lock(sd); ++ if (!netif_running(skb->dev)) ++ goto drop; + qlen = skb_queue_len(&sd->input_pkt_queue); + if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { + if (skb_queue_len(&sd->input_pkt_queue)) { +@@ -3235,6 +3237,7 @@ enqueue: + goto enqueue; + } + ++drop: + sd->dropped++; + rps_unlock(sd); + +@@ -5694,6 +5697,7 @@ static void rollback_registered_many(str + unlist_netdevice(dev); + + dev->reg_state = NETREG_UNREGISTERING; ++ on_each_cpu(flush_backlog, dev, 1); + } + + synchronize_net(); +@@ -6310,8 +6314,6 @@ void netdev_run_todo(void) + + dev->reg_state = NETREG_UNREGISTERED; + +- on_each_cpu(flush_backlog, dev, 1); +- + netdev_wait_allrefs(dev); + + /* paranoia */ diff --git a/queue-3.14/net-fix-skb-csum-races-when-peeking.patch b/queue-3.14/net-fix-skb-csum-races-when-peeking.patch new file mode 100644 index 00000000000..86d73d5d174 --- /dev/null +++ b/queue-3.14/net-fix-skb-csum-races-when-peeking.patch @@ -0,0 +1,41 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Herbert Xu +Date: Mon, 13 Jul 2015 20:01:42 +0800 +Subject: net: Fix skb csum races when peeking + +From: Herbert Xu + +[ Upstream commit 89c22d8c3b278212eef6a8cc66b570bc840a6f5a ] + +When we calculate the checksum on the recv path, we store the +result in the skb as an optimisation in case we need the checksum +again down the line. + +This is in fact bogus for the MSG_PEEK case as this is done without +any locking. So multiple threads can peek and then store the result +to the same skb, potentially resulting in bogus skb states. + +This patch fixes this by only storing the result if the skb is not +shared. 
This preserves the optimisations for the few cases where +it can be done safely due to locking or other reasons, e.g., SIOCINQ. + +Signed-off-by: Herbert Xu +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/datagram.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/core/datagram.c ++++ b/net/core/datagram.c +@@ -777,7 +777,8 @@ __sum16 __skb_checksum_complete_head(str + if (likely(!sum)) { + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) + netdev_rx_csum_fault(skb->dev); +- skb->ip_summed = CHECKSUM_UNNECESSARY; ++ if (!skb_shared(skb)) ++ skb->ip_summed = CHECKSUM_UNNECESSARY; + } + return sum; + } diff --git a/queue-3.14/net-fix-skb_set_peeked-use-after-free-bug.patch b/queue-3.14/net-fix-skb_set_peeked-use-after-free-bug.patch new file mode 100644 index 00000000000..4d24642a3e3 --- /dev/null +++ b/queue-3.14/net-fix-skb_set_peeked-use-after-free-bug.patch @@ -0,0 +1,76 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Herbert Xu +Date: Tue, 4 Aug 2015 15:42:47 +0800 +Subject: net: Fix skb_set_peeked use-after-free bug + +From: Herbert Xu + +[ Upstream commit a0a2a6602496a45ae838a96db8b8173794b5d398 ] + +The commit 738ac1ebb96d02e0d23bc320302a6ea94c612dec ("net: Clone +skb before setting peeked flag") introduced a use-after-free bug +in skb_recv_datagram. This is because skb_set_peeked may create +a new skb and free the existing one. As it stands the caller will +continue to use the old freed skb. + +This patch fixes it by making skb_set_peeked return the new skb +(or the old one if unchanged). + +Fixes: 738ac1ebb96d ("net: Clone skb before setting peeked flag") +Reported-by: Brenden Blanco +Signed-off-by: Herbert Xu +Tested-by: Brenden Blanco +Reviewed-by: Konstantin Khlebnikov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/datagram.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +--- a/net/core/datagram.c ++++ b/net/core/datagram.c +@@ -130,12 +130,12 @@ out_noerr: + goto out; + } + +-static int skb_set_peeked(struct sk_buff *skb) ++static struct sk_buff *skb_set_peeked(struct sk_buff *skb) + { + struct sk_buff *nskb; + + if (skb->peeked) +- return 0; ++ return skb; + + /* We have to unshare an skb before modifying it. */ + if (!skb_shared(skb)) +@@ -143,7 +143,7 @@ static int skb_set_peeked(struct sk_buff + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) +- return -ENOMEM; ++ return ERR_PTR(-ENOMEM); + + skb->prev->next = nskb; + skb->next->prev = nskb; +@@ -156,7 +156,7 @@ static int skb_set_peeked(struct sk_buff + done: + skb->peeked = 1; + +- return 0; ++ return skb; + } + + /** +@@ -228,8 +228,9 @@ struct sk_buff *__skb_recv_datagram(stru + continue; + } + +- error = skb_set_peeked(skb); +- if (error) ++ skb = skb_set_peeked(skb); ++ error = PTR_ERR(skb); ++ if (IS_ERR(skb)) + goto unlock_err; + + atomic_inc(&skb->users); diff --git a/queue-3.14/net-graceful-exit-from-netif_alloc_netdev_queues.patch b/queue-3.14/net-graceful-exit-from-netif_alloc_netdev_queues.patch new file mode 100644 index 00000000000..7f9c87ccd97 --- /dev/null +++ b/queue-3.14/net-graceful-exit-from-netif_alloc_netdev_queues.patch @@ -0,0 +1,36 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Eric Dumazet +Date: Mon, 6 Jul 2015 17:13:26 +0200 +Subject: net: graceful exit from netif_alloc_netdev_queues() + +From: Eric Dumazet + +[ Upstream commit d339727c2b1a10f25e6636670ab6e1841170e328 ] + +User space can crash kernel with + +ip link add ifb10 numtxqueues 100000 type ifb + +We must replace a BUG_ON() by proper test and return -EINVAL for +crazy values. + +Fixes: 60877a32bce00 ("net: allow large number of tx queues") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -5951,7 +5951,8 @@ static int netif_alloc_netdev_queues(str + struct netdev_queue *tx; + size_t sz = count * sizeof(*tx); + +- BUG_ON(count < 1 || count > 0xffff); ++ if (count < 1 || count > 0xffff) ++ return -EINVAL; + + tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!tx) { diff --git a/queue-3.14/net-mlx4_core-fix-wrong-index-in-propagating-port-change-event-to-vfs.patch b/queue-3.14/net-mlx4_core-fix-wrong-index-in-propagating-port-change-event-to-vfs.patch new file mode 100644 index 00000000000..451dd6258da --- /dev/null +++ b/queue-3.14/net-mlx4_core-fix-wrong-index-in-propagating-port-change-event-to-vfs.patch @@ -0,0 +1,45 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Jack Morgenstein +Date: Wed, 22 Jul 2015 16:53:47 +0300 +Subject: net/mlx4_core: Fix wrong index in propagating port change event to VFs + +From: Jack Morgenstein + +[ Upstream commit 1c1bf34951e8d17941bf708d1901c47e81b15d55 ] + +The port-change event processing in procedure mlx4_eq_int() uses "slave" +as the vf_oper array index. Since the value of "slave" is the PF function +index, the result is that the PF link state is used for deciding to +propagate the event for all the VFs. The VF link state should be used, +so the VF function index should be used here. + +Fixes: 948e306d7d64 ('net/mlx4: Add VF link state support') +Signed-off-by: Jack Morgenstein +Signed-off-by: Matan Barak +Signed-off-by: Or Gerlitz +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/eq.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/eq.c ++++ b/drivers/net/ethernet/mellanox/mlx4/eq.c +@@ -557,7 +557,7 @@ static int mlx4_eq_int(struct mlx4_dev * + mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN" + " to slave: %d, port:%d\n", + __func__, i, port); +- s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; ++ s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; + if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) + mlx4_slave_event(dev, i, eqe); + } else { /* IB port */ +@@ -583,7 +583,7 @@ static int mlx4_eq_int(struct mlx4_dev * + for (i = 0; i < dev->num_slaves; i++) { + if (i == mlx4_master_func_num(dev)) + continue; +- s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; ++ s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; + if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) + mlx4_slave_event(dev, i, eqe); + } diff --git a/queue-3.14/net-pktgen-fix-race-between-pktgen_thread_worker-and-kthread_stop.patch b/queue-3.14/net-pktgen-fix-race-between-pktgen_thread_worker-and-kthread_stop.patch new file mode 100644 index 00000000000..84ba1910300 --- /dev/null +++ b/queue-3.14/net-pktgen-fix-race-between-pktgen_thread_worker-and-kthread_stop.patch @@ -0,0 +1,35 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Oleg Nesterov +Date: Wed, 8 Jul 2015 21:42:11 +0200 +Subject: net: pktgen: fix race between pktgen_thread_worker() and kthread_stop() + +From: Oleg Nesterov + +[ Upstream commit fecdf8be2d91e04b0a9a4f79ff06499a36f5d14f ] + +pktgen_thread_worker() is obviously racy, kthread_stop() can come +between the kthread_should_stop() check and set_current_state(). + +Signed-off-by: Oleg Nesterov +Reported-by: Jan Stancek +Reported-by: Marcelo Leitner +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/pktgen.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/core/pktgen.c ++++ b/net/core/pktgen.c +@@ -3464,8 +3464,10 @@ static int pktgen_thread_worker(void *ar + pktgen_rem_thread(t); + + /* Wait for kthread_stop */ +- while (!kthread_should_stop()) { ++ for (;;) { + set_current_state(TASK_INTERRUPTIBLE); ++ if (kthread_should_stop()) ++ break; + schedule(); + } + __set_current_state(TASK_RUNNING); diff --git a/queue-3.14/net-tipc-initialize-security-state-for-new-connection-socket.patch b/queue-3.14/net-tipc-initialize-security-state-for-new-connection-socket.patch new file mode 100644 index 00000000000..dca020ab0f9 --- /dev/null +++ b/queue-3.14/net-tipc-initialize-security-state-for-new-connection-socket.patch @@ -0,0 +1,42 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Stephen Smalley +Date: Tue, 7 Jul 2015 09:43:45 -0400 +Subject: net/tipc: initialize security state for new connection socket + +From: Stephen Smalley + +[ Upstream commit fdd75ea8df370f206a8163786e7470c1277a5064 ] + +Calling connect() with an AF_TIPC socket would trigger a series +of error messages from SELinux along the lines of: +SELinux: Invalid class 0 +type=AVC msg=audit(1434126658.487:34500): avc: denied { } + for pid=292 comm="kworker/u16:5" scontext=system_u:system_r:kernel_t:s0 + tcontext=system_u:object_r:unlabeled_t:s0 tclass= + permissive=0 + +This was due to a failure to initialize the security state of the new +connection sock by the tipc code, leaving it with junk in the security +class field and an unlabeled secid. Add a call to security_sk_clone() +to inherit the security state from the parent socket. + +Reported-by: Tim Shearer +Signed-off-by: Stephen Smalley +Acked-by: Paul Moore +Acked-by: Ying Xue +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/socket.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/tipc/socket.c ++++ b/net/tipc/socket.c +@@ -1681,6 +1681,7 @@ static int accept(struct socket *sock, s + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); + if (res) + goto exit; ++ security_sk_clone(sock->sk, new_sock->sk); + + new_sk = new_sock->sk; + new_tsock = tipc_sk(new_sk); diff --git a/queue-3.14/netlink-don-t-hold-mutex-in-rcu-callback-when-releasing-mmapd-ring.patch b/queue-3.14/netlink-don-t-hold-mutex-in-rcu-callback-when-releasing-mmapd-ring.patch new file mode 100644 index 00000000000..e4382f80550 --- /dev/null +++ b/queue-3.14/netlink-don-t-hold-mutex-in-rcu-callback-when-releasing-mmapd-ring.patch @@ -0,0 +1,210 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Florian Westphal +Date: Tue, 21 Jul 2015 16:33:50 +0200 +Subject: netlink: don't hold mutex in rcu callback when releasing mmapd ring + +From: Florian Westphal + +[ Upstream commit 0470eb99b4721586ccac954faac3fa4472da0845 ] + +Kirill A. 
Shutemov says: + +This simple test-case trigers few locking asserts in kernel: + +int main(int argc, char **argv) +{ + unsigned int block_size = 16 * 4096; + struct nl_mmap_req req = { + .nm_block_size = block_size, + .nm_block_nr = 64, + .nm_frame_size = 16384, + .nm_frame_nr = 64 * block_size / 16384, + }; + unsigned int ring_size; + int fd; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0) + exit(1); + if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0) + exit(1); + + ring_size = req.nm_block_nr * req.nm_block_size; + mmap(NULL, 2 * ring_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + return 0; +} + ++++ exited with 0 +++ +BUG: sleeping function called from invalid context at /home/kas/git/public/linux-mm/kernel/locking/mutex.c:616 +in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: init +3 locks held by init/1: + #0: (reboot_mutex){+.+...}, at: [] SyS_reboot+0xa9/0x220 + #1: ((reboot_notifier_list).rwsem){.+.+..}, at: [] __blocking_notifier_call_chain+0x39/0x70 + #2: (rcu_callback){......}, at: [] rcu_do_batch.isra.49+0x160/0x10c0 +Preemption disabled at:[] __delay+0xf/0x20 + +CPU: 1 PID: 1 Comm: init Not tainted 4.1.0-00009-gbddf4c4818e0 #253 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Debian-1.8.2-1 04/01/2014 + ffff88017b3d8000 ffff88027bc03c38 ffffffff81929ceb 0000000000000102 + 0000000000000000 ffff88027bc03c68 ffffffff81085a9d 0000000000000002 + ffffffff81ca2a20 0000000000000268 0000000000000000 ffff88027bc03c98 +Call Trace: + [] dump_stack+0x4f/0x7b + [] ___might_sleep+0x16d/0x270 + [] __might_sleep+0x4d/0x90 + [] mutex_lock_nested+0x2f/0x430 + [] ? _raw_spin_unlock_irqrestore+0x5d/0x80 + [] ? __this_cpu_preempt_check+0x13/0x20 + [] netlink_set_ring+0x1ed/0x350 + [] ? netlink_undo_bind+0x70/0x70 + [] netlink_sock_destruct+0x80/0x150 + [] __sk_free+0x1d/0x160 + [] sk_free+0x19/0x20 +[..] 
+ +Cong Wang says: + +We can't hold mutex lock in a rcu callback, [..] + +Thomas Graf says: + +The socket should be dead at this point. It might be simpler to +add a netlink_release_ring() function which doesn't require +locking at all. + +Reported-by: "Kirill A. Shutemov" +Diagnosed-by: Cong Wang +Suggested-by: Thomas Graf +Signed-off-by: Florian Westphal +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 79 +++++++++++++++++++++++++++-------------------- + 1 file changed, 47 insertions(+), 32 deletions(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -350,25 +350,52 @@ err1: + return NULL; + } + ++ ++static void ++__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, ++ unsigned int order) ++{ ++ struct netlink_sock *nlk = nlk_sk(sk); ++ struct sk_buff_head *queue; ++ struct netlink_ring *ring; ++ ++ queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; ++ ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; ++ ++ spin_lock_bh(&queue->lock); ++ ++ ring->frame_max = req->nm_frame_nr - 1; ++ ring->head = 0; ++ ring->frame_size = req->nm_frame_size; ++ ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; ++ ++ swap(ring->pg_vec_len, req->nm_block_nr); ++ swap(ring->pg_vec_order, order); ++ swap(ring->pg_vec, pg_vec); ++ ++ __skb_queue_purge(queue); ++ spin_unlock_bh(&queue->lock); ++ ++ WARN_ON(atomic_read(&nlk->mapped)); ++ ++ if (pg_vec) ++ free_pg_vec(pg_vec, order, req->nm_block_nr); ++} ++ + static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, +- bool closing, bool tx_ring) ++ bool tx_ring) + { + struct netlink_sock *nlk = nlk_sk(sk); + struct netlink_ring *ring; +- struct sk_buff_head *queue; + void **pg_vec = NULL; + unsigned int order = 0; +- int err; + + ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; +- queue = tx_ring ? 
&sk->sk_write_queue : &sk->sk_receive_queue; + +- if (!closing) { +- if (atomic_read(&nlk->mapped)) +- return -EBUSY; +- if (atomic_read(&ring->pending)) +- return -EBUSY; +- } ++ if (atomic_read(&nlk->mapped)) ++ return -EBUSY; ++ if (atomic_read(&ring->pending)) ++ return -EBUSY; + + if (req->nm_block_nr) { + if (ring->pg_vec != NULL) +@@ -400,31 +427,19 @@ static int netlink_set_ring(struct sock + return -EINVAL; + } + +- err = -EBUSY; + mutex_lock(&nlk->pg_vec_lock); +- if (closing || atomic_read(&nlk->mapped) == 0) { +- err = 0; +- spin_lock_bh(&queue->lock); +- +- ring->frame_max = req->nm_frame_nr - 1; +- ring->head = 0; +- ring->frame_size = req->nm_frame_size; +- ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; +- +- swap(ring->pg_vec_len, req->nm_block_nr); +- swap(ring->pg_vec_order, order); +- swap(ring->pg_vec, pg_vec); +- +- __skb_queue_purge(queue); +- spin_unlock_bh(&queue->lock); +- +- WARN_ON(atomic_read(&nlk->mapped)); ++ if (atomic_read(&nlk->mapped) == 0) { ++ __netlink_set_ring(sk, req, tx_ring, pg_vec, order); ++ mutex_unlock(&nlk->pg_vec_lock); ++ return 0; + } ++ + mutex_unlock(&nlk->pg_vec_lock); + + if (pg_vec) + free_pg_vec(pg_vec, order, req->nm_block_nr); +- return err; ++ ++ return -EBUSY; + } + + static void netlink_mm_open(struct vm_area_struct *vma) +@@ -893,10 +908,10 @@ static void netlink_sock_destruct(struct + + memset(&req, 0, sizeof(req)); + if (nlk->rx_ring.pg_vec) +- netlink_set_ring(sk, &req, true, false); ++ __netlink_set_ring(sk, &req, false, NULL, 0); + memset(&req, 0, sizeof(req)); + if (nlk->tx_ring.pg_vec) +- netlink_set_ring(sk, &req, true, true); ++ __netlink_set_ring(sk, &req, true, NULL, 0); + } + #endif /* CONFIG_NETLINK_MMAP */ + +@@ -2190,7 +2205,7 @@ static int netlink_setsockopt(struct soc + return -EINVAL; + if (copy_from_user(&req, optval, sizeof(req))) + return -EFAULT; +- err = netlink_set_ring(sk, &req, false, ++ err = netlink_set_ring(sk, &req, + optname == NETLINK_TX_RING); + break; + } diff --git 
a/queue-3.14/packet-missing-dev_put-in-packet_do_bind.patch b/queue-3.14/packet-missing-dev_put-in-packet_do_bind.patch new file mode 100644 index 00000000000..179cd12bfe0 --- /dev/null +++ b/queue-3.14/packet-missing-dev_put-in-packet_do_bind.patch @@ -0,0 +1,59 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Lars Westerhoff +Date: Tue, 28 Jul 2015 01:32:21 +0300 +Subject: packet: missing dev_put() in packet_do_bind() + +From: Lars Westerhoff + +[ Upstream commit 158cd4af8dedbda0d612d448c724c715d0dda649 ] + +When binding a PF_PACKET socket, the use count of the bound interface is +always increased with dev_hold in dev_get_by_{index,name}. However, +when rebound with the same protocol and device as in the previous bind +the use count of the interface was not decreased. Ultimately, this +caused the deletion of the interface to fail with the following message: + +unregister_netdevice: waiting for dummy0 to become free. Usage count = 1 + +This patch moves the dev_put out of the conditional part that was only +executed when either the protocol or device changed on a bind. + +Fixes: 902fefb82ef7 ('packet: improve socket create/bind latency in some cases') +Signed-off-by: Lars Westerhoff +Signed-off-by: Dan Carpenter +Reviewed-by: Daniel Borkmann +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2645,7 +2645,7 @@ static int packet_release(struct socket + static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) + { + struct packet_sock *po = pkt_sk(sk); +- const struct net_device *dev_curr; ++ struct net_device *dev_curr; + __be16 proto_curr; + bool need_rehook; + +@@ -2669,15 +2669,13 @@ static int packet_do_bind(struct sock *s + + po->num = proto; + po->prot_hook.type = proto; +- +- if (po->prot_hook.dev) +- dev_put(po->prot_hook.dev); +- + po->prot_hook.dev = dev; + + po->ifindex = dev ? dev->ifindex : 0; + packet_cached_dev_assign(po, dev); + } ++ if (dev_curr) ++ dev_put(dev_curr); + + if (proto == 0 || !need_rehook) + goto out_unlock; diff --git a/queue-3.14/rds-fix-an-integer-overflow-test-in-rds_info_getsockopt.patch b/queue-3.14/rds-fix-an-integer-overflow-test-in-rds_info_getsockopt.patch new file mode 100644 index 00000000000..6de36b96563 --- /dev/null +++ b/queue-3.14/rds-fix-an-integer-overflow-test-in-rds_info_getsockopt.patch @@ -0,0 +1,36 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Dan Carpenter +Date: Sat, 1 Aug 2015 15:33:26 +0300 +Subject: rds: fix an integer overflow test in rds_info_getsockopt() + +From: Dan Carpenter + +[ Upstream commit 468b732b6f76b138c0926eadf38ac88467dcd271 ] + +"len" is a signed integer. We check that len is not negative, so it +goes from zero to INT_MAX. PAGE_SIZE is unsigned long so the comparison +is type promoted to unsigned long. ULONG_MAX - 4095 is a higher than +INT_MAX so the condition can never be true. + +I don't know if this is harmful but it seems safe to limit "len" to +INT_MAX - 4095. + +Fixes: a8c879a7ee98 ('RDS: Info and stats') +Signed-off-by: Dan Carpenter +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rds/info.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/rds/info.c ++++ b/net/rds/info.c +@@ -176,7 +176,7 @@ int rds_info_getsockopt(struct socket *s + + /* check for all kinds of wrapping and the like */ + start = (unsigned long)optval; +- if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) { ++ if (len < 0 || len > INT_MAX - PAGE_SIZE + 1 || start + len < start) { + ret = -EINVAL; + goto out; + } diff --git a/queue-3.14/rtnetlink-verify-ifla_vf_info-attributes-before-passing-them-to-driver.patch b/queue-3.14/rtnetlink-verify-ifla_vf_info-attributes-before-passing-them-to-driver.patch new file mode 100644 index 00000000000..0e1d94fd81b --- /dev/null +++ b/queue-3.14/rtnetlink-verify-ifla_vf_info-attributes-before-passing-them-to-driver.patch @@ -0,0 +1,209 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Daniel Borkmann +Date: Tue, 7 Jul 2015 00:07:52 +0200 +Subject: rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver + +From: Daniel Borkmann + +[ Upstream commit 4f7d2cdfdde71ffe962399b7020c674050329423 ] + +Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make +SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes +anymore with respect to their policy, that is, ifla_vfinfo_policy[]. + +Before, they were part of ifla_policy[], but they have been nested since +placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO, +which is another nested attribute for the actual VF attributes such as +IFLA_VF_MAC, IFLA_VF_VLAN, etc. + +Despite the policy being split out from ifla_policy[] in this commit, +it's never applied anywhere. nla_for_each_nested() only does basic nla_ok() +testing for struct nlattr, but it doesn't know about the data context and +their requirements. 
+ +Fix, on top of Jason's initial work, does 1) parsing of the attributes +with the right policy, and 2) using the resulting parsed attribute table +from 1) instead of the nla_for_each_nested() loop (just like we used to +do when still part of ifla_policy[]). + +Reference: http://thread.gmane.org/gmane.linux.network/368913 +Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric") +Reported-by: Jason Gunthorpe +Cc: Chris Wright +Cc: Sucheta Chakraborty +Cc: Greg Rose +Cc: Jeff Kirsher +Cc: Rony Efraim +Cc: Vlad Zolotarov +Cc: Nicolas Dichtel +Cc: Thomas Graf +Signed-off-by: Jason Gunthorpe +Signed-off-by: Daniel Borkmann +Acked-by: Vlad Zolotarov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 128 +++++++++++++++++++++++++-------------------------- + 1 file changed, 65 insertions(+), 63 deletions(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -1259,10 +1259,6 @@ static const struct nla_policy ifla_info + [IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED }, + }; + +-static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { +- [IFLA_VF_INFO] = { .type = NLA_NESTED }, +-}; +- + static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { + [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, + [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, +@@ -1336,67 +1332,66 @@ static int validate_linkmsg(struct net_d + return 0; + } + +-static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) ++static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) + { +- int rem, err = -EINVAL; +- struct nlattr *vf; + const struct net_device_ops *ops = dev->netdev_ops; ++ int err = -EINVAL; + +- nla_for_each_nested(vf, attr, rem) { +- switch (nla_type(vf)) { +- case IFLA_VF_MAC: { +- struct ifla_vf_mac *ivm; +- ivm = nla_data(vf); +- err = -EOPNOTSUPP; +- if (ops->ndo_set_vf_mac) +- err = ops->ndo_set_vf_mac(dev, ivm->vf, +- ivm->mac); +- break; +- } +- case 
IFLA_VF_VLAN: { +- struct ifla_vf_vlan *ivv; +- ivv = nla_data(vf); +- err = -EOPNOTSUPP; +- if (ops->ndo_set_vf_vlan) +- err = ops->ndo_set_vf_vlan(dev, ivv->vf, +- ivv->vlan, +- ivv->qos); +- break; +- } +- case IFLA_VF_TX_RATE: { +- struct ifla_vf_tx_rate *ivt; +- ivt = nla_data(vf); +- err = -EOPNOTSUPP; +- if (ops->ndo_set_vf_tx_rate) +- err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, +- ivt->rate); +- break; +- } +- case IFLA_VF_SPOOFCHK: { +- struct ifla_vf_spoofchk *ivs; +- ivs = nla_data(vf); +- err = -EOPNOTSUPP; +- if (ops->ndo_set_vf_spoofchk) +- err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, +- ivs->setting); +- break; +- } +- case IFLA_VF_LINK_STATE: { +- struct ifla_vf_link_state *ivl; +- ivl = nla_data(vf); +- err = -EOPNOTSUPP; +- if (ops->ndo_set_vf_link_state) +- err = ops->ndo_set_vf_link_state(dev, ivl->vf, +- ivl->link_state); +- break; +- } +- default: +- err = -EINVAL; +- break; +- } +- if (err) +- break; ++ if (tb[IFLA_VF_MAC]) { ++ struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); ++ ++ err = -EOPNOTSUPP; ++ if (ops->ndo_set_vf_mac) ++ err = ops->ndo_set_vf_mac(dev, ivm->vf, ++ ivm->mac); ++ if (err < 0) ++ return err; ++ } ++ ++ if (tb[IFLA_VF_VLAN]) { ++ struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); ++ ++ err = -EOPNOTSUPP; ++ if (ops->ndo_set_vf_vlan) ++ err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, ++ ivv->qos); ++ if (err < 0) ++ return err; ++ } ++ ++ if (tb[IFLA_VF_TX_RATE]) { ++ struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); ++ ++ err = -EOPNOTSUPP; ++ if (ops->ndo_set_vf_tx_rate) ++ err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, ++ ivt->rate); ++ if (err < 0) ++ return err; ++ } ++ ++ if (tb[IFLA_VF_SPOOFCHK]) { ++ struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); ++ ++ err = -EOPNOTSUPP; ++ if (ops->ndo_set_vf_spoofchk) ++ err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, ++ ivs->setting); ++ if (err < 0) ++ return err; ++ } ++ ++ if (tb[IFLA_VF_LINK_STATE]) { ++ struct ifla_vf_link_state *ivl = 
nla_data(tb[IFLA_VF_LINK_STATE]); ++ ++ err = -EOPNOTSUPP; ++ if (ops->ndo_set_vf_link_state) ++ err = ops->ndo_set_vf_link_state(dev, ivl->vf, ++ ivl->link_state); ++ if (err < 0) ++ return err; + } ++ + return err; + } + +@@ -1579,14 +1574,21 @@ static int do_setlink(const struct sk_bu + } + + if (tb[IFLA_VFINFO_LIST]) { ++ struct nlattr *vfinfo[IFLA_VF_MAX + 1]; + struct nlattr *attr; + int rem; ++ + nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { +- if (nla_type(attr) != IFLA_VF_INFO) { ++ if (nla_type(attr) != IFLA_VF_INFO || ++ nla_len(attr) < NLA_HDRLEN) { + err = -EINVAL; + goto errout; + } +- err = do_setvfinfo(dev, attr); ++ err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr, ++ ifla_vf_policy); ++ if (err < 0) ++ goto errout; ++ err = do_setvfinfo(dev, vfinfo); + if (err < 0) + goto errout; + modified = 1; diff --git a/queue-3.14/series b/queue-3.14/series index c82511771a1..a582f4b275a 100644 --- a/queue-3.14/series +++ b/queue-3.14/series @@ -41,3 +41,28 @@ ib-mlx4-use-correct-sl-on-ah-query-under-roce.patch stmmac-fix-check-for-phydev-being-open.patch stmmac-troubleshoot-unexpected-bits-in-des0-des1.patch hfs-hfsplus-cache-pages-correctly-between-bnode_create-and-bnode_free.patch +ipv6-make-mld-packets-to-only-be-processed-locally.patch +net-graceful-exit-from-netif_alloc_netdev_queues.patch +rtnetlink-verify-ifla_vf_info-attributes-before-passing-them-to-driver.patch +ip_tunnel-fix-ipv4-pmtu-check-to-honor-inner-ip-header-df.patch +net-tipc-initialize-security-state-for-new-connection-socket.patch +bridge-mdb-zero-out-the-local-br_ip-variable-before-use.patch +net-pktgen-fix-race-between-pktgen_thread_worker-and-kthread_stop.patch +net-do-not-process-device-backlog-during-unregistration.patch +net-call-rcu_read_lock-early-in-process_backlog.patch +net-clone-skb-before-setting-peeked-flag.patch +net-fix-skb-csum-races-when-peeking.patch +net-fix-skb_set_peeked-use-after-free-bug.patch +bridge-mdb-fix-double-add-notification.patch 
+isdn-gigaset-reset-tty-receive_room-when-attaching-ser_gigaset.patch +ipv6-lock-socket-in-ip6_datagram_connect.patch +bonding-fix-destruction-of-bond-with-devices-different-from-arphrd_ether.patch +bonding-correct-the-mac-address-for-follow-fail_over_mac-policy.patch +inet-frags-fix-defragmented-packet-s-ip-header-for-af_packet.patch +netlink-don-t-hold-mutex-in-rcu-callback-when-releasing-mmapd-ring.patch +net-mlx4_core-fix-wrong-index-in-propagating-port-change-event-to-vfs.patch +packet-missing-dev_put-in-packet_do_bind.patch +rds-fix-an-integer-overflow-test-in-rds_info_getsockopt.patch +udp-fix-dst-races-with-multicast-early-demux.patch +bna-fix-interrupts-storm-caused-by-erroneous-packets.patch +subject-net-gso-use-feature-flag-argument-in-all-protocol-gso-handlers.patch diff --git a/queue-3.14/subject-net-gso-use-feature-flag-argument-in-all-protocol-gso-handlers.patch b/queue-3.14/subject-net-gso-use-feature-flag-argument-in-all-protocol-gso-handlers.patch new file mode 100644 index 00000000000..d21a3dc0b2a --- /dev/null +++ b/queue-3.14/subject-net-gso-use-feature-flag-argument-in-all-protocol-gso-handlers.patch @@ -0,0 +1,92 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Florian Westphal +Date: Wed, 26 Aug 2015 22:17:39 -0700 +Subject: [PATCH 3.14-stable] net: gso: use feature flag argument in all protocol gso handlers + +From: Florian Westphal + +[ Upstream commit 1e16aa3ddf863c6b9f37eddf52503230a62dedb3 ] + +skb_gso_segment() has a 'features' argument representing offload features +available to the output path. + +A few handlers, e.g. GRE, instead re-fetch the features of skb->dev and use +those instead of the provided ones when handling encapsulation/tunnels. + +Depending on dev->hw_enc_features of the output device skb_gso_segment() can +then return NULL even when the caller has disabled all GSO feature bits, +as segmentation of inner header thinks device will take care of segmentation. + +This e.g.
affects the tbf scheduler, which will silently drop GRE-encap GSO skbs +that did not fit the remaining token quota as the segmentation does not work +when device supports corresponding hw offload capabilities. + +Cc: Pravin B Shelar +Signed-off-by: Florian Westphal +Signed-off-by: David S. Miller +[jay.vosburgh: backported to 3.14. ] +Signed-off-by: Jay Vosburgh +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/af_inet.c | 2 +- + net/ipv4/gre_offload.c | 2 +- + net/ipv4/udp.c | 2 +- + net/ipv6/ip6_offload.c | 2 +- + net/mpls/mpls_gso.c | 2 +- + 5 files changed, 5 insertions(+), 5 deletions(-) + +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -1291,7 +1291,7 @@ static struct sk_buff *inet_gso_segment( + + encap = SKB_GSO_CB(skb)->encap_level > 0; + if (encap) +- features = skb->dev->hw_enc_features & netif_skb_features(skb); ++ features &= skb->dev->hw_enc_features; + SKB_GSO_CB(skb)->encap_level += ihl; + + skb_reset_transport_header(skb); +--- a/net/ipv4/gre_offload.c ++++ b/net/ipv4/gre_offload.c +@@ -69,7 +69,7 @@ static struct sk_buff *gre_gso_segment(s + skb->mac_len = skb_inner_network_offset(skb); + + /* segment inner packet. */ +- enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); ++ enc_features = skb->dev->hw_enc_features & features; + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) { + skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -2517,7 +2517,7 @@ struct sk_buff *skb_udp_tunnel_segment(s + skb->protocol = htons(ETH_P_TEB); + + /* segment inner packet. 
*/ +- enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); ++ enc_features = skb->dev->hw_enc_features & features; + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) { + skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, +--- a/net/ipv6/ip6_offload.c ++++ b/net/ipv6/ip6_offload.c +@@ -112,7 +112,7 @@ static struct sk_buff *ipv6_gso_segment( + + encap = SKB_GSO_CB(skb)->encap_level > 0; + if (encap) +- features = skb->dev->hw_enc_features & netif_skb_features(skb); ++ features &= skb->dev->hw_enc_features; + SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h); + + ipv6h = ipv6_hdr(skb); +--- a/net/mpls/mpls_gso.c ++++ b/net/mpls/mpls_gso.c +@@ -47,7 +47,7 @@ static struct sk_buff *mpls_gso_segment( + __skb_push(skb, skb->mac_len); + + /* Segment inner packet. */ +- mpls_features = skb->dev->mpls_features & netif_skb_features(skb); ++ mpls_features = skb->dev->mpls_features & features; + segs = skb_mac_gso_segment(skb, mpls_features); + + diff --git a/queue-3.14/udp-fix-dst-races-with-multicast-early-demux.patch b/queue-3.14/udp-fix-dst-races-with-multicast-early-demux.patch new file mode 100644 index 00000000000..5537a59bb9f --- /dev/null +++ b/queue-3.14/udp-fix-dst-races-with-multicast-early-demux.patch @@ -0,0 +1,62 @@ +From foo@baz Sat Sep 26 11:19:08 PDT 2015 +From: Eric Dumazet +Date: Sat, 1 Aug 2015 12:14:33 +0200 +Subject: udp: fix dst races with multicast early demux +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Eric Dumazet + +[ Upstream commit 10e2eb878f3ca07ac2f05fa5ca5e6c4c9174a27a ] + +Multicast dst are not cached. They carry DST_NOCACHE. + +As mentioned in commit f8864972126899 ("ipv4: fix dst race in +sk_dst_get()"), these dst need special care before caching them +into a socket. 
+ +Caching them is allowed only if their refcnt was not 0, i.e. we +must use atomic_inc_not_zero(). + +Also, we must use READ_ONCE() to fetch sk->sk_rx_dst, as mentioned +in commit d0c294c53a771 ("tcp: prevent fetching dst twice in early demux +code") + +Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") +Tested-by: Gregory Hoggarth +Signed-off-by: Eric Dumazet +Reported-by: Gregory Hoggarth +Reported-by: Alex Gartrell +Cc: Michal Kubeček +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/udp.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -1957,12 +1957,19 @@ void udp_v4_early_demux(struct sk_buff * + + skb->sk = sk; + skb->destructor = sock_edemux; +- dst = sk->sk_rx_dst; ++ dst = ACCESS_ONCE(sk->sk_rx_dst); + + if (dst) + dst = dst_check(dst, 0); +- if (dst) +- skb_dst_set_noref(skb, dst); ++ if (dst) { ++ /* DST_NOCACHE can not be used without taking a reference */ ++ if (dst->flags & DST_NOCACHE) { ++ if (likely(atomic_inc_not_zero(&dst->__refcnt))) ++ skb_dst_set(skb, dst); ++ } else { ++ skb_dst_set_noref(skb, dst); ++ } ++ } + } + + int udp_rcv(struct sk_buff *skb)