]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.1-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 26 Sep 2015 18:40:29 +0000 (11:40 -0700)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 26 Sep 2015 18:40:29 +0000 (11:40 -0700)
added patches:
act_bpf-fix-memory-leaks-when-replacing-bpf-programs.patch
bna-fix-interrupts-storm-caused-by-erroneous-packets.patch
bonding-correct-the-mac-address-for-follow-fail_over_mac-policy.patch
bonding-fix-destruction-of-bond-with-devices-different-from-arphrd_ether.patch
bridge-fix-potential-crash-in-__netdev_pick_tx.patch
bridge-mdb-fix-double-add-notification.patch
bridge-mdb-zero-out-the-local-br_ip-variable-before-use.patch
bridge-netlink-account-for-the-ifla_brport_proxyarp-attribute-size-and-policy.patch
bridge-netlink-account-for-the-ifla_brport_proxyarp_wifi-attribute-size-and-policy.patch
bridge-netlink-fix-slave_changelink-br_setport-race-conditions.patch
fib_trie-drop-unnecessary-calls-to-leaf_pull_suffix.patch
fq_codel-fix-a-use-after-free.patch
inet-fix-possible-request-socket-leak.patch
inet-fix-races-with-reqsk-timers.patch
inet-frags-fix-defragmented-packet-s-ip-header-for-af_packet.patch
ip_tunnel-fix-ipv4-pmtu-check-to-honor-inner-ip-header-df.patch
ipv4-off-by-one-in-continuation-handling-in-proc-net-route.patch
ipv6-lock-socket-in-ip6_datagram_connect.patch
ipv6-make-mld-packets-to-only-be-processed-locally.patch
isdn-gigaset-reset-tty-receive_room-when-attaching-ser_gigaset.patch
jbd2-avoid-infinite-loop-when-destroying-aborted-journal.patch
net-call-rcu_read_lock-early-in-process_backlog.patch
net-clone-skb-before-setting-peeked-flag.patch
net-do-not-process-device-backlog-during-unregistration.patch
net-dsa-do-not-override-phy-interface-if-already-configured.patch
net-fix-skb-csum-races-when-peeking.patch
net-fix-skb_set_peeked-use-after-free-bug.patch
net-graceful-exit-from-netif_alloc_netdev_queues.patch
net-mlx4_core-fix-wrong-index-in-propagating-port-change-event-to-vfs.patch
net-pktgen-fix-race-between-pktgen_thread_worker-and-kthread_stop.patch
net-sched-fix-refcount-imbalance-in-actions.patch
net-tipc-initialize-security-state-for-new-connection-socket.patch
net-xen-netback-off-by-one-in-bug_on-condition.patch
netlink-don-t-hold-mutex-in-rcu-callback-when-releasing-mmapd-ring.patch
netlink-make-sure-ebusy-won-t-escape-from-netlink_insert.patch
packet-missing-dev_put-in-packet_do_bind.patch
packet-tpacket_snd-fix-signed-unsigned-comparison.patch
rds-fix-an-integer-overflow-test-in-rds_info_getsockopt.patch
revert-dev-set-iflink-to-0-for-virtual-interfaces.patch
revert-sit-add-gro-callbacks-to-sit_offload.patch
rhashtable-fix-for-resize-events-during-table-walk.patch
rocker-free-netdevice-during-netdevice-removal.patch
rtnetlink-verify-ifla_vf_info-attributes-before-passing-them-to-driver.patch
sched-cls_bpf-fix-panic-on-filter-replace.patch
sched-cls_flow-fix-panic-on-filter-replace.patch
udp-fix-dst-races-with-multicast-early-demux.patch
virtio_net-don-t-require-any_layout-with-version_1.patch

48 files changed:
queue-4.1/act_bpf-fix-memory-leaks-when-replacing-bpf-programs.patch [new file with mode: 0644]
queue-4.1/bna-fix-interrupts-storm-caused-by-erroneous-packets.patch [new file with mode: 0644]
queue-4.1/bonding-correct-the-mac-address-for-follow-fail_over_mac-policy.patch [new file with mode: 0644]
queue-4.1/bonding-fix-destruction-of-bond-with-devices-different-from-arphrd_ether.patch [new file with mode: 0644]
queue-4.1/bridge-fix-potential-crash-in-__netdev_pick_tx.patch [new file with mode: 0644]
queue-4.1/bridge-mdb-fix-double-add-notification.patch [new file with mode: 0644]
queue-4.1/bridge-mdb-zero-out-the-local-br_ip-variable-before-use.patch [new file with mode: 0644]
queue-4.1/bridge-netlink-account-for-the-ifla_brport_proxyarp-attribute-size-and-policy.patch [new file with mode: 0644]
queue-4.1/bridge-netlink-account-for-the-ifla_brport_proxyarp_wifi-attribute-size-and-policy.patch [new file with mode: 0644]
queue-4.1/bridge-netlink-fix-slave_changelink-br_setport-race-conditions.patch [new file with mode: 0644]
queue-4.1/fib_trie-drop-unnecessary-calls-to-leaf_pull_suffix.patch [new file with mode: 0644]
queue-4.1/fq_codel-fix-a-use-after-free.patch [new file with mode: 0644]
queue-4.1/inet-fix-possible-request-socket-leak.patch [new file with mode: 0644]
queue-4.1/inet-fix-races-with-reqsk-timers.patch [new file with mode: 0644]
queue-4.1/inet-frags-fix-defragmented-packet-s-ip-header-for-af_packet.patch [new file with mode: 0644]
queue-4.1/ip_tunnel-fix-ipv4-pmtu-check-to-honor-inner-ip-header-df.patch [new file with mode: 0644]
queue-4.1/ipv4-off-by-one-in-continuation-handling-in-proc-net-route.patch [new file with mode: 0644]
queue-4.1/ipv6-lock-socket-in-ip6_datagram_connect.patch [new file with mode: 0644]
queue-4.1/ipv6-make-mld-packets-to-only-be-processed-locally.patch [new file with mode: 0644]
queue-4.1/isdn-gigaset-reset-tty-receive_room-when-attaching-ser_gigaset.patch [new file with mode: 0644]
queue-4.1/jbd2-avoid-infinite-loop-when-destroying-aborted-journal.patch [new file with mode: 0644]
queue-4.1/net-call-rcu_read_lock-early-in-process_backlog.patch [new file with mode: 0644]
queue-4.1/net-clone-skb-before-setting-peeked-flag.patch [new file with mode: 0644]
queue-4.1/net-do-not-process-device-backlog-during-unregistration.patch [new file with mode: 0644]
queue-4.1/net-dsa-do-not-override-phy-interface-if-already-configured.patch [new file with mode: 0644]
queue-4.1/net-fix-skb-csum-races-when-peeking.patch [new file with mode: 0644]
queue-4.1/net-fix-skb_set_peeked-use-after-free-bug.patch [new file with mode: 0644]
queue-4.1/net-graceful-exit-from-netif_alloc_netdev_queues.patch [new file with mode: 0644]
queue-4.1/net-mlx4_core-fix-wrong-index-in-propagating-port-change-event-to-vfs.patch [new file with mode: 0644]
queue-4.1/net-pktgen-fix-race-between-pktgen_thread_worker-and-kthread_stop.patch [new file with mode: 0644]
queue-4.1/net-sched-fix-refcount-imbalance-in-actions.patch [new file with mode: 0644]
queue-4.1/net-tipc-initialize-security-state-for-new-connection-socket.patch [new file with mode: 0644]
queue-4.1/net-xen-netback-off-by-one-in-bug_on-condition.patch [new file with mode: 0644]
queue-4.1/netlink-don-t-hold-mutex-in-rcu-callback-when-releasing-mmapd-ring.patch [new file with mode: 0644]
queue-4.1/netlink-make-sure-ebusy-won-t-escape-from-netlink_insert.patch [new file with mode: 0644]
queue-4.1/packet-missing-dev_put-in-packet_do_bind.patch [new file with mode: 0644]
queue-4.1/packet-tpacket_snd-fix-signed-unsigned-comparison.patch [new file with mode: 0644]
queue-4.1/rds-fix-an-integer-overflow-test-in-rds_info_getsockopt.patch [new file with mode: 0644]
queue-4.1/revert-dev-set-iflink-to-0-for-virtual-interfaces.patch [new file with mode: 0644]
queue-4.1/revert-sit-add-gro-callbacks-to-sit_offload.patch [new file with mode: 0644]
queue-4.1/rhashtable-fix-for-resize-events-during-table-walk.patch [new file with mode: 0644]
queue-4.1/rocker-free-netdevice-during-netdevice-removal.patch [new file with mode: 0644]
queue-4.1/rtnetlink-verify-ifla_vf_info-attributes-before-passing-them-to-driver.patch [new file with mode: 0644]
queue-4.1/sched-cls_bpf-fix-panic-on-filter-replace.patch [new file with mode: 0644]
queue-4.1/sched-cls_flow-fix-panic-on-filter-replace.patch [new file with mode: 0644]
queue-4.1/series
queue-4.1/udp-fix-dst-races-with-multicast-early-demux.patch [new file with mode: 0644]
queue-4.1/virtio_net-don-t-require-any_layout-with-version_1.patch [new file with mode: 0644]

diff --git a/queue-4.1/act_bpf-fix-memory-leaks-when-replacing-bpf-programs.patch b/queue-4.1/act_bpf-fix-memory-leaks-when-replacing-bpf-programs.patch
new file mode 100644 (file)
index 0000000..18eb1fa
--- /dev/null
@@ -0,0 +1,179 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 29 Jul 2015 18:40:56 +0200
+Subject: act_bpf: fix memory leaks when replacing bpf programs
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit f4eaed28c7834fc049c754f63e6988bbd73778d9 ]
+
+We currently trigger multiple memory leaks when replacing bpf
+actions, besides others:
+
+  comm "tc", pid 1909, jiffies 4294851310 (age 1602.796s)
+  hex dump (first 32 bytes):
+    01 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00  ................
+    18 b0 98 6d 00 88 ff ff 00 00 00 00 00 00 00 00  ...m............
+  backtrace:
+    [<ffffffff817e623e>] kmemleak_alloc+0x4e/0xb0
+    [<ffffffff8120a22d>] __vmalloc_node_range+0x1bd/0x2c0
+    [<ffffffff8120a37a>] __vmalloc+0x4a/0x50
+    [<ffffffff811a8d0a>] bpf_prog_alloc+0x3a/0xa0
+    [<ffffffff816c0684>] bpf_prog_create+0x44/0xa0
+    [<ffffffffa09ba4eb>] tcf_bpf_init+0x28b/0x3c0 [act_bpf]
+    [<ffffffff816d7001>] tcf_action_init_1+0x191/0x1b0
+    [<ffffffff816d70a2>] tcf_action_init+0x82/0xf0
+    [<ffffffff816d4d12>] tcf_exts_validate+0xb2/0xc0
+    [<ffffffffa09b5838>] cls_bpf_modify_existing+0x98/0x340 [cls_bpf]
+    [<ffffffffa09b5cd6>] cls_bpf_change+0x1a6/0x274 [cls_bpf]
+    [<ffffffff816d56e5>] tc_ctl_tfilter+0x335/0x910
+    [<ffffffff816b9145>] rtnetlink_rcv_msg+0x95/0x240
+    [<ffffffff816df34f>] netlink_rcv_skb+0xaf/0xc0
+    [<ffffffff816b909e>] rtnetlink_rcv+0x2e/0x40
+    [<ffffffff816deaaf>] netlink_unicast+0xef/0x1b0
+
+Issue is that the old content from tcf_bpf is allocated and needs
+to be released when we replace it. We seem to do that since the
+beginning of act_bpf on the filter and insns, later on the name as
+well.
+
+Example test case, after patch:
+
+  # FOO="1,6 0 0 4294967295,"
+  # BAR="1,6 0 0 4294967294,"
+  # tc actions add action bpf bytecode "$FOO" index 2
+  # tc actions show action bpf
+   action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
+   index 2 ref 1 bind 0
+  # tc actions replace action bpf bytecode "$BAR" index 2
+  # tc actions show action bpf
+   action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe
+   index 2 ref 1 bind 0
+  # tc actions replace action bpf bytecode "$FOO" index 2
+  # tc actions show action bpf
+   action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
+   index 2 ref 1 bind 0
+  # tc actions del action bpf index 2
+  [...]
+  # echo "scan" > /sys/kernel/debug/kmemleak
+  # cat /sys/kernel/debug/kmemleak | grep "comm \"tc\"" | wc -l
+  0
+
+Fixes: d23b8ad8ab23 ("tc: add BPF based action")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_bpf.c |   50 +++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 35 insertions(+), 15 deletions(-)
+
+--- a/net/sched/act_bpf.c
++++ b/net/sched/act_bpf.c
+@@ -27,9 +27,10 @@
+ struct tcf_bpf_cfg {
+       struct bpf_prog *filter;
+       struct sock_filter *bpf_ops;
+-      char *bpf_name;
++      const char *bpf_name;
+       u32 bpf_fd;
+       u16 bpf_num_ops;
++      bool is_ebpf;
+ };
+ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
+@@ -200,6 +201,7 @@ static int tcf_bpf_init_from_ops(struct
+       cfg->bpf_ops = bpf_ops;
+       cfg->bpf_num_ops = bpf_num_ops;
+       cfg->filter = fp;
++      cfg->is_ebpf = false;
+       return 0;
+ }
+@@ -234,18 +236,40 @@ static int tcf_bpf_init_from_efd(struct
+       cfg->bpf_fd = bpf_fd;
+       cfg->bpf_name = name;
+       cfg->filter = fp;
++      cfg->is_ebpf = true;
+       return 0;
+ }
++static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg)
++{
++      if (cfg->is_ebpf)
++              bpf_prog_put(cfg->filter);
++      else
++              bpf_prog_destroy(cfg->filter);
++
++      kfree(cfg->bpf_ops);
++      kfree(cfg->bpf_name);
++}
++
++static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
++                                struct tcf_bpf_cfg *cfg)
++{
++      cfg->is_ebpf = tcf_bpf_is_ebpf(prog);
++      cfg->filter = prog->filter;
++
++      cfg->bpf_ops = prog->bpf_ops;
++      cfg->bpf_name = prog->bpf_name;
++}
++
+ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
+                       struct nlattr *est, struct tc_action *act,
+                       int replace, int bind)
+ {
+       struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
++      struct tcf_bpf_cfg cfg, old;
+       struct tc_act_bpf *parm;
+       struct tcf_bpf *prog;
+-      struct tcf_bpf_cfg cfg;
+       bool is_bpf, is_ebpf;
+       int ret;
+@@ -294,6 +318,9 @@ static int tcf_bpf_init(struct net *net,
+       prog = to_bpf(act);
+       spin_lock_bh(&prog->tcf_lock);
++      if (ret != ACT_P_CREATED)
++              tcf_bpf_prog_fill_cfg(prog, &old);
++
+       prog->bpf_ops = cfg.bpf_ops;
+       prog->bpf_name = cfg.bpf_name;
+@@ -309,29 +336,22 @@ static int tcf_bpf_init(struct net *net,
+       if (ret == ACT_P_CREATED)
+               tcf_hash_insert(act);
++      else
++              tcf_bpf_cfg_cleanup(&old);
+       return ret;
+ destroy_fp:
+-      if (is_ebpf)
+-              bpf_prog_put(cfg.filter);
+-      else
+-              bpf_prog_destroy(cfg.filter);
+-
+-      kfree(cfg.bpf_ops);
+-      kfree(cfg.bpf_name);
+-
++      tcf_bpf_cfg_cleanup(&cfg);
+       return ret;
+ }
+ static void tcf_bpf_cleanup(struct tc_action *act, int bind)
+ {
+-      const struct tcf_bpf *prog = act->priv;
++      struct tcf_bpf_cfg tmp;
+-      if (tcf_bpf_is_ebpf(prog))
+-              bpf_prog_put(prog->filter);
+-      else
+-              bpf_prog_destroy(prog->filter);
++      tcf_bpf_prog_fill_cfg(act->priv, &tmp);
++      tcf_bpf_cfg_cleanup(&tmp);
+ }
+ static struct tc_action_ops act_bpf_ops __read_mostly = {
diff --git a/queue-4.1/bna-fix-interrupts-storm-caused-by-erroneous-packets.patch b/queue-4.1/bna-fix-interrupts-storm-caused-by-erroneous-packets.patch
new file mode 100644 (file)
index 0000000..361286a
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Ivan Vecera <ivecera@redhat.com>
+Date: Thu, 6 Aug 2015 22:48:23 +0200
+Subject: bna: fix interrupts storm caused by erroneous packets
+
+From: Ivan Vecera <ivecera@redhat.com>
+
+[ Upstream commit ade4dc3e616e33c80d7e62855fe1b6f9895bc7c3 ]
+
+The commit "e29aa33 bna: Enable Multi Buffer RX" moved packets counter
+increment from the beginning of the NAPI processing loop after the check
+for erroneous packets so they are never accounted. This counter is used
+to inform firmware about number of processed completions (packets).
+As these packets are never acked the firmware fires IRQs for them again
+and again.
+
+Fixes: e29aa33 ("bna: Enable Multi Buffer RX")
+Signed-off-by: Ivan Vecera <ivecera@redhat.com>
+Acked-by: Rasesh Mody <rasesh.mody@qlogic.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/brocade/bna/bnad.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/brocade/bna/bnad.c
++++ b/drivers/net/ethernet/brocade/bna/bnad.c
+@@ -675,6 +675,7 @@ bnad_cq_process(struct bnad *bnad, struc
+                       if (!next_cmpl->valid)
+                               break;
+               }
++              packets++;
+               /* TODO: BNA_CQ_EF_LOCAL ? */
+               if (unlikely(flags & (BNA_CQ_EF_MAC_ERROR |
+@@ -691,7 +692,6 @@ bnad_cq_process(struct bnad *bnad, struc
+               else
+                       bnad_cq_setup_skb_frags(rcb, skb, sop_ci, nvecs, len);
+-              packets++;
+               rcb->rxq->rx_packets++;
+               rcb->rxq->rx_bytes += totlen;
+               ccb->bytes_per_intr += totlen;
diff --git a/queue-4.1/bonding-correct-the-mac-address-for-follow-fail_over_mac-policy.patch b/queue-4.1/bonding-correct-the-mac-address-for-follow-fail_over_mac-policy.patch
new file mode 100644 (file)
index 0000000..7c6dba0
--- /dev/null
@@ -0,0 +1,80 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: dingtianhong <dingtianhong@huawei.com>
+Date: Thu, 16 Jul 2015 16:30:02 +0800
+Subject: bonding: correct the MAC address for "follow" fail_over_mac policy
+
+From: dingtianhong <dingtianhong@huawei.com>
+
+[ Upstream commit a951bc1e6ba58f11df5ed5ddc41311e10f5fd20b ]
+
+The "follow" fail_over_mac policy is useful for multiport devices that
+either become confused or incur a performance penalty when multiple
+ports are programmed with the same MAC address, but the same MAC
+address still may happened by this steps for this policy:
+
+1) echo +eth0 > /sys/class/net/bond0/bonding/slaves
+   bond0 has the same mac address with eth0, it is MAC1.
+
+2) echo +eth1 > /sys/class/net/bond0/bonding/slaves
+   eth1 is backup, eth1 has MAC2.
+
+3) ifconfig eth0 down
+   eth1 became active slave, bond will swap MAC for eth0 and eth1,
+   so eth1 has MAC1, and eth0 has MAC2.
+
+4) ifconfig eth1 down
+   there is no active slave, and eth1 still has MAC1, eth2 has MAC2.
+
+5) ifconfig eth0 up
+   the eth0 became active slave again, the bond set eth0 to MAC1.
+
+Something wrong here, then if you set eth1 up, the eth0 and eth1 will have the same
+MAC address, it will break this policy for ACTIVE_BACKUP mode.
+
+This patch will fix this problem by finding the old active slave and
+swap them MAC address before change active slave.
+
+Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
+Tested-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c |   20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -625,6 +625,23 @@ static void bond_set_dev_addr(struct net
+       call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev);
+ }
++static struct slave *bond_get_old_active(struct bonding *bond,
++                                       struct slave *new_active)
++{
++      struct slave *slave;
++      struct list_head *iter;
++
++      bond_for_each_slave(bond, slave, iter) {
++              if (slave == new_active)
++                      continue;
++
++              if (ether_addr_equal(bond->dev->dev_addr, slave->dev->dev_addr))
++                      return slave;
++      }
++
++      return NULL;
++}
++
+ /* bond_do_fail_over_mac
+  *
+  * Perform special MAC address swapping for fail_over_mac settings
+@@ -652,6 +669,9 @@ static void bond_do_fail_over_mac(struct
+               if (!new_active)
+                       return;
++              if (!old_active)
++                      old_active = bond_get_old_active(bond, new_active);
++
+               if (old_active) {
+                       ether_addr_copy(tmp_mac, new_active->dev->dev_addr);
+                       ether_addr_copy(saddr.sa_data,
diff --git a/queue-4.1/bonding-fix-destruction-of-bond-with-devices-different-from-arphrd_ether.patch b/queue-4.1/bonding-fix-destruction-of-bond-with-devices-different-from-arphrd_ether.patch
new file mode 100644 (file)
index 0000000..7a960aa
--- /dev/null
@@ -0,0 +1,101 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Wed, 15 Jul 2015 21:52:51 +0200
+Subject: bonding: fix destruction of bond with devices different from arphrd_ether
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+[ Upstream commit 06f6d1094aa0992432b1e2a0920b0ee86ccd83bf ]
+
+When the bonding is being unloaded and the netdevice notifier is
+unregistered it executes NETDEV_UNREGISTER for each device which should
+remove the bond's proc entry but if the device enslaved is not of
+ARPHRD_ETHER type and is in front of the bonding, it may execute
+bond_release_and_destroy() first which would release the last slave and
+destroy the bond device leaving the proc entry and thus we will get the
+following error (with dynamic debug on for bond_netdev_event to see the
+events order):
+[  908.963051] eql: event: 9
+[  908.963052] eql: IFF_SLAVE
+[  908.963054] eql: event: 2
+[  908.963056] eql: IFF_SLAVE
+[  908.963058] eql: event: 6
+[  908.963059] eql: IFF_SLAVE
+[  908.963110] bond0: Releasing active interface eql
+[  908.976168] bond0: Destroying bond bond0
+[  908.976266] bond0 (unregistering): Released all slaves
+[  908.984097] ------------[ cut here ]------------
+[  908.984107] WARNING: CPU: 0 PID: 1787 at fs/proc/generic.c:575
+remove_proc_entry+0x112/0x160()
+[  908.984110] remove_proc_entry: removing non-empty directory
+'net/bonding', leaking at least 'bond0'
+[  908.984111] Modules linked in: bonding(-) eql(O) 9p nfsd auth_rpcgss
+oid_registry nfs_acl nfs lockd grace fscache sunrpc crct10dif_pclmul
+crc32_pclmul crc32c_intel ghash_clmulni_intel ppdev qxl drm_kms_helper
+snd_hda_codec_generic aesni_intel ttm aes_x86_64 glue_helper pcspkr lrw
+gf128mul ablk_helper cryptd snd_hda_intel virtio_console snd_hda_codec
+psmouse serio_raw snd_hwdep snd_hda_core 9pnet_virtio 9pnet evdev joydev
+drm virtio_balloon snd_pcm snd_timer snd soundcore i2c_piix4 i2c_core
+pvpanic acpi_cpufreq parport_pc parport processor thermal_sys button
+autofs4 ext4 crc16 mbcache jbd2 hid_generic usbhid hid sg sr_mod cdrom
+ata_generic virtio_blk virtio_net floppy ata_piix e1000 libata ehci_pci
+virtio_pci scsi_mod uhci_hcd ehci_hcd virtio_ring virtio usbcore
+usb_common [last unloaded: bonding]
+
+[  908.984168] CPU: 0 PID: 1787 Comm: rmmod Tainted: G        W  O
+4.2.0-rc2+ #8
+[  908.984170] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
+[  908.984172]  0000000000000000 ffffffff81732d41 ffffffff81525b34
+ffff8800358dfda8
+[  908.984175]  ffffffff8106c521 ffff88003595af78 ffff88003595af40
+ffff88003e3a4280
+[  908.984178]  ffffffffa058d040 0000000000000000 ffffffff8106c59a
+ffffffff8172ebd0
+[  908.984181] Call Trace:
+[  908.984188]  [<ffffffff81525b34>] ? dump_stack+0x40/0x50
+[  908.984193]  [<ffffffff8106c521>] ? warn_slowpath_common+0x81/0xb0
+[  908.984196]  [<ffffffff8106c59a>] ? warn_slowpath_fmt+0x4a/0x50
+[  908.984199]  [<ffffffff81218352>] ? remove_proc_entry+0x112/0x160
+[  908.984205]  [<ffffffffa05850e6>] ? bond_destroy_proc_dir+0x26/0x30
+[bonding]
+[  908.984208]  [<ffffffffa057540e>] ? bond_net_exit+0x8e/0xa0 [bonding]
+[  908.984217]  [<ffffffff8142f407>] ? ops_exit_list.isra.4+0x37/0x70
+[  908.984225]  [<ffffffff8142f52d>] ?
+unregister_pernet_operations+0x8d/0xd0
+[  908.984228]  [<ffffffff8142f58d>] ?
+unregister_pernet_subsys+0x1d/0x30
+[  908.984232]  [<ffffffffa0585269>] ? bonding_exit+0x23/0xdba [bonding]
+[  908.984236]  [<ffffffff810e28ba>] ? SyS_delete_module+0x18a/0x250
+[  908.984241]  [<ffffffff81086f99>] ? task_work_run+0x89/0xc0
+[  908.984244]  [<ffffffff8152b732>] ?
+entry_SYSCALL_64_fastpath+0x16/0x75
+[  908.984247] ---[ end trace 7c006ed4abbef24b ]---
+
+Thus remove the proc entry manually if bond_release_and_destroy() is
+used. Because of the checks in bond_remove_proc_entry() it's not a
+problem for a bond device to change namespaces (the bug fixed by the
+Fixes commit) but since commit
+f9399814927ad ("bonding: Don't allow bond devices to change network
+namespaces.") that can't happen anyway.
+
+Reported-by: Carol Soto <clsoto@linux.vnet.ibm.com>
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Fixes: a64d49c3dd50 ("bonding: Manage /proc/net/bonding/ entries from
+                      the netdev events")
+Tested-by: Carol L Soto <clsoto@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -1902,6 +1902,7 @@ static int  bond_release_and_destroy(str
+               bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
+               netdev_info(bond_dev, "Destroying bond %s\n",
+                           bond_dev->name);
++              bond_remove_proc_entry(bond);
+               unregister_netdevice(bond_dev);
+       }
+       return ret;
diff --git a/queue-4.1/bridge-fix-potential-crash-in-__netdev_pick_tx.patch b/queue-4.1/bridge-fix-potential-crash-in-__netdev_pick_tx.patch
new file mode 100644 (file)
index 0000000..9384ad6
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 9 Jul 2015 18:56:07 +0200
+Subject: bridge: fix potential crash in __netdev_pick_tx()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a7d35f9d73e9ffa74a02304b817e579eec632f67 ]
+
+Commit c29390c6dfee ("xps: must clear sender_cpu before forwarding")
+fixed an issue in normal forward path, caused by sender_cpu & napi_id
+skb fields being an union.
+
+Bridge is another point where skb can be forwarded, so we need
+the same cure.
+
+Bug triggers if packet was received on a NIC using skb_mark_napi_id()
+
+Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Bob Liu <bob.liu@oracle.com>
+Tested-by: Bob Liu <bob.liu@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_forward.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/bridge/br_forward.c
++++ b/net/bridge/br_forward.c
+@@ -42,6 +42,7 @@ int br_dev_queue_push_xmit(struct sock *
+       } else {
+               skb_push(skb, ETH_HLEN);
+               br_drop_fake_rtable(skb);
++              skb_sender_cpu_clear(skb);
+               dev_queue_xmit(skb);
+       }
diff --git a/queue-4.1/bridge-mdb-fix-double-add-notification.patch b/queue-4.1/bridge-mdb-fix-double-add-notification.patch
new file mode 100644 (file)
index 0000000..c8e590f
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Mon, 13 Jul 2015 06:36:19 -0700
+Subject: bridge: mdb: fix double add notification
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+[ Upstream commit 5ebc784625ea68a9570d1f70557e7932988cd1b4 ]
+
+Since the mdb add/del code was introduced there have been 2 br_mdb_notify
+calls when doing br_mdb_add() resulting in 2 notifications on each add.
+
+Example:
+ Command: bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent
+ Before patch:
+ root@debian:~# bridge monitor all
+ [MDB]dev br0 port eth1 grp 239.0.0.1 permanent
+ [MDB]dev br0 port eth1 grp 239.0.0.1 permanent
+
+ After patch:
+ root@debian:~# bridge monitor all
+ [MDB]dev br0 port eth1 grp 239.0.0.1 permanent
+
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Fixes: cfd567543590 ("bridge: add support of adding and deleting mdb entries")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_mdb.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/net/bridge/br_mdb.c
++++ b/net/bridge/br_mdb.c
+@@ -348,7 +348,6 @@ static int br_mdb_add_group(struct net_b
+               return -ENOMEM;
+       rcu_assign_pointer(*pp, p);
+-      br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
+       return 0;
+ }
diff --git a/queue-4.1/bridge-mdb-zero-out-the-local-br_ip-variable-before-use.patch b/queue-4.1/bridge-mdb-zero-out-the-local-br_ip-variable-before-use.patch
new file mode 100644 (file)
index 0000000..8e67b29
--- /dev/null
@@ -0,0 +1,57 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Nikolay Aleksandrov <razor@blackwall.org>
+Date: Tue, 7 Jul 2015 15:55:56 +0200
+Subject: bridge: mdb: zero out the local br_ip variable before use
+
+From: Nikolay Aleksandrov <razor@blackwall.org>
+
+[ Upstream commit f1158b74e54f2e2462ba5e2f45a118246d9d5b43 ]
+
+Since commit b0e9a30dd669 ("bridge: Add vlan id to multicast groups")
+there's a check in br_ip_equal() for a matching vlan id, but the mdb
+functions were not modified to use (or at least zero it) so when an
+entry was added it would have a garbage vlan id (from the local br_ip
+variable in __br_mdb_add/del) and this would prevent it from being
+matched and also deleted. So zero out the whole local ip var to protect
+ourselves from future changes and also to fix the current bug, since
+there's no vlan id support in the mdb uapi - use always vlan id 0.
+Example before patch:
+root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent
+root@debian:~# bridge mdb
+dev br0 port eth1 grp 239.0.0.1 permanent
+root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent
+RTNETLINK answers: Invalid argument
+
+After patch:
+root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent
+root@debian:~# bridge mdb
+dev br0 port eth1 grp 239.0.0.1 permanent
+root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent
+root@debian:~# bridge mdb
+
+Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
+Fixes: b0e9a30dd669 ("bridge: Add vlan id to multicast groups")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_mdb.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/bridge/br_mdb.c
++++ b/net/bridge/br_mdb.c
+@@ -371,6 +371,7 @@ static int __br_mdb_add(struct net *net,
+       if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+               return -EINVAL;
++      memset(&ip, 0, sizeof(ip));
+       ip.proto = entry->addr.proto;
+       if (ip.proto == htons(ETH_P_IP))
+               ip.u.ip4 = entry->addr.u.ip4;
+@@ -417,6 +418,7 @@ static int __br_mdb_del(struct net_bridg
+       if (!netif_running(br->dev) || br->multicast_disabled)
+               return -EINVAL;
++      memset(&ip, 0, sizeof(ip));
+       ip.proto = entry->addr.proto;
+       if (ip.proto == htons(ETH_P_IP)) {
+               if (timer_pending(&br->ip4_other_query.timer))
diff --git a/queue-4.1/bridge-netlink-account-for-the-ifla_brport_proxyarp-attribute-size-and-policy.patch b/queue-4.1/bridge-netlink-account-for-the-ifla_brport_proxyarp-attribute-size-and-policy.patch
new file mode 100644 (file)
index 0000000..4e4caed
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Tue, 4 Aug 2015 19:06:32 +0200
+Subject: bridge: netlink: account for the IFLA_BRPORT_PROXYARP attribute size and policy
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+[ Upstream commit 355b9f9df1f0311f20087350aee8ad96eedca8a9 ]
+
+The attribute size wasn't accounted for in the get_slave_size() callback
+(br_port_get_slave_size) when it was introduced, so fix it now. Also add
+a policy entry for it in br_port_policy.
+
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Fixes: 958501163ddd ("bridge: Add support for IEEE 802.11 Proxy ARP")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_netlink.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/bridge/br_netlink.c
++++ b/net/bridge/br_netlink.c
+@@ -112,6 +112,7 @@ static inline size_t br_port_info_size(v
+               + nla_total_size(1)     /* IFLA_BRPORT_FAST_LEAVE */
+               + nla_total_size(1)     /* IFLA_BRPORT_LEARNING */
+               + nla_total_size(1)     /* IFLA_BRPORT_UNICAST_FLOOD */
++              + nla_total_size(1)     /* IFLA_BRPORT_PROXYARP */
+               + 0;
+ }
+@@ -504,6 +505,7 @@ static const struct nla_policy br_port_p
+       [IFLA_BRPORT_FAST_LEAVE]= { .type = NLA_U8 },
+       [IFLA_BRPORT_LEARNING]  = { .type = NLA_U8 },
+       [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 },
++      [IFLA_BRPORT_PROXYARP]  = { .type = NLA_U8 },
+ };
+ /* Change the state of the port and notify spanning tree */
diff --git a/queue-4.1/bridge-netlink-account-for-the-ifla_brport_proxyarp_wifi-attribute-size-and-policy.patch b/queue-4.1/bridge-netlink-account-for-the-ifla_brport_proxyarp_wifi-attribute-size-and-policy.patch
new file mode 100644 (file)
index 0000000..4d080ee
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Tue, 4 Aug 2015 19:06:33 +0200
+Subject: bridge: netlink: account for the IFLA_BRPORT_PROXYARP_WIFI attribute size and policy
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+[ Upstream commit 786c2077ec8e9eab37a88fc14aac4309a8061e18 ]
+
+The attribute size wasn't accounted for in the get_slave_size() callback
+(br_port_get_slave_size) when it was introduced, so fix it now. Also add
+a policy entry for it in br_port_policy.
+
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Fixes: 842a9ae08a25 ("bridge: Extend Proxy ARP design to allow optional rules for Wi-Fi")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_netlink.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/bridge/br_netlink.c
++++ b/net/bridge/br_netlink.c
+@@ -113,6 +113,7 @@ static inline size_t br_port_info_size(v
+               + nla_total_size(1)     /* IFLA_BRPORT_LEARNING */
+               + nla_total_size(1)     /* IFLA_BRPORT_UNICAST_FLOOD */
+               + nla_total_size(1)     /* IFLA_BRPORT_PROXYARP */
++              + nla_total_size(1)     /* IFLA_BRPORT_PROXYARP_WIFI */
+               + 0;
+ }
+@@ -506,6 +507,7 @@ static const struct nla_policy br_port_p
+       [IFLA_BRPORT_LEARNING]  = { .type = NLA_U8 },
+       [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 },
+       [IFLA_BRPORT_PROXYARP]  = { .type = NLA_U8 },
++      [IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 },
+ };
+ /* Change the state of the port and notify spanning tree */
diff --git a/queue-4.1/bridge-netlink-fix-slave_changelink-br_setport-race-conditions.patch b/queue-4.1/bridge-netlink-fix-slave_changelink-br_setport-race-conditions.patch
new file mode 100644 (file)
index 0000000..c8a219c
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Wed, 22 Jul 2015 13:03:40 +0200
+Subject: bridge: netlink: fix slave_changelink/br_setport race conditions
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+[ Upstream commit 963ad94853000ab100f5ff19eea80095660d41b4 ]
+
+Since slave_changelink support was added there have been a few race
+conditions when using br_setport() since some of the port functions it
+uses require the bridge lock. It is very easy to trigger a lockup due to
+some internal spin_lock() usage without bh disabled, also it's possible to
+get the bridge into an inconsistent state.
+
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Fixes: 3ac636b8591c ("bridge: implement rtnl_link_ops->slave_changelink")
+Reviewed-by: Jiri Pirko <jiri@resnulli.us>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_netlink.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/net/bridge/br_netlink.c
++++ b/net/bridge/br_netlink.c
+@@ -711,9 +711,17 @@ static int br_port_slave_changelink(stru
+                                   struct nlattr *tb[],
+                                   struct nlattr *data[])
+ {
++      struct net_bridge *br = netdev_priv(brdev);
++      int ret;
++
+       if (!data)
+               return 0;
+-      return br_setport(br_port_get_rtnl(dev), data);
++
++      spin_lock_bh(&br->lock);
++      ret = br_setport(br_port_get_rtnl(dev), data);
++      spin_unlock_bh(&br->lock);
++
++      return ret;
+ }
+ static int br_port_fill_slave_info(struct sk_buff *skb,
diff --git a/queue-4.1/fib_trie-drop-unnecessary-calls-to-leaf_pull_suffix.patch b/queue-4.1/fib_trie-drop-unnecessary-calls-to-leaf_pull_suffix.patch
new file mode 100644 (file)
index 0000000..ca42070
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Alexander Duyck <alexander.h.duyck@redhat.com>
+Date: Mon, 27 Jul 2015 13:08:06 -0700
+Subject: fib_trie: Drop unnecessary calls to leaf_pull_suffix
+
+From: Alexander Duyck <alexander.h.duyck@redhat.com>
+
+[ Upstream commit 1513069edcf8dd86cfd8d5daef482b97d6b93df6 ]
+
+It was reported that update_suffix was taking a long time on systems where
+a large number of leaves were attached to a single node.  As it turns out
+fib_table_flush was calling update_suffix for each leaf that didn't have all
+of the aliases stripped from it.  As a result, on this large node removing
+one leaf would result in us calling update_suffix for every other leaf on
+the node.
+
+The fix is to just remove the calls to leaf_pull_suffix since they are
+redundant as we already have a call in resize that will go through and
+update the suffix length for the node before we exit out of
+fib_table_flush or fib_table_flush_external.
+
+Reported-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
+Tested-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_trie.c |    4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -1780,8 +1780,6 @@ void fib_table_flush_external(struct fib
+               if (hlist_empty(&n->leaf)) {
+                       put_child_root(pn, n->key, NULL);
+                       node_free(n);
+-              } else {
+-                      leaf_pull_suffix(pn, n);
+               }
+       }
+ }
+@@ -1852,8 +1850,6 @@ int fib_table_flush(struct fib_table *tb
+               if (hlist_empty(&n->leaf)) {
+                       put_child_root(pn, n->key, NULL);
+                       node_free(n);
+-              } else {
+-                      leaf_pull_suffix(pn, n);
+               }
+       }
diff --git a/queue-4.1/fq_codel-fix-a-use-after-free.patch b/queue-4.1/fq_codel-fix-a-use-after-free.patch
new file mode 100644 (file)
index 0000000..fc8a1ff
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Mon, 13 Jul 2015 12:30:07 -0700
+Subject: fq_codel: fix a use-after-free
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 052cbda41fdc243a8d40cce7ab3a6327b4b2887e ]
+
+Fixes: 25331d6ce42b ("net: sched: implement qstat helper routines")
+Cc: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: Cong Wang <cwang@twopensource.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_fq_codel.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sched/sch_fq_codel.c
++++ b/net/sched/sch_fq_codel.c
+@@ -162,10 +162,10 @@ static unsigned int fq_codel_drop(struct
+       skb = dequeue_head(flow);
+       len = qdisc_pkt_len(skb);
+       q->backlogs[idx] -= len;
+-      kfree_skb(skb);
+       sch->q.qlen--;
+       qdisc_qstats_drop(sch);
+       qdisc_qstats_backlog_dec(sch, skb);
++      kfree_skb(skb);
+       flow->dropped++;
+       return idx;
+ }
diff --git a/queue-4.1/inet-fix-possible-request-socket-leak.patch b/queue-4.1/inet-fix-possible-request-socket-leak.patch
new file mode 100644 (file)
index 0000000..ce311d5
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 10 Aug 2015 15:07:34 -0700
+Subject: inet: fix possible request socket leak
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3257d8b12f954c462d29de6201664a846328a522 ]
+
+In commit b357a364c57c9 ("inet: fix possible panic in
+reqsk_queue_unlink()"), I missed fact that tcp_check_req()
+can return the listener socket in one case, and that we must
+release the request socket refcount or we leak it.
+
+Tested:
+
+ Following packetdrill test template shows the issue
+
+0     socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0    setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0    bind(3, ..., ...) = 0
++0    listen(3, 1) = 0
+
++0    < S 0:0(0) win 2920 <mss 1460,sackOK,nop,nop>
++0    > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
++.002 < . 1:1(0) ack 21 win 2920
++0    > R 21:21(0)
+
+Fixes: b357a364c57c9 ("inet: fix possible panic in reqsk_queue_unlink()")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_ipv4.c |    2 +-
+ net/ipv6/tcp_ipv6.c |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1348,7 +1348,7 @@ static struct sock *tcp_v4_hnd_req(struc
+       req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
+       if (req) {
+               nsk = tcp_check_req(sk, skb, req, false);
+-              if (!nsk)
++              if (!nsk || nsk == sk)
+                       reqsk_put(req);
+               return nsk;
+       }
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -946,7 +946,7 @@ static struct sock *tcp_v6_hnd_req(struc
+                                  &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb));
+       if (req) {
+               nsk = tcp_check_req(sk, skb, req, false);
+-              if (!nsk)
++              if (!nsk || nsk == sk)
+                       reqsk_put(req);
+               return nsk;
+       }
diff --git a/queue-4.1/inet-fix-races-with-reqsk-timers.patch b/queue-4.1/inet-fix-races-with-reqsk-timers.patch
new file mode 100644 (file)
index 0000000..1d482c2
--- /dev/null
@@ -0,0 +1,58 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 10 Aug 2015 09:09:13 -0700
+Subject: inet: fix races with reqsk timers
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 2235f2ac75fd2501c251b0b699a9632e80239a6d ]
+
+reqsk_queue_destroy() and reqsk_queue_unlink() should use
+del_timer_sync() instead of del_timer() before calling reqsk_put(),
+otherwise we could free a req still used by another cpu.
+
+But before doing so, reqsk_queue_destroy() must release syn_wait_lock
+spinlock or risk a dead lock, as reqsk_timer_handler() might
+need to take this same spinlock from reqsk_queue_unlink() (called from
+inet_csk_reqsk_queue_drop())
+
+Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/request_sock.c         |    8 +++++++-
+ net/ipv4/inet_connection_sock.c |    2 +-
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+--- a/net/core/request_sock.c
++++ b/net/core/request_sock.c
+@@ -103,10 +103,16 @@ void reqsk_queue_destroy(struct request_
+                       spin_lock_bh(&queue->syn_wait_lock);
+                       while ((req = lopt->syn_table[i]) != NULL) {
+                               lopt->syn_table[i] = req->dl_next;
++                              /* Because of following del_timer_sync(),
++                               * we must release the spinlock here
++                               * or risk a dead lock.
++                               */
++                              spin_unlock_bh(&queue->syn_wait_lock);
+                               atomic_inc(&lopt->qlen_dec);
+-                              if (del_timer(&req->rsk_timer))
++                              if (del_timer_sync(&req->rsk_timer))
+                                       reqsk_put(req);
+                               reqsk_put(req);
++                              spin_lock_bh(&queue->syn_wait_lock);
+                       }
+                       spin_unlock_bh(&queue->syn_wait_lock);
+               }
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -584,7 +584,7 @@ static bool reqsk_queue_unlink(struct re
+       }
+       spin_unlock(&queue->syn_wait_lock);
+-      if (del_timer(&req->rsk_timer))
++      if (del_timer_sync(&req->rsk_timer))
+               reqsk_put(req);
+       return found;
+ }
diff --git a/queue-4.1/inet-frags-fix-defragmented-packet-s-ip-header-for-af_packet.patch b/queue-4.1/inet-frags-fix-defragmented-packet-s-ip-header-for-af_packet.patch
new file mode 100644 (file)
index 0000000..7d40443
--- /dev/null
@@ -0,0 +1,58 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Edward Hyunkoo Jee <edjee@google.com>
+Date: Tue, 21 Jul 2015 09:43:59 +0200
+Subject: inet: frags: fix defragmented packet's IP header for af_packet
+
+From: Edward Hyunkoo Jee <edjee@google.com>
+
+[ Upstream commit 0848f6428ba3a2e42db124d41ac6f548655735bf ]
+
+When ip_frag_queue() computes positions, it assumes that the passed
+sk_buff does not contain L2 headers.
+
+However, when PACKET_FANOUT_FLAG_DEFRAG is used, IP reassembly
+functions can be called on outgoing packets that contain L2 headers.
+
+Also, IPv4 checksum is not corrected after reassembly.
+
+Fixes: 7736d33f4262 ("packet: Add pre-defragmentation support for ipv4 fanouts.")
+Signed-off-by: Edward Hyunkoo Jee <edjee@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Cc: Jerry Chu <hkchu@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_fragment.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -342,7 +342,7 @@ static int ip_frag_queue(struct ipq *qp,
+       ihl = ip_hdrlen(skb);
+       /* Determine the position of this fragment. */
+-      end = offset + skb->len - ihl;
++      end = offset + skb->len - skb_network_offset(skb) - ihl;
+       err = -EINVAL;
+       /* Is this the final fragment? */
+@@ -372,7 +372,7 @@ static int ip_frag_queue(struct ipq *qp,
+               goto err;
+       err = -ENOMEM;
+-      if (!pskb_pull(skb, ihl))
++      if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
+               goto err;
+       err = pskb_trim_rcsum(skb, end - offset);
+@@ -613,6 +613,9 @@ static int ip_frag_reasm(struct ipq *qp,
+       iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0;
+       iph->tot_len = htons(len);
+       iph->tos |= ecn;
++
++      ip_send_check(iph);
++
+       IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
+       qp->q.fragments = NULL;
+       qp->q.fragments_tail = NULL;
diff --git a/queue-4.1/ip_tunnel-fix-ipv4-pmtu-check-to-honor-inner-ip-header-df.patch b/queue-4.1/ip_tunnel-fix-ipv4-pmtu-check-to-honor-inner-ip-header-df.patch
new file mode 100644 (file)
index 0000000..1596291
--- /dev/null
@@ -0,0 +1,62 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+Date: Tue, 7 Jul 2015 08:34:13 +0300
+Subject: ip_tunnel: fix ipv4 pmtu check to honor inner ip header df
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+
+[ Upstream commit fc24f2b2094366da8786f59f2606307e934cea17 ]
+
+Frag needed should be sent only if the inner header asked
+to not fragment. Currently fragmentation is broken if the
+tunnel has df set, but df was not asked in the original
+packet. The tunnel's df needs to be still checked to update
+internally the pmtu cache.
+
+Commit 23a3647bc4f93bac broke it, and this commit fixes
+the ipv4 df check back to the way it was.
+
+Fixes: 23a3647bc4f93bac ("ip_tunnels: Use skb-len to PMTU check.")
+Cc: Pravin B Shelar <pshelar@nicira.com>
+Signed-off-by: Timo Teräs <timo.teras@iki.fi>
+Acked-by: Pravin B Shelar <pshelar@nicira.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_tunnel.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/ip_tunnel.c
++++ b/net/ipv4/ip_tunnel.c
+@@ -586,7 +586,8 @@ int ip_tunnel_encap(struct sk_buff *skb,
+ EXPORT_SYMBOL(ip_tunnel_encap);
+ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
+-                          struct rtable *rt, __be16 df)
++                          struct rtable *rt, __be16 df,
++                          const struct iphdr *inner_iph)
+ {
+       struct ip_tunnel *tunnel = netdev_priv(dev);
+       int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
+@@ -603,7 +604,8 @@ static int tnl_update_pmtu(struct net_de
+       if (skb->protocol == htons(ETH_P_IP)) {
+               if (!skb_is_gso(skb) &&
+-                  (df & htons(IP_DF)) && mtu < pkt_size) {
++                  (inner_iph->frag_off & htons(IP_DF)) &&
++                  mtu < pkt_size) {
+                       memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+                       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+                       return -E2BIG;
+@@ -737,7 +739,7 @@ void ip_tunnel_xmit(struct sk_buff *skb,
+               goto tx_error;
+       }
+-      if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
++      if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
+               ip_rt_put(rt);
+               goto tx_error;
+       }
diff --git a/queue-4.1/ipv4-off-by-one-in-continuation-handling-in-proc-net-route.patch b/queue-4.1/ipv4-off-by-one-in-continuation-handling-in-proc-net-route.patch
new file mode 100644 (file)
index 0000000..cffa065
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Andy Whitcroft <apw@canonical.com>
+Date: Thu, 13 Aug 2015 20:49:01 +0100
+Subject: ipv4: off-by-one in continuation handling in /proc/net/route
+
+From: Andy Whitcroft <apw@canonical.com>
+
+[ Upstream commit 25b97c016b26039982daaa2c11d83979f93b71ab ]
+
+When generating /proc/net/route we emit a header followed by a line for
+each route.  When a short read is performed we will restart this process
+based on the open file descriptor.  When calculating the start point we
+fail to take into account that the 0th entry is the header.  This leads
+us to skip the first entry when doing a continuation read.
+
+This can be easily seen with the comparison below:
+
+  while read l; do echo "$l"; done </proc/net/route >A
+  cat /proc/net/route >B
+  diff -bu A B | grep '^[+-]'
+
+On my example machine I have approximately 10KB of route output.  There we
+see the very first non-title element is lost in the while read case,
+and an entry around the 8K mark in the cat case:
+
+  +wlan0 00000000 02021EAC 0003 0 0 400 00000000 0 0 0
+  -tun1  00C0AC0A 00000000 0001 0 0 950 00C0FFFF 0 0 0
+
+Fix up the off-by-one when reacquiring position on continuation.
+
+Fixes: 8be33e955cb9 ("fib_trie: Fib walk rcu should take a tnode and key instead of a trie and a leaf")
+BugLink: http://bugs.launchpad.net/bugs/1483440
+Acked-by: Alexander Duyck <alexander.h.duyck@redhat.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_trie.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -2453,7 +2453,7 @@ static struct key_vector *fib_route_get_
+               key = l->key + 1;
+               iter->pos++;
+-              if (pos-- <= 0)
++              if (--pos <= 0)
+                       break;
+               l = NULL;
diff --git a/queue-4.1/ipv6-lock-socket-in-ip6_datagram_connect.patch b/queue-4.1/ipv6-lock-socket-in-ip6_datagram_connect.patch
new file mode 100644 (file)
index 0000000..ce5b461
--- /dev/null
@@ -0,0 +1,126 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 14 Jul 2015 08:10:22 +0200
+Subject: ipv6: lock socket in ip6_datagram_connect()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 03645a11a570d52e70631838cb786eb4253eb463 ]
+
+ip6_datagram_connect() is doing a lot of socket changes without
+socket being locked.
+
+This looks wrong, at least for udp_lib_rehash() which could corrupt
+lists because of concurrent udp_sk(sk)->udp_portaddr_hash accesses.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/ip.h    |    1 +
+ net/ipv4/datagram.c |   16 ++++++++++++----
+ net/ipv6/datagram.c |   20 +++++++++++++++-----
+ 3 files changed, 28 insertions(+), 9 deletions(-)
+
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -161,6 +161,7 @@ static inline __u8 get_rtconn_flags(stru
+ }
+ /* datagram.c */
++int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
+ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
+ void ip4_datagram_release_cb(struct sock *sk);
+--- a/net/ipv4/datagram.c
++++ b/net/ipv4/datagram.c
+@@ -20,7 +20,7 @@
+ #include <net/route.h>
+ #include <net/tcp_states.h>
+-int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
++int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+ {
+       struct inet_sock *inet = inet_sk(sk);
+       struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
+@@ -39,8 +39,6 @@ int ip4_datagram_connect(struct sock *sk
+       sk_dst_reset(sk);
+-      lock_sock(sk);
+-
+       oif = sk->sk_bound_dev_if;
+       saddr = inet->inet_saddr;
+       if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
+@@ -82,9 +80,19 @@ int ip4_datagram_connect(struct sock *sk
+       sk_dst_set(sk, &rt->dst);
+       err = 0;
+ out:
+-      release_sock(sk);
+       return err;
+ }
++EXPORT_SYMBOL(__ip4_datagram_connect);
++
++int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
++{
++      int res;
++
++      lock_sock(sk);
++      res = __ip4_datagram_connect(sk, uaddr, addr_len);
++      release_sock(sk);
++      return res;
++}
+ EXPORT_SYMBOL(ip4_datagram_connect);
+ /* Because UDP xmit path can manipulate sk_dst_cache without holding
+--- a/net/ipv6/datagram.c
++++ b/net/ipv6/datagram.c
+@@ -40,7 +40,7 @@ static bool ipv6_mapped_addr_any(const s
+       return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0);
+ }
+-int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
++static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+ {
+       struct sockaddr_in6     *usin = (struct sockaddr_in6 *) uaddr;
+       struct inet_sock        *inet = inet_sk(sk);
+@@ -56,7 +56,7 @@ int ip6_datagram_connect(struct sock *sk
+       if (usin->sin6_family == AF_INET) {
+               if (__ipv6_only_sock(sk))
+                       return -EAFNOSUPPORT;
+-              err = ip4_datagram_connect(sk, uaddr, addr_len);
++              err = __ip4_datagram_connect(sk, uaddr, addr_len);
+               goto ipv4_connected;
+       }
+@@ -98,9 +98,9 @@ int ip6_datagram_connect(struct sock *sk
+               sin.sin_addr.s_addr = daddr->s6_addr32[3];
+               sin.sin_port = usin->sin6_port;
+-              err = ip4_datagram_connect(sk,
+-                                         (struct sockaddr *) &sin,
+-                                         sizeof(sin));
++              err = __ip4_datagram_connect(sk,
++                                           (struct sockaddr *) &sin,
++                                           sizeof(sin));
+ ipv4_connected:
+               if (err)
+@@ -204,6 +204,16 @@ out:
+       fl6_sock_release(flowlabel);
+       return err;
+ }
++
++int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
++{
++      int res;
++
++      lock_sock(sk);
++      res = __ip6_datagram_connect(sk, uaddr, addr_len);
++      release_sock(sk);
++      return res;
++}
+ EXPORT_SYMBOL_GPL(ip6_datagram_connect);
+ int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr,
diff --git a/queue-4.1/ipv6-make-mld-packets-to-only-be-processed-locally.patch b/queue-4.1/ipv6-make-mld-packets-to-only-be-processed-locally.patch
new file mode 100644 (file)
index 0000000..7562de5
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Angga <Hermin.Anggawijaya@alliedtelesis.co.nz>
+Date: Fri, 3 Jul 2015 14:40:52 +1200
+Subject: ipv6: Make MLD packets to only be processed locally
+
+From: Angga <Hermin.Anggawijaya@alliedtelesis.co.nz>
+
+[ Upstream commit 4c938d22c88a9ddccc8c55a85e0430e9c62b1ac5 ]
+
+Before commit daad151263cf ("ipv6: Make ipv6_is_mld() inline and use it
+from ip6_mc_input().") MLD packets were only processed locally. After the
+change, a copy of MLD packet goes through ip6_mr_input, causing
+MRT6MSG_NOCACHE message to be generated to user space.
+
+Make MLD packet only processed locally.
+
+Fixes: daad151263cf ("ipv6: Make ipv6_is_mld() inline and use it from ip6_mc_input().")
+Signed-off-by: Hermin Anggawijaya <hermin.anggawijaya@alliedtelesis.co.nz>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_input.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/ipv6/ip6_input.c
++++ b/net/ipv6/ip6_input.c
+@@ -331,10 +331,10 @@ int ip6_mc_input(struct sk_buff *skb)
+                               if (offset < 0)
+                                       goto out;
+-                              if (!ipv6_is_mld(skb, nexthdr, offset))
+-                                      goto out;
++                              if (ipv6_is_mld(skb, nexthdr, offset))
++                                      deliver = true;
+-                              deliver = true;
++                              goto out;
+                       }
+                       /* unknown RA - process it normally */
+               }
diff --git a/queue-4.1/isdn-gigaset-reset-tty-receive_room-when-attaching-ser_gigaset.patch b/queue-4.1/isdn-gigaset-reset-tty-receive_room-when-attaching-ser_gigaset.patch
new file mode 100644 (file)
index 0000000..b190616
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Tilman Schmidt <tilman@imap.cc>
+Date: Tue, 14 Jul 2015 00:37:13 +0200
+Subject: isdn/gigaset: reset tty->receive_room when attaching ser_gigaset
+
+From: Tilman Schmidt <tilman@imap.cc>
+
+[ Upstream commit fd98e9419d8d622a4de91f76b306af6aa627aa9c ]
+
+Commit 79901317ce80 ("n_tty: Don't flush buffer when closing ldisc"),
+first merged in kernel release 3.10, caused the following regression
+in the Gigaset M101 driver:
+
+Before that commit, when closing the N_TTY line discipline in
+preparation to switching to N_GIGASET_M101, receive_room would be
+reset to a non-zero value by the call to n_tty_flush_buffer() in
+n_tty's close method. With the removal of that call, receive_room
+might be left at zero, blocking data reception on the serial line.
+
+The present patch fixes that regression by setting receive_room
+to an appropriate value in the ldisc open method.
+
+Fixes: 79901317ce80 ("n_tty: Don't flush buffer when closing ldisc")
+Signed-off-by: Tilman Schmidt <tilman@imap.cc>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/isdn/gigaset/ser-gigaset.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/drivers/isdn/gigaset/ser-gigaset.c
++++ b/drivers/isdn/gigaset/ser-gigaset.c
+@@ -524,9 +524,18 @@ gigaset_tty_open(struct tty_struct *tty)
+       cs->hw.ser->tty = tty;
+       atomic_set(&cs->hw.ser->refcnt, 1);
+       init_completion(&cs->hw.ser->dead_cmp);
+-
+       tty->disc_data = cs;
++      /* Set the amount of data we're willing to receive per call
++       * from the hardware driver to half of the input buffer size
++       * to leave some reserve.
++       * Note: We don't do flow control towards the hardware driver.
++       * If more data is received than will fit into the input buffer,
++       * it will be dropped and an error will be logged. This should
++       * never happen as the device is slow and the buffer size ample.
++       */
++      tty->receive_room = RBUFSIZE/2;
++
+       /* OK.. Initialization of the datastructures and the HW is done.. Now
+        * startup system and notify the LL that we are ready to run
+        */
diff --git a/queue-4.1/jbd2-avoid-infinite-loop-when-destroying-aborted-journal.patch b/queue-4.1/jbd2-avoid-infinite-loop-when-destroying-aborted-journal.patch
new file mode 100644 (file)
index 0000000..8a66d98
--- /dev/null
@@ -0,0 +1,169 @@
+From 841df7df196237ea63233f0f9eaa41db53afd70f Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.com>
+Date: Tue, 28 Jul 2015 14:57:14 -0400
+Subject: jbd2: avoid infinite loop when destroying aborted journal
+
+From: Jan Kara <jack@suse.com>
+
+commit 841df7df196237ea63233f0f9eaa41db53afd70f upstream.
+
+Commit 6f6a6fda2945 "jbd2: fix ocfs2 corrupt when updating journal
+superblock fails" changed jbd2_cleanup_journal_tail() to return EIO
+when the journal is aborted. That makes logic in
+jbd2_log_do_checkpoint() bail out which is fine, except that
+jbd2_journal_destroy() expects jbd2_log_do_checkpoint() to always make
+a progress in cleaning the journal. Without it jbd2_journal_destroy()
+just loops in an infinite loop.
+
+Fix jbd2_journal_destroy() to cleanup journal checkpoint lists of
+jbd2_log_do_checkpoint() fails with error.
+
+Reported-by: Eryu Guan <guaneryu@gmail.com>
+Tested-by: Eryu Guan <guaneryu@gmail.com>
+Fixes: 6f6a6fda294506dfe0e3e0a253bb2d2923f28f0a
+Signed-off-by: Jan Kara <jack@suse.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/jbd2/checkpoint.c |   39 +++++++++++++++++++++++++++++++++------
+ fs/jbd2/commit.c     |    2 +-
+ fs/jbd2/journal.c    |   11 ++++++++++-
+ include/linux/jbd2.h |    3 ++-
+ 4 files changed, 46 insertions(+), 9 deletions(-)
+
+--- a/fs/jbd2/checkpoint.c
++++ b/fs/jbd2/checkpoint.c
+@@ -417,12 +417,12 @@ int jbd2_cleanup_journal_tail(journal_t
+  * journal_clean_one_cp_list
+  *
+  * Find all the written-back checkpoint buffers in the given list and
+- * release them.
++ * release them. If 'destroy' is set, clean all buffers unconditionally.
+  *
+  * Called with j_list_lock held.
+  * Returns 1 if we freed the transaction, 0 otherwise.
+  */
+-static int journal_clean_one_cp_list(struct journal_head *jh)
++static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
+ {
+       struct journal_head *last_jh;
+       struct journal_head *next_jh = jh;
+@@ -436,7 +436,10 @@ static int journal_clean_one_cp_list(str
+       do {
+               jh = next_jh;
+               next_jh = jh->b_cpnext;
+-              ret = __try_to_free_cp_buf(jh);
++              if (!destroy)
++                      ret = __try_to_free_cp_buf(jh);
++              else
++                      ret = __jbd2_journal_remove_checkpoint(jh) + 1;
+               if (!ret)
+                       return freed;
+               if (ret == 2)
+@@ -459,10 +462,11 @@ static int journal_clean_one_cp_list(str
+  * journal_clean_checkpoint_list
+  *
+  * Find all the written-back checkpoint buffers in the journal and release them.
++ * If 'destroy' is set, release all buffers unconditionally.
+  *
+  * Called with j_list_lock held.
+  */
+-void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
++void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
+ {
+       transaction_t *transaction, *last_transaction, *next_transaction;
+       int ret;
+@@ -476,7 +480,8 @@ void __jbd2_journal_clean_checkpoint_lis
+       do {
+               transaction = next_transaction;
+               next_transaction = transaction->t_cpnext;
+-              ret = journal_clean_one_cp_list(transaction->t_checkpoint_list);
++              ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
++                                              destroy);
+               /*
+                * This function only frees up some memory if possible so we
+                * dont have an obligation to finish processing. Bail out if
+@@ -492,7 +497,7 @@ void __jbd2_journal_clean_checkpoint_lis
+                * we can possibly see not yet submitted buffers on io_list
+                */
+               ret = journal_clean_one_cp_list(transaction->
+-                              t_checkpoint_io_list);
++                              t_checkpoint_io_list, destroy);
+               if (need_resched())
+                       return;
+               /*
+@@ -506,6 +511,28 @@ void __jbd2_journal_clean_checkpoint_lis
+ }
+ /*
++ * Remove buffers from all checkpoint lists as journal is aborted and we just
++ * need to free memory
++ */
++void jbd2_journal_destroy_checkpoint(journal_t *journal)
++{
++      /*
++       * We loop because __jbd2_journal_clean_checkpoint_list() may abort
++       * early due to a need of rescheduling.
++       */
++      while (1) {
++              spin_lock(&journal->j_list_lock);
++              if (!journal->j_checkpoint_transactions) {
++                      spin_unlock(&journal->j_list_lock);
++                      break;
++              }
++              __jbd2_journal_clean_checkpoint_list(journal, true);
++              spin_unlock(&journal->j_list_lock);
++              cond_resched();
++      }
++}
++
++/*
+  * journal_remove_checkpoint: called after a buffer has been committed
+  * to disk (either by being write-back flushed to disk, or being
+  * committed to the log).
+--- a/fs/jbd2/commit.c
++++ b/fs/jbd2/commit.c
+@@ -510,7 +510,7 @@ void jbd2_journal_commit_transaction(jou
+        * frees some memory
+        */
+       spin_lock(&journal->j_list_lock);
+-      __jbd2_journal_clean_checkpoint_list(journal);
++      __jbd2_journal_clean_checkpoint_list(journal, false);
+       spin_unlock(&journal->j_list_lock);
+       jbd_debug(3, "JBD2: commit phase 1\n");
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -1708,8 +1708,17 @@ int jbd2_journal_destroy(journal_t *jour
+       while (journal->j_checkpoint_transactions != NULL) {
+               spin_unlock(&journal->j_list_lock);
+               mutex_lock(&journal->j_checkpoint_mutex);
+-              jbd2_log_do_checkpoint(journal);
++              err = jbd2_log_do_checkpoint(journal);
+               mutex_unlock(&journal->j_checkpoint_mutex);
++              /*
++               * If checkpointing failed, just free the buffers to avoid
++               * looping forever
++               */
++              if (err) {
++                      jbd2_journal_destroy_checkpoint(journal);
++                      spin_lock(&journal->j_list_lock);
++                      break;
++              }
+               spin_lock(&journal->j_list_lock);
+       }
+--- a/include/linux/jbd2.h
++++ b/include/linux/jbd2.h
+@@ -1042,8 +1042,9 @@ void jbd2_update_log_tail(journal_t *jou
+ extern void jbd2_journal_commit_transaction(journal_t *);
+ /* Checkpoint list management */
+-void __jbd2_journal_clean_checkpoint_list(journal_t *journal);
++void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy);
+ int __jbd2_journal_remove_checkpoint(struct journal_head *);
++void jbd2_journal_destroy_checkpoint(journal_t *journal);
+ void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
diff --git a/queue-4.1/net-call-rcu_read_lock-early-in-process_backlog.patch b/queue-4.1/net-call-rcu_read_lock-early-in-process_backlog.patch
new file mode 100644 (file)
index 0000000..9524b44
--- /dev/null
@@ -0,0 +1,152 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Julian Anastasov <ja@ssi.bg>
+Date: Thu, 9 Jul 2015 09:59:10 +0300
+Subject: net: call rcu_read_lock early in process_backlog
+
+From: Julian Anastasov <ja@ssi.bg>
+
+[ Upstream commit 2c17d27c36dcce2b6bf689f41a46b9e909877c21 ]
+
+Incoming packet should be either in backlog queue or
+in RCU read-side section. Otherwise, the final sequence of
+flush_backlog() and synchronize_net() may miss packets
+that can run without device reference:
+
+CPU 1                  CPU 2
+                       skb->dev: no reference
+                       process_backlog:__skb_dequeue
+                       process_backlog:local_irq_enable
+
+on_each_cpu for
+flush_backlog =>       IPI(hardirq): flush_backlog
+                       - packet not found in backlog
+
+                       CPU delayed ...
+synchronize_net
+- no ongoing RCU
+read-side sections
+
+netdev_run_todo,
+rcu_barrier: no
+ongoing callbacks
+                       __netif_receive_skb_core:rcu_read_lock
+                       - too late
+free dev
+                       process packet for freed dev
+
+Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue")
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: Stephen Hemminger <stephen@networkplumber.org>
+Signed-off-by: Julian Anastasov <ja@ssi.bg>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |   30 +++++++++++++++---------------
+ 1 file changed, 15 insertions(+), 15 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3666,8 +3666,6 @@ static int __netif_receive_skb_core(stru
+       pt_prev = NULL;
+-      rcu_read_lock();
+-
+ another_round:
+       skb->skb_iif = skb->dev->ifindex;
+@@ -3677,7 +3675,7 @@ another_round:
+           skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
+               skb = skb_vlan_untag(skb);
+               if (unlikely(!skb))
+-                      goto unlock;
++                      goto out;
+       }
+ #ifdef CONFIG_NET_CLS_ACT
+@@ -3707,7 +3705,7 @@ skip_taps:
+       if (static_key_false(&ingress_needed)) {
+               skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
+               if (!skb)
+-                      goto unlock;
++                      goto out;
+       }
+       skb->tc_verd = 0;
+@@ -3724,7 +3722,7 @@ ncls:
+               if (vlan_do_receive(&skb))
+                       goto another_round;
+               else if (unlikely(!skb))
+-                      goto unlock;
++                      goto out;
+       }
+       rx_handler = rcu_dereference(skb->dev->rx_handler);
+@@ -3736,7 +3734,7 @@ ncls:
+               switch (rx_handler(&skb)) {
+               case RX_HANDLER_CONSUMED:
+                       ret = NET_RX_SUCCESS;
+-                      goto unlock;
++                      goto out;
+               case RX_HANDLER_ANOTHER:
+                       goto another_round;
+               case RX_HANDLER_EXACT:
+@@ -3790,8 +3788,7 @@ drop:
+               ret = NET_RX_DROP;
+       }
+-unlock:
+-      rcu_read_unlock();
++out:
+       return ret;
+ }
+@@ -3822,29 +3819,30 @@ static int __netif_receive_skb(struct sk
+ static int netif_receive_skb_internal(struct sk_buff *skb)
+ {
++      int ret;
++
+       net_timestamp_check(netdev_tstamp_prequeue, skb);
+       if (skb_defer_rx_timestamp(skb))
+               return NET_RX_SUCCESS;
++      rcu_read_lock();
++
+ #ifdef CONFIG_RPS
+       if (static_key_false(&rps_needed)) {
+               struct rps_dev_flow voidflow, *rflow = &voidflow;
+-              int cpu, ret;
+-
+-              rcu_read_lock();
+-
+-              cpu = get_rps_cpu(skb->dev, skb, &rflow);
++              int cpu = get_rps_cpu(skb->dev, skb, &rflow);
+               if (cpu >= 0) {
+                       ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+                       rcu_read_unlock();
+                       return ret;
+               }
+-              rcu_read_unlock();
+       }
+ #endif
+-      return __netif_receive_skb(skb);
++      ret = __netif_receive_skb(skb);
++      rcu_read_unlock();
++      return ret;
+ }
+ /**
+@@ -4389,8 +4387,10 @@ static int process_backlog(struct napi_s
+               struct sk_buff *skb;
+               while ((skb = __skb_dequeue(&sd->process_queue))) {
++                      rcu_read_lock();
+                       local_irq_enable();
+                       __netif_receive_skb(skb);
++                      rcu_read_unlock();
+                       local_irq_disable();
+                       input_queue_head_incr(sd);
+                       if (++work >= quota) {
diff --git a/queue-4.1/net-clone-skb-before-setting-peeked-flag.patch b/queue-4.1/net-clone-skb-before-setting-peeked-flag.patch
new file mode 100644 (file)
index 0000000..f9f2577
--- /dev/null
@@ -0,0 +1,108 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Mon, 13 Jul 2015 16:04:13 +0800
+Subject: net: Clone skb before setting peeked flag
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+[ Upstream commit 738ac1ebb96d02e0d23bc320302a6ea94c612dec ]
+
+Shared skbs must not be modified and this is crucial for broadcast
+and/or multicast paths where we use it as an optimisation to avoid
+unnecessary cloning.
+
+The function skb_recv_datagram breaks this rule by setting peeked
+without cloning the skb first.  This causes funky races which leads
+to double-free.
+
+This patch fixes this by cloning the skb and replacing the skb
+in the list when setting skb->peeked.
+
+Fixes: a59322be07c9 ("[UDP]: Only increment counter on first peek/recv")
+Reported-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/datagram.c |   41 ++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 38 insertions(+), 3 deletions(-)
+
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -131,6 +131,35 @@ out_noerr:
+       goto out;
+ }
++static int skb_set_peeked(struct sk_buff *skb)
++{
++      struct sk_buff *nskb;
++
++      if (skb->peeked)
++              return 0;
++
++      /* We have to unshare an skb before modifying it. */
++      if (!skb_shared(skb))
++              goto done;
++
++      nskb = skb_clone(skb, GFP_ATOMIC);
++      if (!nskb)
++              return -ENOMEM;
++
++      skb->prev->next = nskb;
++      skb->next->prev = nskb;
++      nskb->prev = skb->prev;
++      nskb->next = skb->next;
++
++      consume_skb(skb);
++      skb = nskb;
++
++done:
++      skb->peeked = 1;
++
++      return 0;
++}
++
+ /**
+  *    __skb_recv_datagram - Receive a datagram skbuff
+  *    @sk: socket
+@@ -165,7 +194,9 @@ out_noerr:
+ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
+                                   int *peeked, int *off, int *err)
+ {
++      struct sk_buff_head *queue = &sk->sk_receive_queue;
+       struct sk_buff *skb, *last;
++      unsigned long cpu_flags;
+       long timeo;
+       /*
+        * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
+@@ -184,8 +215,6 @@ struct sk_buff *__skb_recv_datagram(stru
+                * Look at current nfs client by the way...
+                * However, this function was correct in any case. 8)
+                */
+-              unsigned long cpu_flags;
+-              struct sk_buff_head *queue = &sk->sk_receive_queue;
+               int _off = *off;
+               last = (struct sk_buff *)queue;
+@@ -199,7 +228,11 @@ struct sk_buff *__skb_recv_datagram(stru
+                                       _off -= skb->len;
+                                       continue;
+                               }
+-                              skb->peeked = 1;
++
++                              error = skb_set_peeked(skb);
++                              if (error)
++                                      goto unlock_err;
++
+                               atomic_inc(&skb->users);
+                       } else
+                               __skb_unlink(skb, queue);
+@@ -223,6 +256,8 @@ struct sk_buff *__skb_recv_datagram(stru
+       return NULL;
++unlock_err:
++      spin_unlock_irqrestore(&queue->lock, cpu_flags);
+ no_packet:
+       *err = error;
+       return NULL;
diff --git a/queue-4.1/net-do-not-process-device-backlog-during-unregistration.patch b/queue-4.1/net-do-not-process-device-backlog-during-unregistration.patch
new file mode 100644 (file)
index 0000000..9df5164
--- /dev/null
@@ -0,0 +1,85 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Julian Anastasov <ja@ssi.bg>
+Date: Thu, 9 Jul 2015 09:59:09 +0300
+Subject: net: do not process device backlog during unregistration
+
+From: Julian Anastasov <ja@ssi.bg>
+
+[ Upstream commit e9e4dd3267d0c5234c5c0f47440456b10875dec9 ]
+
+commit 381c759d9916 ("ipv4: Avoid crashing in ip_error")
+fixes a problem where processed packet comes from device
+with destroyed inetdev (dev->ip_ptr). This is not expected
+because inetdev_destroy is called in NETDEV_UNREGISTER
+phase and packets should not be processed after
+dev_close_many() and synchronize_net(). Above fix is still
+required because inetdev_destroy can be called for other
+reasons. But it shows the real problem: backlog can keep
+packets for long time and they do not hold reference to
+device. Such packets are then delivered to upper levels
+at the same time when device is unregistered.
+Calling flush_backlog after NETDEV_UNREGISTER_FINAL still
+accounts all packets from backlog but before that some packets
+continue to be delivered to upper levels long after the
+synchronize_net call which is supposed to wait the last
+ones. Also, as Eric pointed out, processed packets, mostly
+from other devices, can continue to add new packets to backlog.
+
+Fix the problem by moving flush_backlog early, after the
+device driver is stopped and before the synchronize_net() call.
+Then use netif_running check to make sure we do not add more
+packets to backlog. We have to do it in enqueue_to_backlog
+context when the local IRQ is disabled. As result, after the
+flush_backlog and synchronize_net sequence all packets
+should be accounted.
+
+Thanks to Eric W. Biederman for the test script and his
+valuable feedback!
+
+Reported-by: Vittorio Gambaletta <linuxbugs@vittgam.net>
+Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue")
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: Stephen Hemminger <stephen@networkplumber.org>
+Signed-off-by: Julian Anastasov <ja@ssi.bg>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3337,6 +3337,8 @@ static int enqueue_to_backlog(struct sk_
+       local_irq_save(flags);
+       rps_lock(sd);
++      if (!netif_running(skb->dev))
++              goto drop;
+       qlen = skb_queue_len(&sd->input_pkt_queue);
+       if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
+               if (qlen) {
+@@ -3358,6 +3360,7 @@ enqueue:
+               goto enqueue;
+       }
++drop:
+       sd->dropped++;
+       rps_unlock(sd);
+@@ -6023,6 +6026,7 @@ static void rollback_registered_many(str
+               unlist_netdevice(dev);
+               dev->reg_state = NETREG_UNREGISTERING;
++              on_each_cpu(flush_backlog, dev, 1);
+       }
+       synchronize_net();
+@@ -6647,8 +6651,6 @@ void netdev_run_todo(void)
+               dev->reg_state = NETREG_UNREGISTERED;
+-              on_each_cpu(flush_backlog, dev, 1);
+-
+               netdev_wait_allrefs(dev);
+               /* paranoia */
diff --git a/queue-4.1/net-dsa-do-not-override-phy-interface-if-already-configured.patch b/queue-4.1/net-dsa-do-not-override-phy-interface-if-already-configured.patch
new file mode 100644 (file)
index 0000000..0b740ac
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Sat, 8 Aug 2015 12:58:57 -0700
+Subject: net: dsa: Do not override PHY interface if already configured
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 211c504a444710b1d8ce3431ac19f2578602ca27 ]
+
+In case we need to divert reads/writes using the slave MII bus, we may have
+already fetched a valid PHY interface property from Device Tree, and that
+mode is used by the PHY driver to make configuration decisions.
+
+If we could not fetch the "phy-mode" property, we will assign p->phy_interface
+to PHY_INTERFACE_MODE_NA, such that we can actually check for that condition as
+to whether or not we should override the interface value.
+
+Fixes: 19334920eaf7 ("net: dsa: Set valid phy interface type")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dsa/slave.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/dsa/slave.c
++++ b/net/dsa/slave.c
+@@ -732,7 +732,8 @@ static int dsa_slave_phy_connect(struct
+               return -ENODEV;
+       /* Use already configured phy mode */
+-      p->phy_interface = p->phy->interface;
++      if (p->phy_interface == PHY_INTERFACE_MODE_NA)
++              p->phy_interface = p->phy->interface;
+       phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
+                          p->phy_interface);
diff --git a/queue-4.1/net-fix-skb-csum-races-when-peeking.patch b/queue-4.1/net-fix-skb-csum-races-when-peeking.patch
new file mode 100644 (file)
index 0000000..79193ca
--- /dev/null
@@ -0,0 +1,60 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Mon, 13 Jul 2015 20:01:42 +0800
+Subject: net: Fix skb csum races when peeking
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+[ Upstream commit 89c22d8c3b278212eef6a8cc66b570bc840a6f5a ]
+
+When we calculate the checksum on the recv path, we store the
+result in the skb as an optimisation in case we need the checksum
+again down the line.
+
+This is in fact bogus for the MSG_PEEK case as this is done without
+any locking.  So multiple threads can peek and then store the result
+to the same skb, potentially resulting in bogus skb states.
+
+This patch fixes this by only storing the result if the skb is not
+shared.  This preserves the optimisations for the few cases where
+it can be done safely due to locking or other reasons, e.g., SIOCINQ.
+
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/datagram.c |   15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -657,7 +657,8 @@ __sum16 __skb_checksum_complete_head(str
+                   !skb->csum_complete_sw)
+                       netdev_rx_csum_fault(skb->dev);
+       }
+-      skb->csum_valid = !sum;
++      if (!skb_shared(skb))
++              skb->csum_valid = !sum;
+       return sum;
+ }
+ EXPORT_SYMBOL(__skb_checksum_complete_head);
+@@ -677,11 +678,13 @@ __sum16 __skb_checksum_complete(struct s
+                       netdev_rx_csum_fault(skb->dev);
+       }
+-      /* Save full packet checksum */
+-      skb->csum = csum;
+-      skb->ip_summed = CHECKSUM_COMPLETE;
+-      skb->csum_complete_sw = 1;
+-      skb->csum_valid = !sum;
++      if (!skb_shared(skb)) {
++              /* Save full packet checksum */
++              skb->csum = csum;
++              skb->ip_summed = CHECKSUM_COMPLETE;
++              skb->csum_complete_sw = 1;
++              skb->csum_valid = !sum;
++      }
+       return sum;
+ }
diff --git a/queue-4.1/net-fix-skb_set_peeked-use-after-free-bug.patch b/queue-4.1/net-fix-skb_set_peeked-use-after-free-bug.patch
new file mode 100644 (file)
index 0000000..1ca621e
--- /dev/null
@@ -0,0 +1,76 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Tue, 4 Aug 2015 15:42:47 +0800
+Subject: net: Fix skb_set_peeked use-after-free bug
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+[ Upstream commit a0a2a6602496a45ae838a96db8b8173794b5d398 ]
+
+The commit 738ac1ebb96d02e0d23bc320302a6ea94c612dec ("net: Clone
+skb before setting peeked flag") introduced a use-after-free bug
+in skb_recv_datagram.  This is because skb_set_peeked may create
+a new skb and free the existing one.  As it stands the caller will
+continue to use the old freed skb.
+
+This patch fixes it by making skb_set_peeked return the new skb
+(or the old one if unchanged).
+
+Fixes: 738ac1ebb96d ("net: Clone skb before setting peeked flag")
+Reported-by: Brenden Blanco <bblanco@plumgrid.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Tested-by: Brenden Blanco <bblanco@plumgrid.com>
+Reviewed-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/datagram.c |   13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -131,12 +131,12 @@ out_noerr:
+       goto out;
+ }
+-static int skb_set_peeked(struct sk_buff *skb)
++static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
+ {
+       struct sk_buff *nskb;
+       if (skb->peeked)
+-              return 0;
++              return skb;
+       /* We have to unshare an skb before modifying it. */
+       if (!skb_shared(skb))
+@@ -144,7 +144,7 @@ static int skb_set_peeked(struct sk_buff
+       nskb = skb_clone(skb, GFP_ATOMIC);
+       if (!nskb)
+-              return -ENOMEM;
++              return ERR_PTR(-ENOMEM);
+       skb->prev->next = nskb;
+       skb->next->prev = nskb;
+@@ -157,7 +157,7 @@ static int skb_set_peeked(struct sk_buff
+ done:
+       skb->peeked = 1;
+-      return 0;
++      return skb;
+ }
+ /**
+@@ -229,8 +229,9 @@ struct sk_buff *__skb_recv_datagram(stru
+                                       continue;
+                               }
+-                              error = skb_set_peeked(skb);
+-                              if (error)
++                              skb = skb_set_peeked(skb);
++                              error = PTR_ERR(skb);
++                              if (IS_ERR(skb))
+                                       goto unlock_err;
+                               atomic_inc(&skb->users);
diff --git a/queue-4.1/net-graceful-exit-from-netif_alloc_netdev_queues.patch b/queue-4.1/net-graceful-exit-from-netif_alloc_netdev_queues.patch
new file mode 100644 (file)
index 0000000..451812b
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 6 Jul 2015 17:13:26 +0200
+Subject: net: graceful exit from netif_alloc_netdev_queues()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit d339727c2b1a10f25e6636670ab6e1841170e328 ]
+
+User space can crash kernel with
+
+ip link add ifb10 numtxqueues 100000 type ifb
+
+We must replace a BUG_ON() by proper test and return -EINVAL for
+crazy values.
+
+Fixes: 60877a32bce00 ("net: allow large number of tx queues")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -6297,7 +6297,8 @@ static int netif_alloc_netdev_queues(str
+       struct netdev_queue *tx;
+       size_t sz = count * sizeof(*tx);
+-      BUG_ON(count < 1 || count > 0xffff);
++      if (count < 1 || count > 0xffff)
++              return -EINVAL;
+       tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+       if (!tx) {
diff --git a/queue-4.1/net-mlx4_core-fix-wrong-index-in-propagating-port-change-event-to-vfs.patch b/queue-4.1/net-mlx4_core-fix-wrong-index-in-propagating-port-change-event-to-vfs.patch
new file mode 100644 (file)
index 0000000..72c2a4b
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Date: Wed, 22 Jul 2015 16:53:47 +0300
+Subject: net/mlx4_core: Fix wrong index in propagating port change event to VFs
+
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+
+[ Upstream commit 1c1bf34951e8d17941bf708d1901c47e81b15d55 ]
+
+The port-change event processing in procedure mlx4_eq_int() uses "slave"
+as the vf_oper array index. Since the value of "slave" is the PF function
+index, the result is that the PF link state is used for deciding to
+propagate the event for all the VFs. The VF link state should be used,
+so the VF function index should be used here.
+
+Fixes: 948e306d7d64 ('net/mlx4: Add VF link state support')
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Matan Barak <matanb@mellanox.com>
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/eq.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
+@@ -573,7 +573,7 @@ static int mlx4_eq_int(struct mlx4_dev *
+                                                       continue;
+                                               mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN to slave: %d, port:%d\n",
+                                                        __func__, i, port);
+-                                              s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state;
++                                              s_info = &priv->mfunc.master.vf_oper[i].vport[port].state;
+                                               if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) {
+                                                       eqe->event.port_change.port =
+                                                               cpu_to_be32(
+@@ -608,7 +608,7 @@ static int mlx4_eq_int(struct mlx4_dev *
+                                                       continue;
+                                               if (i == mlx4_master_func_num(dev))
+                                                       continue;
+-                                              s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state;
++                                              s_info = &priv->mfunc.master.vf_oper[i].vport[port].state;
+                                               if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) {
+                                                       eqe->event.port_change.port =
+                                                               cpu_to_be32(
diff --git a/queue-4.1/net-pktgen-fix-race-between-pktgen_thread_worker-and-kthread_stop.patch b/queue-4.1/net-pktgen-fix-race-between-pktgen_thread_worker-and-kthread_stop.patch
new file mode 100644 (file)
index 0000000..1d2ea39
--- /dev/null
@@ -0,0 +1,35 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Wed, 8 Jul 2015 21:42:11 +0200
+Subject: net: pktgen: fix race between pktgen_thread_worker() and kthread_stop()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit fecdf8be2d91e04b0a9a4f79ff06499a36f5d14f ]
+
+pktgen_thread_worker() is obviously racy, kthread_stop() can come
+between the kthread_should_stop() check and set_current_state().
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Reported-by: Jan Stancek <jstancek@redhat.com>
+Reported-by: Marcelo Leitner <mleitner@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/pktgen.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/core/pktgen.c
++++ b/net/core/pktgen.c
+@@ -3490,8 +3490,10 @@ static int pktgen_thread_worker(void *ar
+       pktgen_rem_thread(t);
+       /* Wait for kthread_stop */
+-      while (!kthread_should_stop()) {
++      for (;;) {
+               set_current_state(TASK_INTERRUPTIBLE);
++              if (kthread_should_stop())
++                      break;
+               schedule();
+       }
+       __set_current_state(TASK_RUNNING);
diff --git a/queue-4.1/net-sched-fix-refcount-imbalance-in-actions.patch b/queue-4.1/net-sched-fix-refcount-imbalance-in-actions.patch
new file mode 100644 (file)
index 0000000..1d65a4b
--- /dev/null
@@ -0,0 +1,162 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 29 Jul 2015 23:35:25 +0200
+Subject: net: sched: fix refcount imbalance in actions
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 28e6b67f0b292f557468c139085303b15f1a678f ]
+
+Since commit 55334a5db5cd ("net_sched: act: refuse to remove bound action
+outside"), we end up with a wrong reference count for a tc action.
+
+Test case 1:
+
+  FOO="1,6 0 0 4294967295,"
+  BAR="1,6 0 0 4294967294,"
+  tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 \
+     action bpf bytecode "$FOO"
+  tc actions show action bpf
+    action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
+    index 1 ref 1 bind 1
+  tc actions replace action bpf bytecode "$BAR" index 1
+  tc actions show action bpf
+    action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe
+    index 1 ref 2 bind 1
+  tc actions replace action bpf bytecode "$FOO" index 1
+  tc actions show action bpf
+    action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
+    index 1 ref 3 bind 1
+
+Test case 2:
+
+  FOO="1,6 0 0 4294967295,"
+  tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok
+  tc actions show action gact
+    action order 0: gact action pass
+    random type none pass val 0
+     index 1 ref 1 bind 1
+  tc actions add action drop index 1
+    RTNETLINK answers: File exists [...]
+  tc actions show action gact
+    action order 0: gact action pass
+     random type none pass val 0
+     index 1 ref 2 bind 1
+  tc actions add action drop index 1
+    RTNETLINK answers: File exists [...]
+  tc actions show action gact
+    action order 0: gact action pass
+     random type none pass val 0
+     index 1 ref 3 bind 1
+
+What happens is that in tcf_hash_check(), we check tcf_common for a given
+index and increase tcfc_refcnt and conditionally tcfc_bindcnt when we've
+found an existing action. Now there are the following cases:
+
+  1) We do a late binding of an action. In that case, we leave the
+     tcfc_refcnt/tcfc_bindcnt increased and are done with the ->init()
+     handler. This is correctly handled.
+
+  2) We replace the given action, or we try to add one without replacing
+     and find out that the action at a specific index already exists
+     (thus, we go out with error in that case).
+
+In case of 2), we have to undo the reference count increase from
+tcf_hash_check() in the tcf_hash_check() function. Currently, we fail to
+do so because of the 'tcfc_bindcnt > 0' check which bails out early with
+an -EPERM error.
+
+Now, while commit 55334a5db5cd prevents 'tc actions del action ...' on an
+already classifier-bound action to drop the reference count (which could
+then become negative, wrap around etc), this restriction only accounts for
+invocations outside a specific action's ->init() handler.
+
+One possible solution would be to add a flag thus we possibly trigger
+the -EPERM only in situations where it is indeed relevant.
+
+After the patch, above test cases have correct reference count again.
+
+Fixes: 55334a5db5cd ("net_sched: act: refuse to remove bound action outside")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Cong Wang <cwang@twopensource.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/act_api.h |    8 +++++++-
+ net/sched/act_api.c   |   11 ++++++-----
+ 2 files changed, 13 insertions(+), 6 deletions(-)
+
+--- a/include/net/act_api.h
++++ b/include/net/act_api.h
+@@ -99,7 +99,6 @@ struct tc_action_ops {
+ int tcf_hash_search(struct tc_action *a, u32 index);
+ void tcf_hash_destroy(struct tc_action *a);
+-int tcf_hash_release(struct tc_action *a, int bind);
+ u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo);
+ int tcf_hash_check(u32 index, struct tc_action *a, int bind);
+ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
+@@ -107,6 +106,13 @@ int tcf_hash_create(u32 index, struct nl
+ void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est);
+ void tcf_hash_insert(struct tc_action *a);
++int __tcf_hash_release(struct tc_action *a, bool bind, bool strict);
++
++static inline int tcf_hash_release(struct tc_action *a, bool bind)
++{
++      return __tcf_hash_release(a, bind, false);
++}
++
+ int tcf_register_action(struct tc_action_ops *a, unsigned int mask);
+ int tcf_unregister_action(struct tc_action_ops *a);
+ int tcf_action_destroy(struct list_head *actions, int bind);
+--- a/net/sched/act_api.c
++++ b/net/sched/act_api.c
+@@ -45,7 +45,7 @@ void tcf_hash_destroy(struct tc_action *
+ }
+ EXPORT_SYMBOL(tcf_hash_destroy);
+-int tcf_hash_release(struct tc_action *a, int bind)
++int __tcf_hash_release(struct tc_action *a, bool bind, bool strict)
+ {
+       struct tcf_common *p = a->priv;
+       int ret = 0;
+@@ -53,7 +53,7 @@ int tcf_hash_release(struct tc_action *a
+       if (p) {
+               if (bind)
+                       p->tcfc_bindcnt--;
+-              else if (p->tcfc_bindcnt > 0)
++              else if (strict && p->tcfc_bindcnt > 0)
+                       return -EPERM;
+               p->tcfc_refcnt--;
+@@ -64,9 +64,10 @@ int tcf_hash_release(struct tc_action *a
+                       ret = 1;
+               }
+       }
++
+       return ret;
+ }
+-EXPORT_SYMBOL(tcf_hash_release);
++EXPORT_SYMBOL(__tcf_hash_release);
+ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
+                          struct tc_action *a)
+@@ -136,7 +137,7 @@ static int tcf_del_walker(struct sk_buff
+               head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
+               hlist_for_each_entry_safe(p, n, head, tcfc_head) {
+                       a->priv = p;
+-                      ret = tcf_hash_release(a, 0);
++                      ret = __tcf_hash_release(a, false, true);
+                       if (ret == ACT_P_DELETED) {
+                               module_put(a->ops->owner);
+                               n_i++;
+@@ -413,7 +414,7 @@ int tcf_action_destroy(struct list_head
+       int ret = 0;
+       list_for_each_entry_safe(a, tmp, actions, list) {
+-              ret = tcf_hash_release(a, bind);
++              ret = __tcf_hash_release(a, bind, true);
+               if (ret == ACT_P_DELETED)
+                       module_put(a->ops->owner);
+               else if (ret < 0)
diff --git a/queue-4.1/net-tipc-initialize-security-state-for-new-connection-socket.patch b/queue-4.1/net-tipc-initialize-security-state-for-new-connection-socket.patch
new file mode 100644 (file)
index 0000000..3f0e4c4
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Stephen Smalley <sds@tycho.nsa.gov>
+Date: Tue, 7 Jul 2015 09:43:45 -0400
+Subject: net/tipc: initialize security state for new connection socket
+
+From: Stephen Smalley <sds@tycho.nsa.gov>
+
+[ Upstream commit fdd75ea8df370f206a8163786e7470c1277a5064 ]
+
+Calling connect() with an AF_TIPC socket would trigger a series
+of error messages from SELinux along the lines of:
+SELinux: Invalid class 0
+type=AVC msg=audit(1434126658.487:34500): avc:  denied  { <unprintable> }
+  for pid=292 comm="kworker/u16:5" scontext=system_u:system_r:kernel_t:s0
+  tcontext=system_u:object_r:unlabeled_t:s0 tclass=<unprintable>
+  permissive=0
+
+This was due to a failure to initialize the security state of the new
+connection sock by the tipc code, leaving it with junk in the security
+class field and an unlabeled secid.  Add a call to security_sk_clone()
+to inherit the security state from the parent socket.
+
+Reported-by: Tim Shearer <tim.shearer@overturenetworks.com>
+Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
+Acked-by: Paul Moore <paul@paul-moore.com>
+Acked-by: Ying Xue <ying.xue@windriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/socket.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -2009,6 +2009,7 @@ static int tipc_accept(struct socket *so
+       res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1);
+       if (res)
+               goto exit;
++      security_sk_clone(sock->sk, new_sock->sk);
+       new_sk = new_sock->sk;
+       new_tsock = tipc_sk(new_sk);
diff --git a/queue-4.1/net-xen-netback-off-by-one-in-bug_on-condition.patch b/queue-4.1/net-xen-netback-off-by-one-in-bug_on-condition.patch
new file mode 100644 (file)
index 0000000..e27b8e6
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Sun, 12 Jul 2015 01:20:55 +0300
+Subject: net/xen-netback: off by one in BUG_ON() condition
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+[ Upstream commit 50c2e4dd6749725338621fff456b26d3a592259f ]
+
+The > should be >=.  I also added spaces around the '-' operations so
+the code is a little more consistent and matches the condition better.
+
+Fixes: f53c3fe8dad7 ('xen-netback: Introduce TX grant mapping')
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/xen-netback/netback.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -1571,13 +1571,13 @@ static inline void xenvif_tx_dealloc_act
+               smp_rmb();
+               while (dc != dp) {
+-                      BUG_ON(gop - queue->tx_unmap_ops > MAX_PENDING_REQS);
++                      BUG_ON(gop - queue->tx_unmap_ops >= MAX_PENDING_REQS);
+                       pending_idx =
+                               queue->dealloc_ring[pending_index(dc++)];
+-                      pending_idx_release[gop-queue->tx_unmap_ops] =
++                      pending_idx_release[gop - queue->tx_unmap_ops] =
+                               pending_idx;
+-                      queue->pages_to_unmap[gop-queue->tx_unmap_ops] =
++                      queue->pages_to_unmap[gop - queue->tx_unmap_ops] =
+                               queue->mmap_pages[pending_idx];
+                       gnttab_set_unmap_op(gop,
+                                           idx_to_kaddr(queue, pending_idx),
diff --git a/queue-4.1/netlink-don-t-hold-mutex-in-rcu-callback-when-releasing-mmapd-ring.patch b/queue-4.1/netlink-don-t-hold-mutex-in-rcu-callback-when-releasing-mmapd-ring.patch
new file mode 100644 (file)
index 0000000..c749ad2
--- /dev/null
@@ -0,0 +1,210 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Florian Westphal <fw@strlen.de>
+Date: Tue, 21 Jul 2015 16:33:50 +0200
+Subject: netlink: don't hold mutex in rcu callback when releasing mmapd ring
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 0470eb99b4721586ccac954faac3fa4472da0845 ]
+
+Kirill A. Shutemov says:
+
+This simple test-case triggers few locking asserts in kernel:
+
+int main(int argc, char **argv)
+{
+        unsigned int block_size = 16 * 4096;
+        struct nl_mmap_req req = {
+                .nm_block_size          = block_size,
+                .nm_block_nr            = 64,
+                .nm_frame_size          = 16384,
+                .nm_frame_nr            = 64 * block_size / 16384,
+        };
+        unsigned int ring_size;
+       int fd;
+
+       fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+        if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
+                exit(1);
+        if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
+                exit(1);
+
+       ring_size = req.nm_block_nr * req.nm_block_size;
+       mmap(NULL, 2 * ring_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+       return 0;
+}
+
++++ exited with 0 +++
+BUG: sleeping function called from invalid context at /home/kas/git/public/linux-mm/kernel/locking/mutex.c:616
+in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: init
+3 locks held by init/1:
+ #0:  (reboot_mutex){+.+...}, at: [<ffffffff81080959>] SyS_reboot+0xa9/0x220
+ #1:  ((reboot_notifier_list).rwsem){.+.+..}, at: [<ffffffff8107f379>] __blocking_notifier_call_chain+0x39/0x70
+ #2:  (rcu_callback){......}, at: [<ffffffff810d32e0>] rcu_do_batch.isra.49+0x160/0x10c0
+Preemption disabled at:[<ffffffff8145365f>] __delay+0xf/0x20
+
+CPU: 1 PID: 1 Comm: init Not tainted 4.1.0-00009-gbddf4c4818e0 #253
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Debian-1.8.2-1 04/01/2014
+ ffff88017b3d8000 ffff88027bc03c38 ffffffff81929ceb 0000000000000102
+ 0000000000000000 ffff88027bc03c68 ffffffff81085a9d 0000000000000002
+ ffffffff81ca2a20 0000000000000268 0000000000000000 ffff88027bc03c98
+Call Trace:
+ <IRQ>  [<ffffffff81929ceb>] dump_stack+0x4f/0x7b
+ [<ffffffff81085a9d>] ___might_sleep+0x16d/0x270
+ [<ffffffff81085bed>] __might_sleep+0x4d/0x90
+ [<ffffffff8192e96f>] mutex_lock_nested+0x2f/0x430
+ [<ffffffff81932fed>] ? _raw_spin_unlock_irqrestore+0x5d/0x80
+ [<ffffffff81464143>] ? __this_cpu_preempt_check+0x13/0x20
+ [<ffffffff8182fc3d>] netlink_set_ring+0x1ed/0x350
+ [<ffffffff8182e000>] ? netlink_undo_bind+0x70/0x70
+ [<ffffffff8182fe20>] netlink_sock_destruct+0x80/0x150
+ [<ffffffff817e484d>] __sk_free+0x1d/0x160
+ [<ffffffff817e49a9>] sk_free+0x19/0x20
+[..]
+
+Cong Wang says:
+
+We can't hold mutex lock in a rcu callback, [..]
+
+Thomas Graf says:
+
+The socket should be dead at this point. It might be simpler to
+add a netlink_release_ring() function which doesn't require
+locking at all.
+
+Reported-by: "Kirill A. Shutemov" <kirill@shutemov.name>
+Diagnosed-by: Cong Wang <cwang@twopensource.com>
+Suggested-by: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |   79 +++++++++++++++++++++++++++--------------------
+ 1 file changed, 47 insertions(+), 32 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -355,25 +355,52 @@ err1:
+       return NULL;
+ }
++
++static void
++__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
++                 unsigned int order)
++{
++      struct netlink_sock *nlk = nlk_sk(sk);
++      struct sk_buff_head *queue;
++      struct netlink_ring *ring;
++
++      queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
++      ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
++
++      spin_lock_bh(&queue->lock);
++
++      ring->frame_max         = req->nm_frame_nr - 1;
++      ring->head              = 0;
++      ring->frame_size        = req->nm_frame_size;
++      ring->pg_vec_pages      = req->nm_block_size / PAGE_SIZE;
++
++      swap(ring->pg_vec_len, req->nm_block_nr);
++      swap(ring->pg_vec_order, order);
++      swap(ring->pg_vec, pg_vec);
++
++      __skb_queue_purge(queue);
++      spin_unlock_bh(&queue->lock);
++
++      WARN_ON(atomic_read(&nlk->mapped));
++
++      if (pg_vec)
++              free_pg_vec(pg_vec, order, req->nm_block_nr);
++}
++
+ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
+-                          bool closing, bool tx_ring)
++                          bool tx_ring)
+ {
+       struct netlink_sock *nlk = nlk_sk(sk);
+       struct netlink_ring *ring;
+-      struct sk_buff_head *queue;
+       void **pg_vec = NULL;
+       unsigned int order = 0;
+-      int err;
+       ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+-      queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+-      if (!closing) {
+-              if (atomic_read(&nlk->mapped))
+-                      return -EBUSY;
+-              if (atomic_read(&ring->pending))
+-                      return -EBUSY;
+-      }
++      if (atomic_read(&nlk->mapped))
++              return -EBUSY;
++      if (atomic_read(&ring->pending))
++              return -EBUSY;
+       if (req->nm_block_nr) {
+               if (ring->pg_vec != NULL)
+@@ -405,31 +432,19 @@ static int netlink_set_ring(struct sock
+                       return -EINVAL;
+       }
+-      err = -EBUSY;
+       mutex_lock(&nlk->pg_vec_lock);
+-      if (closing || atomic_read(&nlk->mapped) == 0) {
+-              err = 0;
+-              spin_lock_bh(&queue->lock);
+-
+-              ring->frame_max         = req->nm_frame_nr - 1;
+-              ring->head              = 0;
+-              ring->frame_size        = req->nm_frame_size;
+-              ring->pg_vec_pages      = req->nm_block_size / PAGE_SIZE;
+-
+-              swap(ring->pg_vec_len, req->nm_block_nr);
+-              swap(ring->pg_vec_order, order);
+-              swap(ring->pg_vec, pg_vec);
+-
+-              __skb_queue_purge(queue);
+-              spin_unlock_bh(&queue->lock);
+-
+-              WARN_ON(atomic_read(&nlk->mapped));
++      if (atomic_read(&nlk->mapped) == 0) {
++              __netlink_set_ring(sk, req, tx_ring, pg_vec, order);
++              mutex_unlock(&nlk->pg_vec_lock);
++              return 0;
+       }
++
+       mutex_unlock(&nlk->pg_vec_lock);
+       if (pg_vec)
+               free_pg_vec(pg_vec, order, req->nm_block_nr);
+-      return err;
++
++      return -EBUSY;
+ }
+ static void netlink_mm_open(struct vm_area_struct *vma)
+@@ -898,10 +913,10 @@ static void netlink_sock_destruct(struct
+               memset(&req, 0, sizeof(req));
+               if (nlk->rx_ring.pg_vec)
+-                      netlink_set_ring(sk, &req, true, false);
++                      __netlink_set_ring(sk, &req, false, NULL, 0);
+               memset(&req, 0, sizeof(req));
+               if (nlk->tx_ring.pg_vec)
+-                      netlink_set_ring(sk, &req, true, true);
++                      __netlink_set_ring(sk, &req, true, NULL, 0);
+       }
+ #endif /* CONFIG_NETLINK_MMAP */
+@@ -2197,7 +2212,7 @@ static int netlink_setsockopt(struct soc
+                       return -EINVAL;
+               if (copy_from_user(&req, optval, sizeof(req)))
+                       return -EFAULT;
+-              err = netlink_set_ring(sk, &req, false,
++              err = netlink_set_ring(sk, &req,
+                                      optname == NETLINK_TX_RING);
+               break;
+       }
diff --git a/queue-4.1/netlink-make-sure-ebusy-won-t-escape-from-netlink_insert.patch b/queue-4.1/netlink-make-sure-ebusy-won-t-escape-from-netlink_insert.patch
new file mode 100644 (file)
index 0000000..9bd5b36
--- /dev/null
@@ -0,0 +1,89 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 7 Aug 2015 00:26:41 +0200
+Subject: netlink: make sure -EBUSY won't escape from netlink_insert
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 4e7c1330689e27556de407d3fdadc65ffff5eb12 ]
+
+Linus reports the following deadlock on rtnl_mutex; triggered only
+once so far (extract):
+
+[12236.694209] NetworkManager  D 0000000000013b80     0  1047      1 0x00000000
+[12236.694218]  ffff88003f902640 0000000000000000 ffffffff815d15a9 0000000000000018
+[12236.694224]  ffff880119538000 ffff88003f902640 ffffffff81a8ff84 00000000ffffffff
+[12236.694230]  ffffffff81a8ff88 ffff880119c47f00 ffffffff815d133a ffffffff81a8ff80
+[12236.694235] Call Trace:
+[12236.694250]  [<ffffffff815d15a9>] ? schedule_preempt_disabled+0x9/0x10
+[12236.694257]  [<ffffffff815d133a>] ? schedule+0x2a/0x70
+[12236.694263]  [<ffffffff815d15a9>] ? schedule_preempt_disabled+0x9/0x10
+[12236.694271]  [<ffffffff815d2c3f>] ? __mutex_lock_slowpath+0x7f/0xf0
+[12236.694280]  [<ffffffff815d2cc6>] ? mutex_lock+0x16/0x30
+[12236.694291]  [<ffffffff814f1f90>] ? rtnetlink_rcv+0x10/0x30
+[12236.694299]  [<ffffffff8150ce3b>] ? netlink_unicast+0xfb/0x180
+[12236.694309]  [<ffffffff814f5ad3>] ? rtnl_getlink+0x113/0x190
+[12236.694319]  [<ffffffff814f202a>] ? rtnetlink_rcv_msg+0x7a/0x210
+[12236.694331]  [<ffffffff8124565c>] ? sock_has_perm+0x5c/0x70
+[12236.694339]  [<ffffffff814f1fb0>] ? rtnetlink_rcv+0x30/0x30
+[12236.694346]  [<ffffffff8150d62c>] ? netlink_rcv_skb+0x9c/0xc0
+[12236.694354]  [<ffffffff814f1f9f>] ? rtnetlink_rcv+0x1f/0x30
+[12236.694360]  [<ffffffff8150ce3b>] ? netlink_unicast+0xfb/0x180
+[12236.694367]  [<ffffffff8150d344>] ? netlink_sendmsg+0x484/0x5d0
+[12236.694376]  [<ffffffff810a236f>] ? __wake_up+0x2f/0x50
+[12236.694387]  [<ffffffff814cad23>] ? sock_sendmsg+0x33/0x40
+[12236.694396]  [<ffffffff814cb05e>] ? ___sys_sendmsg+0x22e/0x240
+[12236.694405]  [<ffffffff814cab75>] ? ___sys_recvmsg+0x135/0x1a0
+[12236.694415]  [<ffffffff811a9d12>] ? eventfd_write+0x82/0x210
+[12236.694423]  [<ffffffff811a0f9e>] ? fsnotify+0x32e/0x4c0
+[12236.694429]  [<ffffffff8108cb70>] ? wake_up_q+0x60/0x60
+[12236.694434]  [<ffffffff814cba09>] ? __sys_sendmsg+0x39/0x70
+[12236.694440]  [<ffffffff815d4797>] ? entry_SYSCALL_64_fastpath+0x12/0x6a
+
+It seems so far plausible that the recursive call into rtnetlink_rcv()
+looks suspicious. One way, where this could trigger is that the senders
+NETLINK_CB(skb).portid was wrongly 0 (which is rtnetlink socket), so
+the rtnl_getlink() request's answer would be sent to the kernel instead
+to the actual user process, thus grabbing rtnl_mutex() twice.
+
+One theory would be that netlink_autobind() triggered via netlink_sendmsg()
+internally overwrites the -EBUSY error to 0, but where it is wrongly
+originating from __netlink_insert() instead. That would reset the
+socket's portid to 0, which is then filled into NETLINK_CB(skb).portid
+later on. As commit d470e3b483dc ("[NETLINK]: Fix two socket hashing bugs.")
+also puts it, -EBUSY should not be propagated from netlink_insert().
+
+It looks like it's very unlikely to reproduce. We need to trigger the
+rhashtable_insert_rehash() handler under a situation where rehashing
+currently occurs (one /rare/ way would be to hit ht->elasticity limits
+while not filled enough to expand the hashtable, but that would rather
+require a specifically crafted bind() sequence with knowledge about
+destination slots, seems unlikely). It probably makes sense to guard
+__netlink_insert() in any case and remap that error. It was suggested
+that EOVERFLOW might be better than an already overloaded ENOMEM.
+
+Reference: http://thread.gmane.org/gmane.linux.network/372676
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Acked-by: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -1094,6 +1094,11 @@ static int netlink_insert(struct sock *s
+       err = __netlink_insert(table, sk);
+       if (err) {
++              /* In case the hashtable backend returns with -EBUSY
++               * from here, it must not escape to the caller.
++               */
++              if (unlikely(err == -EBUSY))
++                      err = -EOVERFLOW;
+               if (err == -EEXIST)
+                       err = -EADDRINUSE;
+               nlk_sk(sk)->portid = 0;
diff --git a/queue-4.1/packet-missing-dev_put-in-packet_do_bind.patch b/queue-4.1/packet-missing-dev_put-in-packet_do_bind.patch
new file mode 100644 (file)
index 0000000..dbbb637
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Lars Westerhoff <lars.westerhoff@newtec.eu>
+Date: Tue, 28 Jul 2015 01:32:21 +0300
+Subject: packet: missing dev_put() in packet_do_bind()
+
+From: Lars Westerhoff <lars.westerhoff@newtec.eu>
+
+[ Upstream commit 158cd4af8dedbda0d612d448c724c715d0dda649 ]
+
+When binding a PF_PACKET socket, the use count of the bound interface is
+always increased with dev_hold in dev_get_by_{index,name}.  However,
+when rebound with the same protocol and device as in the previous bind
+the use count of the interface was not decreased.  Ultimately, this
+caused the deletion of the interface to fail with the following message:
+
+unregister_netdevice: waiting for dummy0 to become free. Usage count = 1
+
+This patch moves the dev_put out of the conditional part that was only
+executed when either the protocol or device changed on a bind.
+
+Fixes: 902fefb82ef7 ('packet: improve socket create/bind latency in some cases')
+Signed-off-by: Lars Westerhoff <lars.westerhoff@newtec.eu>
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Daniel Borkmann <dborkman@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -2688,7 +2688,7 @@ static int packet_release(struct socket
+ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
+ {
+       struct packet_sock *po = pkt_sk(sk);
+-      const struct net_device *dev_curr;
++      struct net_device *dev_curr;
+       __be16 proto_curr;
+       bool need_rehook;
+@@ -2712,15 +2712,13 @@ static int packet_do_bind(struct sock *s
+               po->num = proto;
+               po->prot_hook.type = proto;
+-
+-              if (po->prot_hook.dev)
+-                      dev_put(po->prot_hook.dev);
+-
+               po->prot_hook.dev = dev;
+               po->ifindex = dev ? dev->ifindex : 0;
+               packet_cached_dev_assign(po, dev);
+       }
++      if (dev_curr)
++              dev_put(dev_curr);
+       if (proto == 0 || !need_rehook)
+               goto out_unlock;
diff --git a/queue-4.1/packet-tpacket_snd-fix-signed-unsigned-comparison.patch b/queue-4.1/packet-tpacket_snd-fix-signed-unsigned-comparison.patch
new file mode 100644 (file)
index 0000000..192b9bb
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Alexander Drozdov <al.drozdov@gmail.com>
+Date: Tue, 28 Jul 2015 13:57:01 +0300
+Subject: packet: tpacket_snd(): fix signed/unsigned comparison
+
+From: Alexander Drozdov <al.drozdov@gmail.com>
+
+[ Upstream commit dbd46ab412b8fb395f2b0ff6f6a7eec9df311550 ]
+
+tpacket_fill_skb() can return a negative value (-errno) which
+is stored in tp_len variable. In that case the following
+condition will be (but shouldn't be) true:
+
+tp_len > dev->mtu + dev->hard_header_len
+
+as dev->mtu and dev->hard_header_len are both unsigned.
+
+That may lead to just returning an incorrect EMSGSIZE errno
+to the user.
+
+Fixes: 52f1454f629fa ("packet: allow to transmit +4 byte in TX_RING slot for VLAN case")
+Signed-off-by: Alexander Drozdov <al.drozdov@gmail.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -2307,7 +2307,8 @@ static int tpacket_snd(struct packet_soc
+               }
+               tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
+                                         addr, hlen);
+-              if (tp_len > dev->mtu + dev->hard_header_len) {
++              if (likely(tp_len >= 0) &&
++                  tp_len > dev->mtu + dev->hard_header_len) {
+                       struct ethhdr *ehdr;
+                       /* Earlier code assumed this would be a VLAN pkt,
+                        * double-check this now that we have the actual
diff --git a/queue-4.1/rds-fix-an-integer-overflow-test-in-rds_info_getsockopt.patch b/queue-4.1/rds-fix-an-integer-overflow-test-in-rds_info_getsockopt.patch
new file mode 100644 (file)
index 0000000..db0dd34
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Sat, 1 Aug 2015 15:33:26 +0300
+Subject: rds: fix an integer overflow test in rds_info_getsockopt()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+[ Upstream commit 468b732b6f76b138c0926eadf38ac88467dcd271 ]
+
+"len" is a signed integer.  We check that len is not negative, so it
+goes from zero to INT_MAX.  PAGE_SIZE is unsigned long so the comparison
+is type promoted to unsigned long.  ULONG_MAX - 4095 is a higher than
+INT_MAX so the condition can never be true.
+
+I don't know if this is harmful but it seems safe to limit "len" to
+INT_MAX - 4095.
+
+Fixes: a8c879a7ee98 ('RDS: Info and stats')
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rds/info.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/rds/info.c
++++ b/net/rds/info.c
+@@ -176,7 +176,7 @@ int rds_info_getsockopt(struct socket *s
+       /* check for all kinds of wrapping and the like */
+       start = (unsigned long)optval;
+-      if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) {
++      if (len < 0 || len > INT_MAX - PAGE_SIZE + 1 || start + len < start) {
+               ret = -EINVAL;
+               goto out;
+       }
diff --git a/queue-4.1/revert-dev-set-iflink-to-0-for-virtual-interfaces.patch b/queue-4.1/revert-dev-set-iflink-to-0-for-virtual-interfaces.patch
new file mode 100644 (file)
index 0000000..0986f92
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Mon, 6 Jul 2015 17:25:10 +0200
+Subject: Revert "dev: set iflink to 0 for virtual interfaces"
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+[ Upstream commit 95ec655bc465ccb2a3329d4aff9a45e3c8188db5 ]
+
+This reverts commit e1622baf54df8cc958bf29d71de5ad545ea7d93c.
+
+The side effect of this commit is to add a '@NONE' after each virtual
+interface name with a 'ip link'. It may break existing scripts.
+
+Reported-by: Olivier Hartkopp <socketcan@hartkopp.net>
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Tested-by: Oliver Hartkopp <socketcan@hartkopp.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -672,10 +672,6 @@ int dev_get_iflink(const struct net_devi
+       if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
+               return dev->netdev_ops->ndo_get_iflink(dev);
+-      /* If dev->rtnl_link_ops is set, it's a virtual interface. */
+-      if (dev->rtnl_link_ops)
+-              return 0;
+-
+       return dev->ifindex;
+ }
+ EXPORT_SYMBOL(dev_get_iflink);
diff --git a/queue-4.1/revert-sit-add-gro-callbacks-to-sit_offload.patch b/queue-4.1/revert-sit-add-gro-callbacks-to-sit_offload.patch
new file mode 100644 (file)
index 0000000..24b7274
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Mon, 20 Jul 2015 17:55:38 +0800
+Subject: Revert "sit: Add gro callbacks to sit_offload"
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+[ Upstream commit fdbf5b097bbd9693a86c0b8bfdd071a9a2117cfc ]
+
+This patch reverts 19424e052fb44da2f00d1a868cbb51f3e9f4bbb5 ("sit:
+Add gro callbacks to sit_offload") because it generates packets
+that cannot be handled even by our own GSO.
+
+Reported-by: Wolfgang Walter <linux@stwm.de>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_offload.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/net/ipv6/ip6_offload.c
++++ b/net/ipv6/ip6_offload.c
+@@ -292,8 +292,6 @@ static struct packet_offload ipv6_packet
+ static const struct net_offload sit_offload = {
+       .callbacks = {
+               .gso_segment    = ipv6_gso_segment,
+-              .gro_receive    = ipv6_gro_receive,
+-              .gro_complete   = ipv6_gro_complete,
+       },
+ };
diff --git a/queue-4.1/rhashtable-fix-for-resize-events-during-table-walk.patch b/queue-4.1/rhashtable-fix-for-resize-events-during-table-walk.patch
new file mode 100644 (file)
index 0000000..aa75bea
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Phil Sutter <phil@nwl.cc>
+Date: Mon, 6 Jul 2015 15:51:20 +0200
+Subject: rhashtable: fix for resize events during table walk
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit 142b942a75cb10ede1b42bf85368d41449ab4e3b ]
+
+If rhashtable_walk_next detects a resize operation in progress, it jumps
+to the new table and continues walking that one. But it misses to drop
+the reference to it's current item, leading it to continue traversing
+the new table's bucket in which the current item is sorted into, and
+after reaching that bucket's end continues traversing the new table's
+second bucket instead of the first one, thereby potentially missing
+items.
+
+This fixes the rhashtable runtime test for me. Bug probably introduced
+by Herbert Xu's patch eddee5ba ("rhashtable: Fix walker behaviour during
+rehash") although not explicitly tested.
+
+Fixes: eddee5ba ("rhashtable: Fix walker behaviour during rehash")
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ lib/rhashtable.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/lib/rhashtable.c
++++ b/lib/rhashtable.c
+@@ -612,6 +612,8 @@ next:
+               iter->skip = 0;
+       }
++      iter->p = NULL;
++
+       /* Ensure we see any new tables. */
+       smp_rmb();
+@@ -622,8 +624,6 @@ next:
+               return ERR_PTR(-EAGAIN);
+       }
+-      iter->p = NULL;
+-
+ out:
+       return obj;
diff --git a/queue-4.1/rocker-free-netdevice-during-netdevice-removal.patch b/queue-4.1/rocker-free-netdevice-during-netdevice-removal.patch
new file mode 100644 (file)
index 0000000..a400492
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Sun, 2 Aug 2015 19:29:16 +0200
+Subject: rocker: free netdevice during netdevice removal
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit 1ebd47efa4e17391dfac8caa349c6a8d35f996d1 ]
+
+When removing a port's netdevice in 'rocker_remove_ports', we should
+also free the allocated 'net_device' structure. Do that by calling
+'free_netdev' after unregistering it.
+
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: Jiri Pirko <jiri@resnulli.us>
+Fixes: 4b8ac9660af ("rocker: introduce rocker switch driver")
+Acked-by: Scott Feldman <sfeldma@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/rocker/rocker.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/rocker/rocker.c
++++ b/drivers/net/ethernet/rocker/rocker.c
+@@ -4587,6 +4587,7 @@ static void rocker_remove_ports(struct r
+               rocker_port = rocker->ports[i];
+               rocker_port_ig_tbl(rocker_port, ROCKER_OP_FLAG_REMOVE);
+               unregister_netdev(rocker_port->dev);
++              free_netdev(rocker_port->dev);
+       }
+       kfree(rocker->ports);
+ }
diff --git a/queue-4.1/rtnetlink-verify-ifla_vf_info-attributes-before-passing-them-to-driver.patch b/queue-4.1/rtnetlink-verify-ifla_vf_info-attributes-before-passing-them-to-driver.patch
new file mode 100644 (file)
index 0000000..209bce0
--- /dev/null
@@ -0,0 +1,269 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 7 Jul 2015 00:07:52 +0200
+Subject: rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 4f7d2cdfdde71ffe962399b7020c674050329423 ]
+
+Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
+SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
+anymore with respect to their policy, that is, ifla_vfinfo_policy[].
+
+Before, they were part of ifla_policy[], but they have been nested since
+placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
+which is another nested attribute for the actual VF attributes such as
+IFLA_VF_MAC, IFLA_VF_VLAN, etc.
+
+Despite the policy being split out from ifla_policy[] in this commit,
+it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
+testing for struct nlattr, but it doesn't know about the data context and
+their requirements.
+
+Fix, on top of Jason's initial work, does 1) parsing of the attributes
+with the right policy, and 2) using the resulting parsed attribute table
+from 1) instead of the nla_for_each_nested() loop (just like we used to
+do when still part of ifla_policy[]).
+
+Reference: http://thread.gmane.org/gmane.linux.network/368913
+Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
+Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
+Cc: Chris Wright <chrisw@sous-sol.org>
+Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
+Cc: Greg Rose <gregory.v.rose@intel.com>
+Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+Cc: Rony Efraim <ronye@mellanox.com>
+Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
+Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Cc: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |  187 ++++++++++++++++++++++++++-------------------------
+ 1 file changed, 96 insertions(+), 91 deletions(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -1287,10 +1287,6 @@ static const struct nla_policy ifla_info
+       [IFLA_INFO_SLAVE_DATA]  = { .type = NLA_NESTED },
+ };
+-static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
+-      [IFLA_VF_INFO]          = { .type = NLA_NESTED },
+-};
+-
+ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
+       [IFLA_VF_MAC]           = { .len = sizeof(struct ifla_vf_mac) },
+       [IFLA_VF_VLAN]          = { .len = sizeof(struct ifla_vf_vlan) },
+@@ -1437,96 +1433,98 @@ static int validate_linkmsg(struct net_d
+       return 0;
+ }
+-static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
++static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
+ {
+-      int rem, err = -EINVAL;
+-      struct nlattr *vf;
+       const struct net_device_ops *ops = dev->netdev_ops;
++      int err = -EINVAL;
+-      nla_for_each_nested(vf, attr, rem) {
+-              switch (nla_type(vf)) {
+-              case IFLA_VF_MAC: {
+-                      struct ifla_vf_mac *ivm;
+-                      ivm = nla_data(vf);
+-                      err = -EOPNOTSUPP;
+-                      if (ops->ndo_set_vf_mac)
+-                              err = ops->ndo_set_vf_mac(dev, ivm->vf,
+-                                                        ivm->mac);
+-                      break;
+-              }
+-              case IFLA_VF_VLAN: {
+-                      struct ifla_vf_vlan *ivv;
+-                      ivv = nla_data(vf);
+-                      err = -EOPNOTSUPP;
+-                      if (ops->ndo_set_vf_vlan)
+-                              err = ops->ndo_set_vf_vlan(dev, ivv->vf,
+-                                                         ivv->vlan,
+-                                                         ivv->qos);
+-                      break;
+-              }
+-              case IFLA_VF_TX_RATE: {
+-                      struct ifla_vf_tx_rate *ivt;
+-                      struct ifla_vf_info ivf;
+-                      ivt = nla_data(vf);
+-                      err = -EOPNOTSUPP;
+-                      if (ops->ndo_get_vf_config)
+-                              err = ops->ndo_get_vf_config(dev, ivt->vf,
+-                                                           &ivf);
+-                      if (err)
+-                              break;
+-                      err = -EOPNOTSUPP;
+-                      if (ops->ndo_set_vf_rate)
+-                              err = ops->ndo_set_vf_rate(dev, ivt->vf,
+-                                                         ivf.min_tx_rate,
+-                                                         ivt->rate);
+-                      break;
+-              }
+-              case IFLA_VF_RATE: {
+-                      struct ifla_vf_rate *ivt;
+-                      ivt = nla_data(vf);
+-                      err = -EOPNOTSUPP;
+-                      if (ops->ndo_set_vf_rate)
+-                              err = ops->ndo_set_vf_rate(dev, ivt->vf,
+-                                                         ivt->min_tx_rate,
+-                                                         ivt->max_tx_rate);
+-                      break;
+-              }
+-              case IFLA_VF_SPOOFCHK: {
+-                      struct ifla_vf_spoofchk *ivs;
+-                      ivs = nla_data(vf);
+-                      err = -EOPNOTSUPP;
+-                      if (ops->ndo_set_vf_spoofchk)
+-                              err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
+-                                                             ivs->setting);
+-                      break;
+-              }
+-              case IFLA_VF_LINK_STATE: {
+-                      struct ifla_vf_link_state *ivl;
+-                      ivl = nla_data(vf);
+-                      err = -EOPNOTSUPP;
+-                      if (ops->ndo_set_vf_link_state)
+-                              err = ops->ndo_set_vf_link_state(dev, ivl->vf,
+-                                                               ivl->link_state);
+-                      break;
+-              }
+-              case IFLA_VF_RSS_QUERY_EN: {
+-                      struct ifla_vf_rss_query_en *ivrssq_en;
++      if (tb[IFLA_VF_MAC]) {
++              struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]);
+-                      ivrssq_en = nla_data(vf);
+-                      err = -EOPNOTSUPP;
+-                      if (ops->ndo_set_vf_rss_query_en)
+-                              err = ops->ndo_set_vf_rss_query_en(dev,
+-                                                          ivrssq_en->vf,
+-                                                          ivrssq_en->setting);
+-                      break;
+-              }
+-              default:
+-                      err = -EINVAL;
+-                      break;
+-              }
+-              if (err)
+-                      break;
++              err = -EOPNOTSUPP;
++              if (ops->ndo_set_vf_mac)
++                      err = ops->ndo_set_vf_mac(dev, ivm->vf,
++                                                ivm->mac);
++              if (err < 0)
++                      return err;
++      }
++
++      if (tb[IFLA_VF_VLAN]) {
++              struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]);
++
++              err = -EOPNOTSUPP;
++              if (ops->ndo_set_vf_vlan)
++                      err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan,
++                                                 ivv->qos);
++              if (err < 0)
++                      return err;
+       }
++
++      if (tb[IFLA_VF_TX_RATE]) {
++              struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]);
++              struct ifla_vf_info ivf;
++
++              err = -EOPNOTSUPP;
++              if (ops->ndo_get_vf_config)
++                      err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf);
++              if (err < 0)
++                      return err;
++
++              err = -EOPNOTSUPP;
++              if (ops->ndo_set_vf_rate)
++                      err = ops->ndo_set_vf_rate(dev, ivt->vf,
++                                                 ivf.min_tx_rate,
++                                                 ivt->rate);
++              if (err < 0)
++                      return err;
++      }
++
++      if (tb[IFLA_VF_RATE]) {
++              struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]);
++
++              err = -EOPNOTSUPP;
++              if (ops->ndo_set_vf_rate)
++                      err = ops->ndo_set_vf_rate(dev, ivt->vf,
++                                                 ivt->min_tx_rate,
++                                                 ivt->max_tx_rate);
++              if (err < 0)
++                      return err;
++      }
++
++      if (tb[IFLA_VF_SPOOFCHK]) {
++              struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]);
++
++              err = -EOPNOTSUPP;
++              if (ops->ndo_set_vf_spoofchk)
++                      err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
++                                                     ivs->setting);
++              if (err < 0)
++                      return err;
++      }
++
++      if (tb[IFLA_VF_LINK_STATE]) {
++              struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]);
++
++              err = -EOPNOTSUPP;
++              if (ops->ndo_set_vf_link_state)
++                      err = ops->ndo_set_vf_link_state(dev, ivl->vf,
++                                                       ivl->link_state);
++              if (err < 0)
++                      return err;
++      }
++
++      if (tb[IFLA_VF_RSS_QUERY_EN]) {
++              struct ifla_vf_rss_query_en *ivrssq_en;
++
++              err = -EOPNOTSUPP;
++              ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]);
++              if (ops->ndo_set_vf_rss_query_en)
++                      err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf,
++                                                         ivrssq_en->setting);
++              if (err < 0)
++                      return err;
++      }
++
+       return err;
+ }
+@@ -1722,14 +1720,21 @@ static int do_setlink(const struct sk_bu
+       }
+       if (tb[IFLA_VFINFO_LIST]) {
++              struct nlattr *vfinfo[IFLA_VF_MAX + 1];
+               struct nlattr *attr;
+               int rem;
++
+               nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
+-                      if (nla_type(attr) != IFLA_VF_INFO) {
++                      if (nla_type(attr) != IFLA_VF_INFO ||
++                          nla_len(attr) < NLA_HDRLEN) {
+                               err = -EINVAL;
+                               goto errout;
+                       }
+-                      err = do_setvfinfo(dev, attr);
++                      err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr,
++                                             ifla_vf_policy);
++                      if (err < 0)
++                              goto errout;
++                      err = do_setvfinfo(dev, vfinfo);
+                       if (err < 0)
+                               goto errout;
+                       status |= DO_SETLINK_NOTIFY;
diff --git a/queue-4.1/sched-cls_bpf-fix-panic-on-filter-replace.patch b/queue-4.1/sched-cls_bpf-fix-panic-on-filter-replace.patch
new file mode 100644 (file)
index 0000000..e910dd3
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 17 Jul 2015 22:38:43 +0200
+Subject: sched: cls_bpf: fix panic on filter replace
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit f6bfc46da6292b630ba389592123f0dd02066172 ]
+
+The following test case causes a NULL pointer dereference in cls_bpf:
+
+  FOO="1,6 0 0 4294967295,"
+  tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok
+  tc filter replace dev foo parent 1: pref 49152 handle 0x1 \
+            bpf bytecode "$FOO" flowid 1:1 action drop
+
+The problem is that commit 1f947bf151e9 ("net: sched: rcu'ify cls_bpf")
+accidentally swapped the arguments of list_replace_rcu(), the old
+element needs to be the first argument and the new element the second.
+
+Fixes: 1f947bf151e9 ("net: sched: rcu'ify cls_bpf")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: John Fastabend <john.r.fastabend@intel.com>
+Acked-by: Alexei Starovoitov <ast@plumgrid.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_bpf.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sched/cls_bpf.c
++++ b/net/sched/cls_bpf.c
+@@ -364,7 +364,7 @@ static int cls_bpf_change(struct net *ne
+               goto errout;
+       if (oldprog) {
+-              list_replace_rcu(&prog->link, &oldprog->link);
++              list_replace_rcu(&oldprog->link, &prog->link);
+               tcf_unbind_filter(tp, &oldprog->res);
+               call_rcu(&oldprog->rcu, __cls_bpf_delete_prog);
+       } else {
diff --git a/queue-4.1/sched-cls_flow-fix-panic-on-filter-replace.patch b/queue-4.1/sched-cls_flow-fix-panic-on-filter-replace.patch
new file mode 100644 (file)
index 0000000..64513a8
--- /dev/null
@@ -0,0 +1,58 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 17 Jul 2015 22:38:45 +0200
+Subject: sched: cls_flow: fix panic on filter replace
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 32b2f4b196b37695fdb42b31afcbc15399d6ef91 ]
+
+The following test case causes a NULL pointer dereference in cls_flow:
+
+  tc filter add dev foo parent 1: handle 0x1 flow hash keys dst action ok
+  tc filter replace dev foo parent 1: pref 49152 handle 0x1 \
+            flow hash keys mark action drop
+
+To be more precise, actually two different panics are fixed, the first
+occurs because tcf_exts_init() is not called on the newly allocated
+filter when we do a replace. And the second panic uncovered after that
+happens since the arguments of list_replace_rcu() are swapped, the old
+element needs to be the first argument and the new element the second.
+
+Fixes: 70da9f0bf999 ("net: sched: cls_flow use RCU")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: John Fastabend <john.r.fastabend@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_flow.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/sched/cls_flow.c
++++ b/net/sched/cls_flow.c
+@@ -419,6 +419,8 @@ static int flow_change(struct net *net,
+       if (!fnew)
+               goto err2;
++      tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
++
+       fold = (struct flow_filter *)*arg;
+       if (fold) {
+               err = -EINVAL;
+@@ -480,7 +482,6 @@ static int flow_change(struct net *net,
+               fnew->mask  = ~0U;
+               fnew->tp = tp;
+               get_random_bytes(&fnew->hashrnd, 4);
+-              tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+       }
+       fnew->perturb_timer.function = flow_perturbation;
+@@ -520,7 +521,7 @@ static int flow_change(struct net *net,
+       if (*arg == 0)
+               list_add_tail_rcu(&fnew->list, &head->filters);
+       else
+-              list_replace_rcu(&fnew->list, &fold->list);
++              list_replace_rcu(&fold->list, &fnew->list);
+       *arg = (unsigned long)fnew;
index 4be458f4f1936f43dc5b4139f7a50c893ae614ca..d96fa42f7394b0fd17cef1ee12342621ae58de43 100644 (file)
@@ -110,3 +110,50 @@ stmmac-troubleshoot-unexpected-bits-in-des0-des1.patch
 net-stmmac-dwmac-rk-fix-clk-rate-when-provided-by-soc.patch
 hfs-hfsplus-cache-pages-correctly-between-bnode_create-and-bnode_free.patch
 lib-decompressors-use-real-out-buf-size-for-gunzip-with-kernel.patch
+jbd2-avoid-infinite-loop-when-destroying-aborted-journal.patch
+ipv6-make-mld-packets-to-only-be-processed-locally.patch
+rhashtable-fix-for-resize-events-during-table-walk.patch
+net-graceful-exit-from-netif_alloc_netdev_queues.patch
+revert-dev-set-iflink-to-0-for-virtual-interfaces.patch
+rtnetlink-verify-ifla_vf_info-attributes-before-passing-them-to-driver.patch
+ip_tunnel-fix-ipv4-pmtu-check-to-honor-inner-ip-header-df.patch
+net-tipc-initialize-security-state-for-new-connection-socket.patch
+bridge-mdb-zero-out-the-local-br_ip-variable-before-use.patch
+net-pktgen-fix-race-between-pktgen_thread_worker-and-kthread_stop.patch
+bridge-fix-potential-crash-in-__netdev_pick_tx.patch
+net-do-not-process-device-backlog-during-unregistration.patch
+net-call-rcu_read_lock-early-in-process_backlog.patch
+net-xen-netback-off-by-one-in-bug_on-condition.patch
+net-clone-skb-before-setting-peeked-flag.patch
+net-fix-skb-csum-races-when-peeking.patch
+net-fix-skb_set_peeked-use-after-free-bug.patch
+bridge-mdb-fix-double-add-notification.patch
+fq_codel-fix-a-use-after-free.patch
+isdn-gigaset-reset-tty-receive_room-when-attaching-ser_gigaset.patch
+ipv6-lock-socket-in-ip6_datagram_connect.patch
+bonding-fix-destruction-of-bond-with-devices-different-from-arphrd_ether.patch
+revert-sit-add-gro-callbacks-to-sit_offload.patch
+bonding-correct-the-mac-address-for-follow-fail_over_mac-policy.patch
+sched-cls_bpf-fix-panic-on-filter-replace.patch
+sched-cls_flow-fix-panic-on-filter-replace.patch
+inet-frags-fix-defragmented-packet-s-ip-header-for-af_packet.patch
+netlink-don-t-hold-mutex-in-rcu-callback-when-releasing-mmapd-ring.patch
+virtio_net-don-t-require-any_layout-with-version_1.patch
+bridge-netlink-fix-slave_changelink-br_setport-race-conditions.patch
+net-mlx4_core-fix-wrong-index-in-propagating-port-change-event-to-vfs.patch
+fib_trie-drop-unnecessary-calls-to-leaf_pull_suffix.patch
+packet-missing-dev_put-in-packet_do_bind.patch
+packet-tpacket_snd-fix-signed-unsigned-comparison.patch
+act_bpf-fix-memory-leaks-when-replacing-bpf-programs.patch
+net-sched-fix-refcount-imbalance-in-actions.patch
+rocker-free-netdevice-during-netdevice-removal.patch
+rds-fix-an-integer-overflow-test-in-rds_info_getsockopt.patch
+udp-fix-dst-races-with-multicast-early-demux.patch
+bridge-netlink-account-for-the-ifla_brport_proxyarp-attribute-size-and-policy.patch
+bridge-netlink-account-for-the-ifla_brport_proxyarp_wifi-attribute-size-and-policy.patch
+bna-fix-interrupts-storm-caused-by-erroneous-packets.patch
+netlink-make-sure-ebusy-won-t-escape-from-netlink_insert.patch
+inet-fix-possible-request-socket-leak.patch
+inet-fix-races-with-reqsk-timers.patch
+net-dsa-do-not-override-phy-interface-if-already-configured.patch
+ipv4-off-by-one-in-continuation-handling-in-proc-net-route.patch
diff --git a/queue-4.1/udp-fix-dst-races-with-multicast-early-demux.patch b/queue-4.1/udp-fix-dst-races-with-multicast-early-demux.patch
new file mode 100644 (file)
index 0000000..1a48b05
--- /dev/null
@@ -0,0 +1,62 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 1 Aug 2015 12:14:33 +0200
+Subject: udp: fix dst races with multicast early demux
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 10e2eb878f3ca07ac2f05fa5ca5e6c4c9174a27a ]
+
+Multicast dst are not cached. They carry DST_NOCACHE.
+
+As mentioned in commit f8864972126899 ("ipv4: fix dst race in
+sk_dst_get()"), these dst need special care before caching them
+into a socket.
+
+Caching them is allowed only if their refcnt was not 0, ie we
+must use atomic_inc_not_zero()
+
+Also, we must use READ_ONCE() to fetch sk->sk_rx_dst, as mentioned
+in commit d0c294c53a771 ("tcp: prevent fetching dst twice in early demux
+code")
+
+Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux")
+Tested-by: Gregory Hoggarth <Gregory.Hoggarth@alliedtelesis.co.nz>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Gregory Hoggarth <Gregory.Hoggarth@alliedtelesis.co.nz>
+Reported-by: Alex Gartrell <agartrell@fb.com>
+Cc: Michal Kubeček <mkubecek@suse.cz>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp.c |   13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1995,12 +1995,19 @@ void udp_v4_early_demux(struct sk_buff *
+       skb->sk = sk;
+       skb->destructor = sock_efree;
+-      dst = sk->sk_rx_dst;
++      dst = READ_ONCE(sk->sk_rx_dst);
+       if (dst)
+               dst = dst_check(dst, 0);
+-      if (dst)
+-              skb_dst_set_noref(skb, dst);
++      if (dst) {
++              /* DST_NOCACHE can not be used without taking a reference */
++              if (dst->flags & DST_NOCACHE) {
++                      if (likely(atomic_inc_not_zero(&dst->__refcnt)))
++                              skb_dst_set(skb, dst);
++              } else {
++                      skb_dst_set_noref(skb, dst);
++              }
++      }
+ }
+ int udp_rcv(struct sk_buff *skb)
diff --git a/queue-4.1/virtio_net-don-t-require-any_layout-with-version_1.patch b/queue-4.1/virtio_net-don-t-require-any_layout-with-version_1.patch
new file mode 100644 (file)
index 0000000..0219384
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Sat Sep 26 11:13:07 PDT 2015
+From: "Michael S. Tsirkin" <mst@redhat.com>
+Date: Wed, 15 Jul 2015 15:26:19 +0300
+Subject: virtio_net: don't require ANY_LAYOUT with VERSION_1
+
+From: "Michael S. Tsirkin" <mst@redhat.com>
+
+[ Upstream commit 75993300d008f418ee2569a632185fc1d7d50674 ]
+
+ANY_LAYOUT is a compatibility feature. It's implied
+for VERSION_1 devices, and non-transitional devices
+might not offer it. Change code to behave accordingly.
+
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -1828,7 +1828,8 @@ static int virtnet_probe(struct virtio_d
+       else
+               vi->hdr_len = sizeof(struct virtio_net_hdr);
+-      if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT))
++      if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
++          virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
+               vi->any_header_sg = true;
+       if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))