--- /dev/null
+From f8e88418b92cf79b7083dd5386fb3ebb3c30744b Mon Sep 17 00:00:00 2001
+From: Bob Peterson <rpeterso@redhat.com>
+Date: Wed, 28 Aug 2019 00:09:06 +0100
+Subject: GFS2: don't set rgrp gl_object until it's inserted into rgrp tree
+
+commit 36e4ad0316c017d5b271378ed9a1c9a4b77fab5f upstream.
+
+Before this patch, function read_rindex_entry would set a rgrp
+glock's gl_object pointer to itself before inserting the rgrp into
+the rgrp rbtree. The problem is: if another process was also reading
+the rgrp in, and had already inserted its newly created rgrp, then
+the second call to read_rindex_entry would overwrite that value and
+then return a bad return code to the caller. Later, other functions
+would reference the now-freed rgrp memory by way of gl_object.
+In some cases, that could result in gfs2_rgrp_brelse being called
+twice for the same rgrp: once for the failed attempt and once for
+the "real" rgrp release. Eventually the kernel would panic.
+There are also a number of other things that could go wrong when
+a kernel module is accessing freed storage. For example, this could
+result in rgrp corruption because the fake rgrp would point to a
+fake bitmap in memory too, causing gfs2_inplace_reserve to search
+some random memory for free blocks, and find some, since we were
+never setting rgd->rd_bits to NULL before freeing it.
+
+This patch fixes the problem by not setting gl_object until we
+have successfully inserted the rgrp into the rbtree. Also, it sets
+rd_bits to NULL as it frees them, which will ensure any accidental
+access to the wrong rgrp will result in a kernel panic rather than
+file system corruption, which is preferred.
+
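+For reference, the ordering this patch establishes looks roughly like the
+following simplified sketch (paraphrased from the hunks below; error
+handling and the gl_vm assignments are abbreviated):
+
+    spin_lock(&sdp->sd_rindex_spin);
+    error = rgd_insert(rgd);        /* fails if another reader already won */
+    spin_unlock(&sdp->sd_rindex_spin);
+    if (!error) {
+        /* only the rgrp that made it into the tree is published */
+        rgd->rd_gl->gl_object = rgd;
+        return 0;
+    }
+    /* duplicate rgrp: drop the glock and free it without touching gl_object */
+    gfs2_glock_put(rgd->rd_gl);
+    kfree(rgd->rd_bits);
+    rgd->rd_bits = NULL;
+    kmem_cache_free(gfs2_rgrpd_cachep, rgd);
+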
+Signed-off-by: Bob Peterson <rpeterso@redhat.com>
+[bwh: Backported to 4.4: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/gfs2/rgrp.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
+index ef24894edecc1..9c159e6ad1164 100644
+--- a/fs/gfs2/rgrp.c
++++ b/fs/gfs2/rgrp.c
+@@ -739,6 +739,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
+
+ gfs2_free_clones(rgd);
+ kfree(rgd->rd_bits);
++ rgd->rd_bits = NULL;
+ return_all_reservations(rgd);
+ kmem_cache_free(gfs2_rgrpd_cachep, rgd);
+ }
+@@ -933,10 +934,6 @@ static int read_rindex_entry(struct gfs2_inode *ip)
+ if (error)
+ goto fail;
+
+- rgd->rd_gl->gl_object = rgd;
+- rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_CACHE_MASK;
+- rgd->rd_gl->gl_vm.end = PAGE_CACHE_ALIGN((rgd->rd_addr +
+- rgd->rd_length) * bsize) - 1;
+ rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
+ rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
+ if (rgd->rd_data > sdp->sd_max_rg_data)
+@@ -944,14 +941,20 @@ static int read_rindex_entry(struct gfs2_inode *ip)
+ spin_lock(&sdp->sd_rindex_spin);
+ error = rgd_insert(rgd);
+ spin_unlock(&sdp->sd_rindex_spin);
+- if (!error)
++ if (!error) {
++ rgd->rd_gl->gl_object = rgd;
++ rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK;
++ rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr +
++ rgd->rd_length) * bsize) - 1;
+ return 0;
++ }
+
+ error = 0; /* someone else read in the rgrp; free it and ignore it */
+ gfs2_glock_put(rgd->rd_gl);
+
+ fail:
+ kfree(rgd->rd_bits);
++ rgd->rd_bits = NULL;
+ kmem_cache_free(gfs2_rgrpd_cachep, rgd);
+ return error;
+ }
+--
+2.20.1
+
--- /dev/null
+From c4c30d3af3ba504f5a6de5956366e1a63659269a Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 28 Aug 2019 00:11:06 +0100
+Subject: inet: switch IP ID generator to siphash
+
+commit df453700e8d81b1bdafdf684365ee2b9431fb702 upstream.
+
+According to Amit Klein and Benny Pinkas, IP ID generation is too weak
+and might be used by attackers.
+
+Even with the recent net_hash_mix() fix ("netns: provide pure entropy for
+net_hash_mix()"), having a 64-bit key and a Jenkins hash is risky.
+
+It is time to switch to siphash and its 128bit keys.
+
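+The per-netns key is initialised lazily. The "not safe, but this is okay"
+comment in the hunk below presumably refers to the fact that the key is
+written without any locking: concurrent CPUs may race in get_random_bytes()
+or briefly observe a partially written key, which is tolerated because any
+resulting key value still produces usable IDs. A minimal sketch of the
+pattern (names as in the diff below):
+
+    if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
+        get_random_bytes(&net->ipv4.ip_id_key,
+                         sizeof(net->ipv4.ip_id_key));
+    hash = siphash_3u32((__force u32)iph->daddr, (__force u32)iph->saddr,
+                        iph->protocol, &net->ipv4.ip_id_key);
+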
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Amit Klein <aksecurity@gmail.com>
+Reported-by: Benny Pinkas <benny@pinkas.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[bwh: Backported to 4.4: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/siphash.h | 5 +++++
+ include/net/netns/ipv4.h | 2 ++
+ net/ipv4/route.c | 12 +++++++-----
+ net/ipv6/output_core.c | 30 ++++++++++++++++--------------
+ 4 files changed, 30 insertions(+), 19 deletions(-)
+
+diff --git a/include/linux/siphash.h b/include/linux/siphash.h
+index fa7a6b9cedbff..bf21591a9e5e6 100644
+--- a/include/linux/siphash.h
++++ b/include/linux/siphash.h
+@@ -21,6 +21,11 @@ typedef struct {
+ u64 key[2];
+ } siphash_key_t;
+
++static inline bool siphash_key_is_zero(const siphash_key_t *key)
++{
++ return !(key->key[0] | key->key[1]);
++}
++
+ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
+ #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
+diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
+index 61c38f87ea079..e6f49f22e0066 100644
+--- a/include/net/netns/ipv4.h
++++ b/include/net/netns/ipv4.h
+@@ -8,6 +8,7 @@
+ #include <linux/uidgid.h>
+ #include <net/inet_frag.h>
+ #include <linux/rcupdate.h>
++#include <linux/siphash.h>
+
+ struct tcpm_hash_bucket;
+ struct ctl_table_header;
+@@ -109,5 +110,6 @@ struct netns_ipv4 {
+ #endif
+ #endif
+ atomic_t rt_genid;
++ siphash_key_t ip_id_key;
+ };
+ #endif
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index a58effba760ae..3c605a788ba1e 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -490,15 +490,17 @@ EXPORT_SYMBOL(ip_idents_reserve);
+
+ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
+ {
+- static u32 ip_idents_hashrnd __read_mostly;
+ u32 hash, id;
+
+- net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
++ /* Note the following code is not safe, but this is okay. */
++ if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
++ get_random_bytes(&net->ipv4.ip_id_key,
++ sizeof(net->ipv4.ip_id_key));
+
+- hash = jhash_3words((__force u32)iph->daddr,
++ hash = siphash_3u32((__force u32)iph->daddr,
+ (__force u32)iph->saddr,
+- iph->protocol ^ net_hash_mix(net),
+- ip_idents_hashrnd);
++ iph->protocol,
++ &net->ipv4.ip_id_key);
+ id = ip_idents_reserve(hash, segs);
+ iph->id = htons(id);
+ }
+diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
+index f99a04674419b..6b896cc9604e5 100644
+--- a/net/ipv6/output_core.c
++++ b/net/ipv6/output_core.c
+@@ -10,15 +10,25 @@
+ #include <net/secure_seq.h>
+ #include <linux/netfilter.h>
+
+-static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
++static u32 __ipv6_select_ident(struct net *net,
+ const struct in6_addr *dst,
+ const struct in6_addr *src)
+ {
++ const struct {
++ struct in6_addr dst;
++ struct in6_addr src;
++ } __aligned(SIPHASH_ALIGNMENT) combined = {
++ .dst = *dst,
++ .src = *src,
++ };
+ u32 hash, id;
+
+- hash = __ipv6_addr_jhash(dst, hashrnd);
+- hash = __ipv6_addr_jhash(src, hash);
+- hash ^= net_hash_mix(net);
++ /* Note the following code is not safe, but this is okay. */
++ if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
++ get_random_bytes(&net->ipv4.ip_id_key,
++ sizeof(net->ipv4.ip_id_key));
++
++ hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key);
+
+ /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
+ * set the hight order instead thus minimizing possible future
+@@ -41,7 +51,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
+ */
+ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
+ {
+- static u32 ip6_proxy_idents_hashrnd __read_mostly;
+ struct in6_addr buf[2];
+ struct in6_addr *addrs;
+ u32 id;
+@@ -53,11 +62,7 @@ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
+ if (!addrs)
+ return;
+
+- net_get_random_once(&ip6_proxy_idents_hashrnd,
+- sizeof(ip6_proxy_idents_hashrnd));
+-
+- id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
+- &addrs[1], &addrs[0]);
++ id = __ipv6_select_ident(net, &addrs[1], &addrs[0]);
+ skb_shinfo(skb)->ip6_frag_id = htonl(id);
+ }
+ EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
+@@ -66,12 +71,9 @@ __be32 ipv6_select_ident(struct net *net,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
+ {
+- static u32 ip6_idents_hashrnd __read_mostly;
+ u32 id;
+
+- net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
+-
+- id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr);
++ id = __ipv6_select_ident(net, daddr, saddr);
+ return htonl(id);
+ }
+ EXPORT_SYMBOL(ipv6_select_ident);
+--
+2.20.1
+
--- /dev/null
+From af0b371153dd9db0af1cbdf1b3cb93d3b6b340e0 Mon Sep 17 00:00:00 2001
+From: Alexander Kochetkov <al.kochet@gmail.com>
+Date: Wed, 28 Aug 2019 00:10:06 +0100
+Subject: net: arc_emac: fix koops caused by sk_buff free
+
+commit c278c253f3d992c6994d08aa0efb2b6806ca396f upstream.
+
+There is a race between arc_emac_tx() and arc_emac_tx_clean().
+The sk_buff can be freed by arc_emac_tx_clean() while arc_emac_tx()
+is still submitting it.
+
+In order to free the sk_buff, arc_emac_tx_clean() checks:
+ if ((info & FOR_EMAC) || !txbd->data)
+ break;
+ ...
+ dev_kfree_skb_irq(skb);
+
+If the condition is false, arc_emac_tx_clean() frees the sk_buff.
+
+In order to submit the txbd, arc_emac_tx() does:
+ priv->tx_buff[*txbd_curr].skb = skb;
+ ...
+ priv->txbd[*txbd_curr].data = cpu_to_le32(addr);
+ ...
+ ... <== arc_emac_tx_clean() check condition here
+ ... <== (info & FOR_EMAC) is false
+ ... <== !txbd->data is false
+ ...
+ *info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len);
+
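+The fix below therefore publishes the descriptor in the opposite order:
+the info word (with FOR_EMAC set) is written first, a wmb() orders the
+stores, and only then is tx_buff[*txbd_curr].skb set; arc_emac_tx_clean()
+additionally skips entries whose skb is still NULL. A rough sketch of the
+resulting submit path (paraphrased from the hunks below, not verbatim):
+
+    priv->txbd[*txbd_curr].data = cpu_to_le32(addr);
+    wmb();                              /* data word before info word */
+    *info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len);
+    wmb();                              /* info word before skb pointer */
+    priv->tx_buff[*txbd_curr].skb = skb;  /* now visible to tx_clean() */
+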
+In order to reproduce the situation,
+run on the device:
+ # iperf -s
+and run on the host:
+ # iperf -t 600 -c <device-ip-addr>
+
+[ 28.396284] ------------[ cut here ]------------
+[ 28.400912] kernel BUG at .../net/core/skbuff.c:1355!
+[ 28.414019] Internal error: Oops - BUG: 0 [#1] SMP ARM
+[ 28.419150] Modules linked in:
+[ 28.422219] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G B 4.4.0+ #120
+[ 28.429516] Hardware name: Rockchip (Device Tree)
+[ 28.434216] task: c0665070 ti: c0660000 task.ti: c0660000
+[ 28.439622] PC is at skb_put+0x10/0x54
+[ 28.443381] LR is at arc_emac_poll+0x260/0x474
+[ 28.447821] pc : [<c03af580>] lr : [<c028fec4>] psr: a0070113
+[ 28.447821] sp : c0661e58 ip : eea68502 fp : ef377000
+[ 28.459280] r10: 0000012c r9 : f08b2000 r8 : eeb57100
+[ 28.464498] r7 : 00000000 r6 : ef376594 r5 : 00000077 r4 : ef376000
+[ 28.471015] r3 : 0030488b r2 : ef13e880 r1 : 000005ee r0 : eeb57100
+[ 28.477534] Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
+[ 28.484658] Control: 10c5387d Table: 8eaf004a DAC: 00000051
+[ 28.490396] Process swapper/0 (pid: 0, stack limit = 0xc0660210)
+[ 28.496393] Stack: (0xc0661e58 to 0xc0662000)
+[ 28.500745] 1e40: 00000002 00000000
+[ 28.508913] 1e60: 00000000 ef376520 00000028 f08b23b8 00000000 ef376520 ef7b6900 c028fc64
+[ 28.517082] 1e80: 2f158000 c0661ea8 c0661eb0 0000012c c065e900 c03bdeac ffff95e9 c0662100
+[ 28.525250] 1ea0: c0663924 00000028 c0661ea8 c0661ea8 c0661eb0 c0661eb0 0000001e c0660000
+[ 28.533417] 1ec0: 40000003 00000008 c0695a00 0000000a c066208c 00000100 c0661ee0 c0027410
+[ 28.541584] 1ee0: ef0fb700 2f158000 00200000 ffff95e8 00000004 c0662100 c0662080 00000003
+[ 28.549751] 1f00: 00000000 00000000 00000000 c065b45c 0000001e ef005000 c0647a30 00000000
+[ 28.557919] 1f20: 00000000 c0027798 00000000 c005cf40 f0802100 c0662ffc c0661f60 f0803100
+[ 28.566088] 1f40: c0661fb8 c00093bc c000ffb4 60070013 ffffffff c0661f94 c0661fb8 c00137d4
+[ 28.574267] 1f60: 00000001 00000000 00000000 c001ffa0 00000000 c0660000 00000000 c065a364
+[ 28.582441] 1f80: c0661fb8 c0647a30 00000000 00000000 00000000 c0661fb0 c000ffb0 c000ffb4
+[ 28.590608] 1fa0: 60070013 ffffffff 00000051 00000000 00000000 c005496c c0662400 c061bc40
+[ 28.598776] 1fc0: ffffffff ffffffff 00000000 c061b680 00000000 c0647a30 00000000 c0695294
+[ 28.606943] 1fe0: c0662488 c0647a2c c066619c 6000406a 413fc090 6000807c 00000000 00000000
+[ 28.615127] [<c03af580>] (skb_put) from [<ef376520>] (0xef376520)
+[ 28.621218] Code: e5902054 e590c090 e3520000 0a000000 (e7f001f2)
+[ 28.627307] ---[ end trace 4824734e2243fdb6 ]---
+
+[ 34.377068] Internal error: Oops: 17 [#1] SMP ARM
+[ 34.382854] Modules linked in:
+[ 34.385947] CPU: 0 PID: 3 Comm: ksoftirqd/0 Not tainted 4.4.0+ #120
+[ 34.392219] Hardware name: Rockchip (Device Tree)
+[ 34.396937] task: ef02d040 ti: ef05c000 task.ti: ef05c000
+[ 34.402376] PC is at __dev_kfree_skb_irq+0x4/0x80
+[ 34.407121] LR is at arc_emac_poll+0x130/0x474
+[ 34.411583] pc : [<c03bb640>] lr : [<c028fd94>] psr: 60030013
+[ 34.411583] sp : ef05de68 ip : 0008e83c fp : ef377000
+[ 34.423062] r10: c001bec4 r9 : 00000000 r8 : f08b24c8
+[ 34.428296] r7 : f08b2400 r6 : 00000075 r5 : 00000019 r4 : ef376000
+[ 34.434827] r3 : 00060000 r2 : 00000042 r1 : 00000001 r0 : 00000000
+[ 34.441365] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
+[ 34.448507] Control: 10c5387d Table: 8f25c04a DAC: 00000051
+[ 34.454262] Process ksoftirqd/0 (pid: 3, stack limit = 0xef05c210)
+[ 34.460449] Stack: (0xef05de68 to 0xef05e000)
+[ 34.464827] de60: ef376000 c028fd94 00000000 c0669480 c0669480 ef376520
+[ 34.473022] de80: 00000028 00000001 00002ae4 ef376520 ef7b6900 c028fc64 2f158000 ef05dec0
+[ 34.481215] dea0: ef05dec8 0000012c c065e900 c03bdeac ffff983f c0662100 c0663924 00000028
+[ 34.489409] dec0: ef05dec0 ef05dec0 ef05dec8 ef05dec8 ef7b6000 ef05c000 40000003 00000008
+[ 34.497600] dee0: c0695a00 0000000a c066208c 00000100 ef05def8 c0027410 ef7b6000 40000000
+[ 34.505795] df00: 04208040 ffff983e 00000004 c0662100 c0662080 00000003 ef05c000 ef027340
+[ 34.513985] df20: ef05c000 c0666c2c 00000000 00000001 00000002 00000000 00000000 c0027568
+[ 34.522176] df40: ef027340 c003ef48 ef027300 00000000 ef027340 c003edd4 00000000 00000000
+[ 34.530367] df60: 00000000 c003c37c ffffff7f 00000001 00000000 ef027340 00000000 00030003
+[ 34.538559] df80: ef05df80 ef05df80 00000000 00000000 ef05df90 ef05df90 ef05dfac ef027300
+[ 34.546750] dfa0: c003c2a4 00000000 00000000 c000f578 00000000 00000000 00000000 00000000
+[ 34.554939] dfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
+[ 34.563129] dfe0: 00000000 00000000 00000000 00000000 00000013 00000000 ffffffff dfff7fff
+[ 34.571360] [<c03bb640>] (__dev_kfree_skb_irq) from [<c028fd94>] (arc_emac_poll+0x130/0x474)
+[ 34.579840] [<c028fd94>] (arc_emac_poll) from [<c03bdeac>] (net_rx_action+0xdc/0x28c)
+[ 34.587712] [<c03bdeac>] (net_rx_action) from [<c0027410>] (__do_softirq+0xcc/0x1f8)
+[ 34.595482] [<c0027410>] (__do_softirq) from [<c0027568>] (run_ksoftirqd+0x2c/0x50)
+[ 34.603168] [<c0027568>] (run_ksoftirqd) from [<c003ef48>] (smpboot_thread_fn+0x174/0x18c)
+[ 34.611466] [<c003ef48>] (smpboot_thread_fn) from [<c003c37c>] (kthread+0xd8/0xec)
+[ 34.619075] [<c003c37c>] (kthread) from [<c000f578>] (ret_from_fork+0x14/0x3c)
+[ 34.626317] Code: e8bd8010 e3a00000 e12fff1e e92d4010 (e59030a4)
+[ 34.632572] ---[ end trace cca5a3d86a82249a ]---
+
+Signed-off-by: Alexander Kochetkov <al.kochet@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/arc/emac_main.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
+index 9cc5daed13edd..b0285ac203f09 100644
+--- a/drivers/net/ethernet/arc/emac_main.c
++++ b/drivers/net/ethernet/arc/emac_main.c
+@@ -163,7 +163,7 @@ static void arc_emac_tx_clean(struct net_device *ndev)
+ struct sk_buff *skb = tx_buff->skb;
+ unsigned int info = le32_to_cpu(txbd->info);
+
+- if ((info & FOR_EMAC) || !txbd->data)
++ if ((info & FOR_EMAC) || !txbd->data || !skb)
+ break;
+
+ if (unlikely(info & (DROP | DEFR | LTCL | UFLO))) {
+@@ -191,6 +191,7 @@ static void arc_emac_tx_clean(struct net_device *ndev)
+
+ txbd->data = 0;
+ txbd->info = 0;
++ tx_buff->skb = NULL;
+
+ *txbd_dirty = (*txbd_dirty + 1) % TX_BD_NUM;
+ }
+@@ -619,7 +620,6 @@ static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev)
+ dma_unmap_addr_set(&priv->tx_buff[*txbd_curr], addr, addr);
+ dma_unmap_len_set(&priv->tx_buff[*txbd_curr], len, len);
+
+- priv->tx_buff[*txbd_curr].skb = skb;
+ priv->txbd[*txbd_curr].data = cpu_to_le32(addr);
+
+ /* Make sure pointer to data buffer is set */
+@@ -629,6 +629,11 @@ static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev)
+
+ *info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len);
+
++ /* Make sure info word is set */
++ wmb();
++
++ priv->tx_buff[*txbd_curr].skb = skb;
++
+ /* Increment index to point to the next BD */
+ *txbd_curr = (*txbd_curr + 1) % TX_BD_NUM;
+
+--
+2.20.1
+
--- /dev/null
+From 01ad41c521f265e9d86d72cfd636fc2baef458ec Mon Sep 17 00:00:00 2001
+From: Dirk Morris <dmorris@metaloft.com>
+Date: Wed, 28 Aug 2019 00:11:19 +0100
+Subject: netfilter: conntrack: Use consistent ct id hash calculation
+
+commit 656c8e9cc1badbc18eefe6ba01d33ebbcae61b9a upstream.
+
+Change ct id hash calculation to only use invariants.
+
+Currently the ct id hash calculation is based on some fields that can
+change in the lifetime on a conntrack entry in some corner cases. The
+current hash uses the whole tuple which contains an hlist pointer which
+will change when the conntrack is placed on the dying list resulting in
+a ct id change.
+
+This patch also removes the reply-side tuple and extension pointer from
+the hash calculation so that the ct id will not change from
+initialization until confirmation.
+
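+In other words, the id is now effectively computed from the following
+inputs (paraphrased from the hunk below), all of which are fixed from
+allocation onwards:
+
+    a = (unsigned long)ct;                       /* nf_conn address */
+    b = (unsigned long)ct->master;               /* normally NULL */
+    c = (unsigned long)nf_ct_net(ct);            /* net namespace */
+    d = siphash(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+                sizeof(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple),
+                &ct_id_seed);
+    id = siphash_4u64(a, b, c, d, &ct_id_seed);  /* 64-bit variant */
+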
+Fixes: 3c79107631db1f7 ("netfilter: ctnetlink: don't use conntrack/expect object addresses as id")
+Signed-off-by: Dirk Morris <dmorris@metaloft.com>
+Acked-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_conntrack_core.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
+index fd301fb137194..de0aad12b91d2 100644
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -241,13 +241,12 @@ EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
+ * table location, we assume id gets exposed to userspace.
+ *
+ * Following nf_conn items do not change throughout lifetime
+- * of the nf_conn after it has been committed to main hash table:
++ * of the nf_conn:
+ *
+ * 1. nf_conn address
+- * 2. nf_conn->ext address
+- * 3. nf_conn->master address (normally NULL)
+- * 4. tuple
+- * 5. the associated net namespace
++ * 2. nf_conn->master address (normally NULL)
++ * 3. the associated net namespace
++ * 4. the original direction tuple
+ */
+ u32 nf_ct_get_id(const struct nf_conn *ct)
+ {
+@@ -257,9 +256,10 @@ u32 nf_ct_get_id(const struct nf_conn *ct)
+ net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));
+
+ a = (unsigned long)ct;
+- b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct));
+- c = (unsigned long)ct->ext;
+- d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash),
++ b = (unsigned long)ct->master;
++ c = (unsigned long)nf_ct_net(ct);
++ d = (unsigned long)siphash(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
++ sizeof(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple),
+ &ct_id_seed);
+ #ifdef CONFIG_64BIT
+ return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed);
+--
+2.20.1
+
--- /dev/null
+From aba18ebcca252ba6851ad9cde8a29b566bf12fde Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Wed, 28 Aug 2019 00:11:12 +0100
+Subject: netfilter: ctnetlink: don't use conntrack/expect object addresses as
+ id
+
+commit 3c79107631db1f7fd32cf3f7368e4672004a3010 upstream.
+
+Otherwise, we leak the addresses to userspace via ctnetlink events
+and dumps.
+
+Compute an ID on demand based on the immutable parts of the nf_conn struct.
+
+Another advantage compared to using an address is that there is no
+immediate re-use of the same ID in case the conntrack entry is freed and
+immediately reallocated.
+
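+The object address still feeds into the id, but only as one input of a
+keyed siphash, so userspace can no longer recover the kernel pointer from
+the id it is shown. A simplified sketch of the conntrack case (paraphrased
+from nf_ct_get_id() in the hunks below, 64-bit variant):
+
+    net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));
+    a = (unsigned long)ct;
+    b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct));
+    c = (unsigned long)ct->ext;
+    d = siphash(&ct->tuplehash, sizeof(ct->tuplehash), &ct_id_seed);
+    id = siphash_4u64(a, b, c, d, &ct_id_seed);
+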
+Fixes: 3583240249ef ("[NETFILTER]: nf_conntrack_expect: kill unique ID")
+Fixes: 7f85f914721f ("[NETFILTER]: nf_conntrack: kill unique ID")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+[bwh: Backported to 4.4: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_conntrack.h | 2 ++
+ net/netfilter/nf_conntrack_core.c | 35 ++++++++++++++++++++++++++++
+ net/netfilter/nf_conntrack_netlink.c | 34 +++++++++++++++++++++++----
+ 3 files changed, 66 insertions(+), 5 deletions(-)
+
+diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
+index fde4068eec0b2..636e9e11bd5f6 100644
+--- a/include/net/netfilter/nf_conntrack.h
++++ b/include/net/netfilter/nf_conntrack.h
+@@ -297,6 +297,8 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
+ gfp_t flags);
+ void nf_ct_tmpl_free(struct nf_conn *tmpl);
+
++u32 nf_ct_get_id(const struct nf_conn *ct);
++
+ #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count)
+ #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
+
+diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
+index 5f747089024fa..fd301fb137194 100644
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -23,6 +23,7 @@
+ #include <linux/slab.h>
+ #include <linux/random.h>
+ #include <linux/jhash.h>
++#include <linux/siphash.h>
+ #include <linux/err.h>
+ #include <linux/percpu.h>
+ #include <linux/moduleparam.h>
+@@ -234,6 +235,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
+ }
+ EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
+
++/* Generate a almost-unique pseudo-id for a given conntrack.
++ *
++ * intentionally doesn't re-use any of the seeds used for hash
++ * table location, we assume id gets exposed to userspace.
++ *
++ * Following nf_conn items do not change throughout lifetime
++ * of the nf_conn after it has been committed to main hash table:
++ *
++ * 1. nf_conn address
++ * 2. nf_conn->ext address
++ * 3. nf_conn->master address (normally NULL)
++ * 4. tuple
++ * 5. the associated net namespace
++ */
++u32 nf_ct_get_id(const struct nf_conn *ct)
++{
++ static __read_mostly siphash_key_t ct_id_seed;
++ unsigned long a, b, c, d;
++
++ net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));
++
++ a = (unsigned long)ct;
++ b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct));
++ c = (unsigned long)ct->ext;
++ d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash),
++ &ct_id_seed);
++#ifdef CONFIG_64BIT
++ return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed);
++#else
++ return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed);
++#endif
++}
++EXPORT_SYMBOL_GPL(nf_ct_get_id);
++
+ static void
+ clean_from_lists(struct nf_conn *ct)
+ {
+diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
+index c68e020427ab9..3a24c01cb9090 100644
+--- a/net/netfilter/nf_conntrack_netlink.c
++++ b/net/netfilter/nf_conntrack_netlink.c
+@@ -29,6 +29,7 @@
+ #include <linux/spinlock.h>
+ #include <linux/interrupt.h>
+ #include <linux/slab.h>
++#include <linux/siphash.h>
+
+ #include <linux/netfilter.h>
+ #include <net/netlink.h>
+@@ -451,7 +452,9 @@ ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, const struct nf_conn *ct)
+ static inline int
+ ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
+ {
+- if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct)))
++ __be32 id = (__force __be32)nf_ct_get_id(ct);
++
++ if (nla_put_be32(skb, CTA_ID, id))
+ goto nla_put_failure;
+ return 0;
+
+@@ -1159,8 +1162,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
+ if (cda[CTA_ID]) {
+- u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
+- if (id != (u32)(unsigned long)ct) {
++ __be32 id = nla_get_be32(cda[CTA_ID]);
++
++ if (id != (__force __be32)nf_ct_get_id(ct)) {
+ nf_ct_put(ct);
+ return -ENOENT;
+ }
+@@ -2480,6 +2484,25 @@ nla_put_failure:
+
+ static const union nf_inet_addr any_addr;
+
++static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp)
++{
++ static __read_mostly siphash_key_t exp_id_seed;
++ unsigned long a, b, c, d;
++
++ net_get_random_once(&exp_id_seed, sizeof(exp_id_seed));
++
++ a = (unsigned long)exp;
++ b = (unsigned long)exp->helper;
++ c = (unsigned long)exp->master;
++ d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed);
++
++#ifdef CONFIG_64BIT
++ return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed);
++#else
++ return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed);
++#endif
++}
++
+ static int
+ ctnetlink_exp_dump_expect(struct sk_buff *skb,
+ const struct nf_conntrack_expect *exp)
+@@ -2527,7 +2550,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
+ }
+ #endif
+ if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) ||
+- nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) ||
++ nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) ||
+ nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) ||
+ nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class)))
+ goto nla_put_failure;
+@@ -2824,7 +2847,8 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
+
+ if (cda[CTA_EXPECT_ID]) {
+ __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
+- if (ntohl(id) != (u32)(unsigned long)exp) {
++
++ if (id != nf_expect_get_id(exp)) {
+ nf_ct_expect_put(exp);
+ return -ENOENT;
+ }
+--
+2.20.1
+
--- /dev/null
+From 411d66a8492f3583e844c6c374d15b1d8bc04335 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Aug 2019 22:58:51 -0400
+Subject: Revert "perf test 6: Fix missing kvm module load for s390"
+
+This reverts commit 5f18429ae48faebefc00533cb24afdd01064754c.
+
+Which was upstream commit 53fe307dfd309e425b171f6272d64296a54f4dff.
+
+Ben Hutchings reports that this commit depends on new code added in
+v4.18, and so is irrelevant on older kernels, and breaks the build.
+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/tests/parse-events.c | 27 ---------------------------
+ 1 file changed, 27 deletions(-)
+
+diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
+index 1a35ab044c11d..54af2f2e2ee4f 100644
+--- a/tools/perf/tests/parse-events.c
++++ b/tools/perf/tests/parse-events.c
+@@ -12,32 +12,6 @@
+ #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \
+ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD)
+
+-#if defined(__s390x__)
+-/* Return true if kvm module is available and loaded. Test this
+- * and retun success when trace point kvm_s390_create_vm
+- * exists. Otherwise this test always fails.
+- */
+-static bool kvm_s390_create_vm_valid(void)
+-{
+- char *eventfile;
+- bool rc = false;
+-
+- eventfile = get_events_file("kvm-s390");
+-
+- if (eventfile) {
+- DIR *mydir = opendir(eventfile);
+-
+- if (mydir) {
+- rc = true;
+- closedir(mydir);
+- }
+- put_events_file(eventfile);
+- }
+-
+- return rc;
+-}
+-#endif
+-
+ static int test__checkevent_tracepoint(struct perf_evlist *evlist)
+ {
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+@@ -1587,7 +1561,6 @@ static struct evlist_test test__events[] = {
+ {
+ .name = "kvm-s390:kvm_s390_create_vm",
+ .check = test__checkevent_tracepoint,
+- .valid = kvm_s390_create_vm_valid,
+ .id = 100,
+ },
+ #endif
+--
+2.20.1
+
dm-space-map-metadata-fix-missing-store-of-apply_bops-return-value.patch
dm-table-fix-invalid-memory-accesses-with-too-high-sector-number.patch
cgroup-disable-irqs-while-holding-css_set_lock.patch
+gfs2-don-t-set-rgrp-gl_object-until-it-s-inserted-in.patch
+net-arc_emac-fix-koops-caused-by-sk_buff-free.patch
+vhost-net-set-packet-weight-of-tx-polling-to-2-vq-si.patch
+vhost_net-use-packet-weight-for-rx-handler-too.patch
+vhost_net-introduce-vhost_exceeds_weight.patch
+vhost-introduce-vhost_exceeds_weight.patch
+vhost_net-fix-possible-infinite-loop.patch
+vhost-scsi-add-weight-support.patch
+siphash-add-cryptographically-secure-prf.patch
+siphash-implement-halfsiphash1-3-for-hash-tables.patch
+inet-switch-ip-id-generator-to-siphash.patch
+netfilter-ctnetlink-don-t-use-conntrack-expect-objec.patch
+netfilter-conntrack-use-consistent-ct-id-hash-calcul.patch
+revert-perf-test-6-fix-missing-kvm-module-load-for-s.patch
--- /dev/null
+From d6aad4115e088a307ca49a8c3f4d73873374c767 Mon Sep 17 00:00:00 2001
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Wed, 28 Aug 2019 00:10:54 +0100
+Subject: siphash: add cryptographically secure PRF
+
+commit 2c956a60778cbb6a27e0c7a8a52a91378c90e1d1 upstream.
+
+SipHash is a 64-bit keyed hash function that is actually a
+cryptographically secure PRF, like HMAC. Except SipHash is super fast,
+and is meant to be used as a hashtable keyed lookup function, or as a
+general PRF for short input use cases, such as sequence numbers or RNG
+chaining.
+
+For the first usage:
+
+There are a variety of attacks known as "hashtable poisoning" in which an
+attacker forms some data such that the hash of that data will be the
+same, and then proceeds to fill up all entries of a hashbucket. This is
+a realistic and well-known denial-of-service vector. Currently
+hashtables use jhash, which is fast but not secure, and some kind of
+rotating key scheme (or none at all, which isn't good). SipHash is meant
+as a replacement for jhash in these cases.
+
+There are a modicum of places in the kernel that are vulnerable to
+hashtable poisoning attacks, either via userspace vectors or network
+vectors, and there's not a reliable mechanism inside the kernel at the
+moment to fix it. The first step toward fixing these issues is actually
+getting a secure primitive into the kernel for developers to use. Then
+we can, bit by bit, port things over to it as deemed appropriate.
+
+While SipHash is extremely fast for a cryptographically secure function,
+it is likely a bit slower than the insecure jhash, and so replacements
+will be evaluated on a case-by-case basis based on whether or not the
+difference in speed is negligible and whether or not the current jhash usage
+poses a real security risk.
+
+For the second usage:
+
+A few places in the kernel are using MD5 or SHA1 for creating secure
+sequence numbers, syn cookies, port numbers, or fast random numbers.
+SipHash is a faster, more fitting, and more secure replacement for MD5
+in those situations. Replacing MD5 and SHA1 with SipHash for these uses is
+obvious and straightforward, and so is submitted along with this patch
+series. There shouldn't be much of a debate over its efficacy.
+
+Dozens of languages are already using this internally for their hash
+tables and PRFs. Some of the BSDs already use this in their kernels.
+SipHash is a widely known high-speed solution to a widely known set of
+problems, and it's time we catch up.
+
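+As a quick illustration of the integer interface added here (a hypothetical
+caller, not part of this patch; saddr/daddr/sport/dport are placeholders):
+
+    #include <linux/siphash.h>
+
+    static siphash_key_t seq_key __read_mostly;
+
+    net_get_random_once(&seq_key, sizeof(seq_key));
+    /* e.g. derive a sequence number from a (saddr, daddr, ports) tuple */
+    u64 seq = siphash_3u32(saddr, daddr,
+                           ((u32)sport << 16) | dport, &seq_key);
+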
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Reviewed-by: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Eric Biggers <ebiggers3@gmail.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[bwh: Backported to 4.4 as dependency of commits df453700e8d8 "inet: switch
+ IP ID generator to siphash" and 3c79107631db "netfilter: ctnetlink: don't
+ use conntrack/expect object addresses as id":
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/siphash.txt | 100 ++++++++++++++++
+ MAINTAINERS | 7 ++
+ include/linux/siphash.h | 85 ++++++++++++++
+ lib/Kconfig.debug | 10 ++
+ lib/Makefile | 3 +-
+ lib/siphash.c | 232 ++++++++++++++++++++++++++++++++++++++
+ lib/test_siphash.c | 131 +++++++++++++++++++++
+ 7 files changed, 567 insertions(+), 1 deletion(-)
+ create mode 100644 Documentation/siphash.txt
+ create mode 100644 include/linux/siphash.h
+ create mode 100644 lib/siphash.c
+ create mode 100644 lib/test_siphash.c
+
+diff --git a/Documentation/siphash.txt b/Documentation/siphash.txt
+new file mode 100644
+index 0000000000000..e8e6ddbbaab47
+--- /dev/null
++++ b/Documentation/siphash.txt
+@@ -0,0 +1,100 @@
++ SipHash - a short input PRF
++-----------------------------------------------
++Written by Jason A. Donenfeld <jason@zx2c4.com>
++
++SipHash is a cryptographically secure PRF -- a keyed hash function -- that
++performs very well for short inputs, hence the name. It was designed by
++cryptographers Daniel J. Bernstein and Jean-Philippe Aumasson. It is intended
++as a replacement for some uses of: `jhash`, `md5_transform`, `sha_transform`,
++and so forth.
++
++SipHash takes a secret key filled with randomly generated numbers and either
++an input buffer or several input integers. It spits out an integer that is
++indistinguishable from random. You may then use that integer as part of secure
++sequence numbers, secure cookies, or mask it off for use in a hash table.
++
++1. Generating a key
++
++Keys should always be generated from a cryptographically secure source of
++random numbers, either using get_random_bytes or get_random_once:
++
++siphash_key_t key;
++get_random_bytes(&key, sizeof(key));
++
++If you're not deriving your key from here, you're doing it wrong.
++
++2. Using the functions
++
++There are two variants of the function, one that takes a list of integers, and
++one that takes a buffer:
++
++u64 siphash(const void *data, size_t len, const siphash_key_t *key);
++
++And:
++
++u64 siphash_1u64(u64, const siphash_key_t *key);
++u64 siphash_2u64(u64, u64, const siphash_key_t *key);
++u64 siphash_3u64(u64, u64, u64, const siphash_key_t *key);
++u64 siphash_4u64(u64, u64, u64, u64, const siphash_key_t *key);
++u64 siphash_1u32(u32, const siphash_key_t *key);
++u64 siphash_2u32(u32, u32, const siphash_key_t *key);
++u64 siphash_3u32(u32, u32, u32, const siphash_key_t *key);
++u64 siphash_4u32(u32, u32, u32, u32, const siphash_key_t *key);
++
++If you pass the generic siphash function something of a constant length, it
++will constant fold at compile-time and automatically choose one of the
++optimized functions.
++
++3. Hashtable key function usage:
++
++struct some_hashtable {
++ DECLARE_HASHTABLE(hashtable, 8);
++ siphash_key_t key;
++};
++
++void init_hashtable(struct some_hashtable *table)
++{
++ get_random_bytes(&table->key, sizeof(table->key));
++}
++
++static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input)
++{
++ return &table->hashtable[siphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)];
++}
++
++You may then iterate like usual over the returned hash bucket.
++
++4. Security
++
++SipHash has a very high security margin, with its 128-bit key. So long as the
++key is kept secret, it is impossible for an attacker to guess the outputs of
++the function, even if being able to observe many outputs, since 2^128 outputs
++is significant.
++
++Linux implements the "2-4" variant of SipHash.
++
++5. Struct-passing Pitfalls
++
++Often times the XuY functions will not be large enough, and instead you'll
++want to pass a pre-filled struct to siphash. When doing this, it's important
++to always ensure the struct has no padding holes. The easiest way to do this
++is to simply arrange the members of the struct in descending order of size,
++and to use offsetendof() instead of sizeof() for getting the size. For
++performance reasons, if possible, it's probably a good thing to align the
++struct to the right boundary. Here's an example:
++
++const struct {
++ struct in6_addr saddr;
++ u32 counter;
++ u16 dport;
++} __aligned(SIPHASH_ALIGNMENT) combined = {
++ .saddr = *(struct in6_addr *)saddr,
++ .counter = counter,
++ .dport = dport
++};
++u64 h = siphash(&combined, offsetofend(typeof(combined), dport), &secret);
++
++6. Resources
++
++Read the SipHash paper if you're interested in learning more:
++https://131002.net/siphash/siphash.pdf
+diff --git a/MAINTAINERS b/MAINTAINERS
+index f4d4a5544dc10..20a31b3579299 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -9749,6 +9749,13 @@ F: arch/arm/mach-s3c24xx/mach-bast.c
+ F: arch/arm/mach-s3c24xx/bast-ide.c
+ F: arch/arm/mach-s3c24xx/bast-irq.c
+
++SIPHASH PRF ROUTINES
++M: Jason A. Donenfeld <Jason@zx2c4.com>
++S: Maintained
++F: lib/siphash.c
++F: lib/test_siphash.c
++F: include/linux/siphash.h
++
+ TI DAVINCI MACHINE SUPPORT
+ M: Sekhar Nori <nsekhar@ti.com>
+ M: Kevin Hilman <khilman@deeprootsystems.com>
+diff --git a/include/linux/siphash.h b/include/linux/siphash.h
+new file mode 100644
+index 0000000000000..feeb29cd113ed
+--- /dev/null
++++ b/include/linux/siphash.h
+@@ -0,0 +1,85 @@
++/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * This file is provided under a dual BSD/GPLv2 license.
++ *
++ * SipHash: a fast short-input PRF
++ * https://131002.net/siphash/
++ *
++ * This implementation is specifically for SipHash2-4.
++ */
++
++#ifndef _LINUX_SIPHASH_H
++#define _LINUX_SIPHASH_H
++
++#include <linux/types.h>
++#include <linux/kernel.h>
++
++#define SIPHASH_ALIGNMENT __alignof__(u64)
++typedef struct {
++ u64 key[2];
++} siphash_key_t;
++
++u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
++#endif
++
++u64 siphash_1u64(const u64 a, const siphash_key_t *key);
++u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key);
++u64 siphash_3u64(const u64 a, const u64 b, const u64 c,
++ const siphash_key_t *key);
++u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d,
++ const siphash_key_t *key);
++u64 siphash_1u32(const u32 a, const siphash_key_t *key);
++u64 siphash_3u32(const u32 a, const u32 b, const u32 c,
++ const siphash_key_t *key);
++
++static inline u64 siphash_2u32(const u32 a, const u32 b,
++ const siphash_key_t *key)
++{
++ return siphash_1u64((u64)b << 32 | a, key);
++}
++static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c,
++ const u32 d, const siphash_key_t *key)
++{
++ return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key);
++}
++
++
++static inline u64 ___siphash_aligned(const __le64 *data, size_t len,
++ const siphash_key_t *key)
++{
++ if (__builtin_constant_p(len) && len == 4)
++ return siphash_1u32(le32_to_cpup((const __le32 *)data), key);
++ if (__builtin_constant_p(len) && len == 8)
++ return siphash_1u64(le64_to_cpu(data[0]), key);
++ if (__builtin_constant_p(len) && len == 16)
++ return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
++ key);
++ if (__builtin_constant_p(len) && len == 24)
++ return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
++ le64_to_cpu(data[2]), key);
++ if (__builtin_constant_p(len) && len == 32)
++ return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
++ le64_to_cpu(data[2]), le64_to_cpu(data[3]),
++ key);
++ return __siphash_aligned(data, len, key);
++}
++
++/**
++ * siphash - compute 64-bit siphash PRF value
++ * @data: buffer to hash
++ * @size: size of @data
++ * @key: the siphash key
++ */
++static inline u64 siphash(const void *data, size_t len,
++ const siphash_key_t *key)
++{
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++ if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
++ return __siphash_unaligned(data, len, key);
++#endif
++ return ___siphash_aligned(data, len, key);
++}
++
++#endif /* _LINUX_SIPHASH_H */
+diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
+index f0602beeba26d..fd1205a3dbdbc 100644
+--- a/lib/Kconfig.debug
++++ b/lib/Kconfig.debug
+@@ -1706,6 +1706,16 @@ config TEST_RHASHTABLE
+
+ If unsure, say N.
+
++config TEST_HASH
++ tristate "Perform selftest on hash functions"
++ default n
++ help
++ Enable this option to test the kernel's siphash (<linux/siphash.h>)
++ hash functions on boot (or module load).
++
++ This is intended to help people writing architecture-specific
++ optimized versions. If unsure, say N.
++
+ endmenu # runtime tests
+
+ config PROVIDE_OHCI1394_DMA_INIT
+diff --git a/lib/Makefile b/lib/Makefile
+index cb4f6aa95013e..6c6c1fb2fa04e 100644
+--- a/lib/Makefile
++++ b/lib/Makefile
+@@ -13,7 +13,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
+ sha1.o md5.o irq_regs.o argv_split.o \
+ proportions.o flex_proportions.o ratelimit.o show_mem.o \
+ is_single_threaded.o plist.o decompress.o kobject_uevent.o \
+- earlycpio.o seq_buf.o nmi_backtrace.o
++ earlycpio.o seq_buf.o siphash.o nmi_backtrace.o
+
+ obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
+ lib-$(CONFIG_MMU) += ioremap.o
+@@ -35,6 +35,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test-hexdump.o
+ obj-y += kstrtox.o
+ obj-$(CONFIG_TEST_BPF) += test_bpf.o
+ obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
++obj-$(CONFIG_TEST_HASH) += test_siphash.o
+ obj-$(CONFIG_TEST_KASAN) += test_kasan.o
+ obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
+ obj-$(CONFIG_TEST_LKM) += test_module.o
+diff --git a/lib/siphash.c b/lib/siphash.c
+new file mode 100644
+index 0000000000000..c43cf406e71b2
+--- /dev/null
++++ b/lib/siphash.c
+@@ -0,0 +1,232 @@
++/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * This file is provided under a dual BSD/GPLv2 license.
++ *
++ * SipHash: a fast short-input PRF
++ * https://131002.net/siphash/
++ *
++ * This implementation is specifically for SipHash2-4.
++ */
++
++#include <linux/siphash.h>
++#include <asm/unaligned.h>
++
++#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
++#include <linux/dcache.h>
++#include <asm/word-at-a-time.h>
++#endif
++
++#define SIPROUND \
++ do { \
++ v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \
++ v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \
++ v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \
++ v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \
++ } while (0)
++
++#define PREAMBLE(len) \
++ u64 v0 = 0x736f6d6570736575ULL; \
++ u64 v1 = 0x646f72616e646f6dULL; \
++ u64 v2 = 0x6c7967656e657261ULL; \
++ u64 v3 = 0x7465646279746573ULL; \
++ u64 b = ((u64)(len)) << 56; \
++ v3 ^= key->key[1]; \
++ v2 ^= key->key[0]; \
++ v1 ^= key->key[1]; \
++ v0 ^= key->key[0];
++
++#define POSTAMBLE \
++ v3 ^= b; \
++ SIPROUND; \
++ SIPROUND; \
++ v0 ^= b; \
++ v2 ^= 0xff; \
++ SIPROUND; \
++ SIPROUND; \
++ SIPROUND; \
++ SIPROUND; \
++ return (v0 ^ v1) ^ (v2 ^ v3);
++
++u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
++{
++ const u8 *end = data + len - (len % sizeof(u64));
++ const u8 left = len & (sizeof(u64) - 1);
++ u64 m;
++ PREAMBLE(len)
++ for (; data != end; data += sizeof(u64)) {
++ m = le64_to_cpup(data);
++ v3 ^= m;
++ SIPROUND;
++ SIPROUND;
++ v0 ^= m;
++ }
++#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
++ if (left)
++ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
++ bytemask_from_count(left)));
++#else
++ switch (left) {
++ case 7: b |= ((u64)end[6]) << 48;
++ case 6: b |= ((u64)end[5]) << 40;
++ case 5: b |= ((u64)end[4]) << 32;
++ case 4: b |= le32_to_cpup(data); break;
++ case 3: b |= ((u64)end[2]) << 16;
++ case 2: b |= le16_to_cpup(data); break;
++ case 1: b |= end[0];
++ }
++#endif
++ POSTAMBLE
++}
++EXPORT_SYMBOL(__siphash_aligned);
++
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
++{
++ const u8 *end = data + len - (len % sizeof(u64));
++ const u8 left = len & (sizeof(u64) - 1);
++ u64 m;
++ PREAMBLE(len)
++ for (; data != end; data += sizeof(u64)) {
++ m = get_unaligned_le64(data);
++ v3 ^= m;
++ SIPROUND;
++ SIPROUND;
++ v0 ^= m;
++ }
++#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
++ if (left)
++ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
++ bytemask_from_count(left)));
++#else
++ switch (left) {
++ case 7: b |= ((u64)end[6]) << 48;
++ case 6: b |= ((u64)end[5]) << 40;
++ case 5: b |= ((u64)end[4]) << 32;
++ case 4: b |= get_unaligned_le32(end); break;
++ case 3: b |= ((u64)end[2]) << 16;
++ case 2: b |= get_unaligned_le16(end); break;
++ case 1: b |= end[0];
++ }
++#endif
++ POSTAMBLE
++}
++EXPORT_SYMBOL(__siphash_unaligned);
++#endif
++
++/**
++ * siphash_1u64 - compute 64-bit siphash PRF value of a u64
++ * @first: first u64
++ * @key: the siphash key
++ */
++u64 siphash_1u64(const u64 first, const siphash_key_t *key)
++{
++ PREAMBLE(8)
++ v3 ^= first;
++ SIPROUND;
++ SIPROUND;
++ v0 ^= first;
++ POSTAMBLE
++}
++EXPORT_SYMBOL(siphash_1u64);
++
++/**
++ * siphash_2u64 - compute 64-bit siphash PRF value of 2 u64
++ * @first: first u64
++ * @second: second u64
++ * @key: the siphash key
++ */
++u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key)
++{
++ PREAMBLE(16)
++ v3 ^= first;
++ SIPROUND;
++ SIPROUND;
++ v0 ^= first;
++ v3 ^= second;
++ SIPROUND;
++ SIPROUND;
++ v0 ^= second;
++ POSTAMBLE
++}
++EXPORT_SYMBOL(siphash_2u64);
++
++/**
++ * siphash_3u64 - compute 64-bit siphash PRF value of 3 u64
++ * @first: first u64
++ * @second: second u64
++ * @third: third u64
++ * @key: the siphash key
++ */
++u64 siphash_3u64(const u64 first, const u64 second, const u64 third,
++ const siphash_key_t *key)
++{
++ PREAMBLE(24)
++ v3 ^= first;
++ SIPROUND;
++ SIPROUND;
++ v0 ^= first;
++ v3 ^= second;
++ SIPROUND;
++ SIPROUND;
++ v0 ^= second;
++ v3 ^= third;
++ SIPROUND;
++ SIPROUND;
++ v0 ^= third;
++ POSTAMBLE
++}
++EXPORT_SYMBOL(siphash_3u64);
++
++/**
++ * siphash_4u64 - compute 64-bit siphash PRF value of 4 u64
++ * @first: first u64
++ * @second: second u64
++ * @third: third u64
++ * @forth: forth u64
++ * @key: the siphash key
++ */
++u64 siphash_4u64(const u64 first, const u64 second, const u64 third,
++ const u64 forth, const siphash_key_t *key)
++{
++ PREAMBLE(32)
++ v3 ^= first;
++ SIPROUND;
++ SIPROUND;
++ v0 ^= first;
++ v3 ^= second;
++ SIPROUND;
++ SIPROUND;
++ v0 ^= second;
++ v3 ^= third;
++ SIPROUND;
++ SIPROUND;
++ v0 ^= third;
++ v3 ^= forth;
++ SIPROUND;
++ SIPROUND;
++ v0 ^= forth;
++ POSTAMBLE
++}
++EXPORT_SYMBOL(siphash_4u64);
++
++u64 siphash_1u32(const u32 first, const siphash_key_t *key)
++{
++ PREAMBLE(4)
++ b |= first;
++ POSTAMBLE
++}
++EXPORT_SYMBOL(siphash_1u32);
++
++u64 siphash_3u32(const u32 first, const u32 second, const u32 third,
++ const siphash_key_t *key)
++{
++ u64 combined = (u64)second << 32 | first;
++ PREAMBLE(12)
++ v3 ^= combined;
++ SIPROUND;
++ SIPROUND;
++ v0 ^= combined;
++ b |= third;
++ POSTAMBLE
++}
++EXPORT_SYMBOL(siphash_3u32);
+diff --git a/lib/test_siphash.c b/lib/test_siphash.c
+new file mode 100644
+index 0000000000000..d972acfc15e4a
+--- /dev/null
++++ b/lib/test_siphash.c
+@@ -0,0 +1,131 @@
++/* Test cases for siphash.c
++ *
++ * Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * This file is provided under a dual BSD/GPLv2 license.
++ *
++ * SipHash: a fast short-input PRF
++ * https://131002.net/siphash/
++ *
++ * This implementation is specifically for SipHash2-4.
++ */
++
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <linux/siphash.h>
++#include <linux/kernel.h>
++#include <linux/string.h>
++#include <linux/errno.h>
++#include <linux/module.h>
++
++/* Test vectors taken from official reference source available at:
++ * https://131002.net/siphash/siphash24.c
++ */
++
++static const siphash_key_t test_key_siphash =
++ {{ 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL }};
++
++static const u64 test_vectors_siphash[64] = {
++ 0x726fdb47dd0e0e31ULL, 0x74f839c593dc67fdULL, 0x0d6c8009d9a94f5aULL,
++ 0x85676696d7fb7e2dULL, 0xcf2794e0277187b7ULL, 0x18765564cd99a68dULL,
++ 0xcbc9466e58fee3ceULL, 0xab0200f58b01d137ULL, 0x93f5f5799a932462ULL,
++ 0x9e0082df0ba9e4b0ULL, 0x7a5dbbc594ddb9f3ULL, 0xf4b32f46226bada7ULL,
++ 0x751e8fbc860ee5fbULL, 0x14ea5627c0843d90ULL, 0xf723ca908e7af2eeULL,
++ 0xa129ca6149be45e5ULL, 0x3f2acc7f57c29bdbULL, 0x699ae9f52cbe4794ULL,
++ 0x4bc1b3f0968dd39cULL, 0xbb6dc91da77961bdULL, 0xbed65cf21aa2ee98ULL,
++ 0xd0f2cbb02e3b67c7ULL, 0x93536795e3a33e88ULL, 0xa80c038ccd5ccec8ULL,
++ 0xb8ad50c6f649af94ULL, 0xbce192de8a85b8eaULL, 0x17d835b85bbb15f3ULL,
++ 0x2f2e6163076bcfadULL, 0xde4daaaca71dc9a5ULL, 0xa6a2506687956571ULL,
++ 0xad87a3535c49ef28ULL, 0x32d892fad841c342ULL, 0x7127512f72f27cceULL,
++ 0xa7f32346f95978e3ULL, 0x12e0b01abb051238ULL, 0x15e034d40fa197aeULL,
++ 0x314dffbe0815a3b4ULL, 0x027990f029623981ULL, 0xcadcd4e59ef40c4dULL,
++ 0x9abfd8766a33735cULL, 0x0e3ea96b5304a7d0ULL, 0xad0c42d6fc585992ULL,
++ 0x187306c89bc215a9ULL, 0xd4a60abcf3792b95ULL, 0xf935451de4f21df2ULL,
++ 0xa9538f0419755787ULL, 0xdb9acddff56ca510ULL, 0xd06c98cd5c0975ebULL,
++ 0xe612a3cb9ecba951ULL, 0xc766e62cfcadaf96ULL, 0xee64435a9752fe72ULL,
++ 0xa192d576b245165aULL, 0x0a8787bf8ecb74b2ULL, 0x81b3e73d20b49b6fULL,
++ 0x7fa8220ba3b2eceaULL, 0x245731c13ca42499ULL, 0xb78dbfaf3a8d83bdULL,
++ 0xea1ad565322a1a0bULL, 0x60e61c23a3795013ULL, 0x6606d7e446282b93ULL,
++ 0x6ca4ecb15c5f91e1ULL, 0x9f626da15c9625f3ULL, 0xe51b38608ef25f57ULL,
++ 0x958a324ceb064572ULL
++};
++
++static int __init siphash_test_init(void)
++{
++ u8 in[64] __aligned(SIPHASH_ALIGNMENT);
++ u8 in_unaligned[65] __aligned(SIPHASH_ALIGNMENT);
++ u8 i;
++ int ret = 0;
++
++ for (i = 0; i < 64; ++i) {
++ in[i] = i;
++ in_unaligned[i + 1] = i;
++ if (siphash(in, i, &test_key_siphash) !=
++ test_vectors_siphash[i]) {
++ pr_info("siphash self-test aligned %u: FAIL\n", i + 1);
++ ret = -EINVAL;
++ }
++ if (siphash(in_unaligned + 1, i, &test_key_siphash) !=
++ test_vectors_siphash[i]) {
++ pr_info("siphash self-test unaligned %u: FAIL\n", i + 1);
++ ret = -EINVAL;
++ }
++ }
++ if (siphash_1u64(0x0706050403020100ULL, &test_key_siphash) !=
++ test_vectors_siphash[8]) {
++ pr_info("siphash self-test 1u64: FAIL\n");
++ ret = -EINVAL;
++ }
++ if (siphash_2u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
++ &test_key_siphash) != test_vectors_siphash[16]) {
++ pr_info("siphash self-test 2u64: FAIL\n");
++ ret = -EINVAL;
++ }
++ if (siphash_3u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
++ 0x1716151413121110ULL, &test_key_siphash) !=
++ test_vectors_siphash[24]) {
++ pr_info("siphash self-test 3u64: FAIL\n");
++ ret = -EINVAL;
++ }
++ if (siphash_4u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
++ 0x1716151413121110ULL, 0x1f1e1d1c1b1a1918ULL,
++ &test_key_siphash) != test_vectors_siphash[32]) {
++ pr_info("siphash self-test 4u64: FAIL\n");
++ ret = -EINVAL;
++ }
++ if (siphash_1u32(0x03020100U, &test_key_siphash) !=
++ test_vectors_siphash[4]) {
++ pr_info("siphash self-test 1u32: FAIL\n");
++ ret = -EINVAL;
++ }
++ if (siphash_2u32(0x03020100U, 0x07060504U, &test_key_siphash) !=
++ test_vectors_siphash[8]) {
++ pr_info("siphash self-test 2u32: FAIL\n");
++ ret = -EINVAL;
++ }
++ if (siphash_3u32(0x03020100U, 0x07060504U,
++ 0x0b0a0908U, &test_key_siphash) !=
++ test_vectors_siphash[12]) {
++ pr_info("siphash self-test 3u32: FAIL\n");
++ ret = -EINVAL;
++ }
++ if (siphash_4u32(0x03020100U, 0x07060504U,
++ 0x0b0a0908U, 0x0f0e0d0cU, &test_key_siphash) !=
++ test_vectors_siphash[16]) {
++ pr_info("siphash self-test 4u32: FAIL\n");
++ ret = -EINVAL;
++ }
++ if (!ret)
++ pr_info("self-tests: pass\n");
++ return ret;
++}
++
++static void __exit siphash_test_exit(void)
++{
++}
++
++module_init(siphash_test_init);
++module_exit(siphash_test_exit);
++
++MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
++MODULE_LICENSE("Dual BSD/GPL");
+--
+2.20.1
+
--- /dev/null
+From 8a3e693bdec18be46a9801540e36612d81d29883 Mon Sep 17 00:00:00 2001
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Wed, 28 Aug 2019 00:11:00 +0100
+Subject: siphash: implement HalfSipHash1-3 for hash tables
+
+commit 1ae2324f732c9c4e2fa4ebd885fa1001b70d52e1 upstream.
+
+HalfSipHash, or hsiphash, is a shortened version of SipHash, which
+generates 32-bit outputs using a weaker 64-bit key. It has *much* lower
+security margins, and shouldn't be used for anything too sensitive, but
+it could be used as a hashtable key function replacement, if the output
+is never exposed, and if the security requirement is not too high.
+
+The goal is to make this something that performance-critical jhash users
+would be willing to use.
+
+On 64-bit machines, HalfSipHash1-3 is slower than SipHash1-3, so we alias
+SipHash1-3 to HalfSipHash1-3 on those systems.
+
+64-bit x86_64:
+[ 0.509409] test_siphash: SipHash2-4 cycles: 4049181
+[ 0.510650] test_siphash: SipHash1-3 cycles: 2512884
+[ 0.512205] test_siphash: HalfSipHash1-3 cycles: 3429920
+[ 0.512904] test_siphash: JenkinsHash cycles: 978267
+So, we map hsiphash() -> SipHash1-3
+
+32-bit x86:
+[ 0.509868] test_siphash: SipHash2-4 cycles: 14812892
+[ 0.513601] test_siphash: SipHash1-3 cycles: 9510710
+[ 0.515263] test_siphash: HalfSipHash1-3 cycles: 3856157
+[ 0.515952] test_siphash: JenkinsHash cycles: 1148567
+So, we map hsiphash() -> HalfSipHash1-3
+
+hsiphash() is roughly 3 times slower than jhash(), but comes with a
+considerable security improvement.
+
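+A minimal usage sketch for the hashtable case (hypothetical caller; the
+flow_key and my_hashtable names are placeholders, not from this patch):
+
+    #include <linux/siphash.h>
+
+    static hsiphash_key_t ht_key;
+
+    get_random_bytes(&ht_key, sizeof(ht_key));
+    /* pick a bucket; the 32-bit output is never exposed outside the kernel */
+    u32 bucket = hsiphash(&flow_key, sizeof(flow_key), &ht_key) &
+                 (HASH_SIZE(my_hashtable) - 1);
+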
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Reviewed-by: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[bwh: Backported to 4.4 to avoid regression for WireGuard with only half
+ the siphash API present]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/siphash.txt | 75 +++++++++
+ include/linux/siphash.h | 57 ++++++-
+ lib/siphash.c | 321 +++++++++++++++++++++++++++++++++++++-
+ lib/test_siphash.c | 98 +++++++++++-
+ 4 files changed, 546 insertions(+), 5 deletions(-)
+
+diff --git a/Documentation/siphash.txt b/Documentation/siphash.txt
+index e8e6ddbbaab47..908d348ff7776 100644
+--- a/Documentation/siphash.txt
++++ b/Documentation/siphash.txt
+@@ -98,3 +98,78 @@ u64 h = siphash(&combined, offsetofend(typeof(combined), dport), &secret);
+
+ Read the SipHash paper if you're interested in learning more:
+ https://131002.net/siphash/siphash.pdf
++
++
++~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~
++
++HalfSipHash - SipHash's insecure younger cousin
++-----------------------------------------------
++Written by Jason A. Donenfeld <jason@zx2c4.com>
++
++On the off-chance that SipHash is not fast enough for your needs, you might be
++able to justify using HalfSipHash, a terrifying but potentially useful
++possibility. HalfSipHash cuts SipHash's rounds down from "2-4" to "1-3" and,
++even scarier, uses an easily brute-forcable 64-bit key (with a 32-bit output)
++instead of SipHash's 128-bit key. However, this may appeal to some
++high-performance `jhash` users.
++
++Danger!
++
++Do not ever use HalfSipHash except for as a hashtable key function, and only
++then when you can be absolutely certain that the outputs will never be
++transmitted out of the kernel. This is only remotely useful over `jhash` as a
++means of mitigating hashtable flooding denial of service attacks.
++
++1. Generating a key
++
++Keys should always be generated from a cryptographically secure source of
++random numbers, either using get_random_bytes or get_random_once:
++
++hsiphash_key_t key;
++get_random_bytes(&key, sizeof(key));
++
++If you're not deriving your key from here, you're doing it wrong.
++
++2. Using the functions
++
++There are two variants of the function, one that takes a list of integers, and
++one that takes a buffer:
++
++u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key);
++
++And:
++
++u32 hsiphash_1u32(u32, const hsiphash_key_t *key);
++u32 hsiphash_2u32(u32, u32, const hsiphash_key_t *key);
++u32 hsiphash_3u32(u32, u32, u32, const hsiphash_key_t *key);
++u32 hsiphash_4u32(u32, u32, u32, u32, const hsiphash_key_t *key);
++
++If you pass the generic hsiphash function something of a constant length, it
++will constant fold at compile-time and automatically choose one of the
++optimized functions.
++
++3. Hashtable key function usage:
++
++struct some_hashtable {
++ DECLARE_HASHTABLE(hashtable, 8);
++ hsiphash_key_t key;
++};
++
++void init_hashtable(struct some_hashtable *table)
++{
++ get_random_bytes(&table->key, sizeof(table->key));
++}
++
++static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input)
++{
++ return &table->hashtable[hsiphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)];
++}
++
++You may then iterate like usual over the returned hash bucket.
++
++4. Performance
++
++HalfSipHash is roughly 3 times slower than JenkinsHash. For many replacements,
++this will not be a problem, as the hashtable lookup isn't the bottleneck. And
++in general, this is probably a good sacrifice to make for the security and DoS
++resistance of HalfSipHash.
+diff --git a/include/linux/siphash.h b/include/linux/siphash.h
+index feeb29cd113ed..fa7a6b9cedbff 100644
+--- a/include/linux/siphash.h
++++ b/include/linux/siphash.h
+@@ -5,7 +5,9 @@
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ *
+- * This implementation is specifically for SipHash2-4.
++ * This implementation is specifically for SipHash2-4 for a secure PRF
++ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
++ * hashtables.
+ */
+
+ #ifndef _LINUX_SIPHASH_H
+@@ -82,4 +84,57 @@ static inline u64 siphash(const void *data, size_t len,
+ return ___siphash_aligned(data, len, key);
+ }
+
++#define HSIPHASH_ALIGNMENT __alignof__(unsigned long)
++typedef struct {
++ unsigned long key[2];
++} hsiphash_key_t;
++
++u32 __hsiphash_aligned(const void *data, size_t len,
++ const hsiphash_key_t *key);
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++u32 __hsiphash_unaligned(const void *data, size_t len,
++ const hsiphash_key_t *key);
++#endif
++
++u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key);
++u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key);
++u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c,
++ const hsiphash_key_t *key);
++u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d,
++ const hsiphash_key_t *key);
++
++static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len,
++ const hsiphash_key_t *key)
++{
++ if (__builtin_constant_p(len) && len == 4)
++ return hsiphash_1u32(le32_to_cpu(data[0]), key);
++ if (__builtin_constant_p(len) && len == 8)
++ return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
++ key);
++ if (__builtin_constant_p(len) && len == 12)
++ return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
++ le32_to_cpu(data[2]), key);
++ if (__builtin_constant_p(len) && len == 16)
++ return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
++ le32_to_cpu(data[2]), le32_to_cpu(data[3]),
++ key);
++ return __hsiphash_aligned(data, len, key);
++}
++
++/**
++ * hsiphash - compute 32-bit hsiphash PRF value
++ * @data: buffer to hash
++ * @size: size of @data
++ * @key: the hsiphash key
++ */
++static inline u32 hsiphash(const void *data, size_t len,
++ const hsiphash_key_t *key)
++{
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++ if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
++ return __hsiphash_unaligned(data, len, key);
++#endif
++ return ___hsiphash_aligned(data, len, key);
++}
++
+ #endif /* _LINUX_SIPHASH_H */
+diff --git a/lib/siphash.c b/lib/siphash.c
+index c43cf406e71b2..3ae58b4edad61 100644
+--- a/lib/siphash.c
++++ b/lib/siphash.c
+@@ -5,7 +5,9 @@
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ *
+- * This implementation is specifically for SipHash2-4.
++ * This implementation is specifically for SipHash2-4 for a secure PRF
++ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
++ * hashtables.
+ */
+
+ #include <linux/siphash.h>
+@@ -230,3 +232,320 @@ u64 siphash_3u32(const u32 first, const u32 second, const u32 third,
+ POSTAMBLE
+ }
+ EXPORT_SYMBOL(siphash_3u32);
++
++#if BITS_PER_LONG == 64
++/* Note that on 64-bit, we make HalfSipHash1-3 actually be SipHash1-3, for
++ * performance reasons. On 32-bit, below, we actually implement HalfSipHash1-3.
++ */
++
++#define HSIPROUND SIPROUND
++#define HPREAMBLE(len) PREAMBLE(len)
++#define HPOSTAMBLE \
++ v3 ^= b; \
++ HSIPROUND; \
++ v0 ^= b; \
++ v2 ^= 0xff; \
++ HSIPROUND; \
++ HSIPROUND; \
++ HSIPROUND; \
++ return (v0 ^ v1) ^ (v2 ^ v3);
++
++u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
++{
++ const u8 *end = data + len - (len % sizeof(u64));
++ const u8 left = len & (sizeof(u64) - 1);
++ u64 m;
++ HPREAMBLE(len)
++ for (; data != end; data += sizeof(u64)) {
++ m = le64_to_cpup(data);
++ v3 ^= m;
++ HSIPROUND;
++ v0 ^= m;
++ }
++#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
++ if (left)
++ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
++ bytemask_from_count(left)));
++#else
++ switch (left) {
++ case 7: b |= ((u64)end[6]) << 48;
++ case 6: b |= ((u64)end[5]) << 40;
++ case 5: b |= ((u64)end[4]) << 32;
++ case 4: b |= le32_to_cpup(data); break;
++ case 3: b |= ((u64)end[2]) << 16;
++ case 2: b |= le16_to_cpup(data); break;
++ case 1: b |= end[0];
++ }
++#endif
++ HPOSTAMBLE
++}
++EXPORT_SYMBOL(__hsiphash_aligned);
++
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++u32 __hsiphash_unaligned(const void *data, size_t len,
++ const hsiphash_key_t *key)
++{
++ const u8 *end = data + len - (len % sizeof(u64));
++ const u8 left = len & (sizeof(u64) - 1);
++ u64 m;
++ HPREAMBLE(len)
++ for (; data != end; data += sizeof(u64)) {
++ m = get_unaligned_le64(data);
++ v3 ^= m;
++ HSIPROUND;
++ v0 ^= m;
++ }
++#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
++ if (left)
++ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
++ bytemask_from_count(left)));
++#else
++ switch (left) {
++ case 7: b |= ((u64)end[6]) << 48;
++ case 6: b |= ((u64)end[5]) << 40;
++ case 5: b |= ((u64)end[4]) << 32;
++ case 4: b |= get_unaligned_le32(end); break;
++ case 3: b |= ((u64)end[2]) << 16;
++ case 2: b |= get_unaligned_le16(end); break;
++ case 1: b |= end[0];
++ }
++#endif
++ HPOSTAMBLE
++}
++EXPORT_SYMBOL(__hsiphash_unaligned);
++#endif
++
++/**
++ * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
++ * @first: first u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
++{
++ HPREAMBLE(4)
++ b |= first;
++ HPOSTAMBLE
++}
++EXPORT_SYMBOL(hsiphash_1u32);
++
++/**
++ * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
++ * @first: first u32
++ * @second: second u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
++{
++ u64 combined = (u64)second << 32 | first;
++ HPREAMBLE(8)
++ v3 ^= combined;
++ HSIPROUND;
++ v0 ^= combined;
++ HPOSTAMBLE
++}
++EXPORT_SYMBOL(hsiphash_2u32);
++
++/**
++ * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
++ * @first: first u32
++ * @second: second u32
++ * @third: third u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
++ const hsiphash_key_t *key)
++{
++ u64 combined = (u64)second << 32 | first;
++ HPREAMBLE(12)
++ v3 ^= combined;
++ HSIPROUND;
++ v0 ^= combined;
++ b |= third;
++ HPOSTAMBLE
++}
++EXPORT_SYMBOL(hsiphash_3u32);
++
++/**
++ * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
++ * @first: first u32
++ * @second: second u32
++ * @third: third u32
++ * @forth: forth u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
++ const u32 forth, const hsiphash_key_t *key)
++{
++ u64 combined = (u64)second << 32 | first;
++ HPREAMBLE(16)
++ v3 ^= combined;
++ HSIPROUND;
++ v0 ^= combined;
++ combined = (u64)forth << 32 | third;
++ v3 ^= combined;
++ HSIPROUND;
++ v0 ^= combined;
++ HPOSTAMBLE
++}
++EXPORT_SYMBOL(hsiphash_4u32);
++#else
++#define HSIPROUND \
++ do { \
++ v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \
++ v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \
++ v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \
++ v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \
++ } while (0)
++
++#define HPREAMBLE(len) \
++ u32 v0 = 0; \
++ u32 v1 = 0; \
++ u32 v2 = 0x6c796765U; \
++ u32 v3 = 0x74656462U; \
++ u32 b = ((u32)(len)) << 24; \
++ v3 ^= key->key[1]; \
++ v2 ^= key->key[0]; \
++ v1 ^= key->key[1]; \
++ v0 ^= key->key[0];
++
++#define HPOSTAMBLE \
++ v3 ^= b; \
++ HSIPROUND; \
++ v0 ^= b; \
++ v2 ^= 0xff; \
++ HSIPROUND; \
++ HSIPROUND; \
++ HSIPROUND; \
++ return v1 ^ v3;
++
++u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
++{
++ const u8 *end = data + len - (len % sizeof(u32));
++ const u8 left = len & (sizeof(u32) - 1);
++ u32 m;
++ HPREAMBLE(len)
++ for (; data != end; data += sizeof(u32)) {
++ m = le32_to_cpup(data);
++ v3 ^= m;
++ HSIPROUND;
++ v0 ^= m;
++ }
++ switch (left) {
++ case 3: b |= ((u32)end[2]) << 16;
++ case 2: b |= le16_to_cpup(data); break;
++ case 1: b |= end[0];
++ }
++ HPOSTAMBLE
++}
++EXPORT_SYMBOL(__hsiphash_aligned);
++
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++u32 __hsiphash_unaligned(const void *data, size_t len,
++ const hsiphash_key_t *key)
++{
++ const u8 *end = data + len - (len % sizeof(u32));
++ const u8 left = len & (sizeof(u32) - 1);
++ u32 m;
++ HPREAMBLE(len)
++ for (; data != end; data += sizeof(u32)) {
++ m = get_unaligned_le32(data);
++ v3 ^= m;
++ HSIPROUND;
++ v0 ^= m;
++ }
++ switch (left) {
++ case 3: b |= ((u32)end[2]) << 16;
++ case 2: b |= get_unaligned_le16(end); break;
++ case 1: b |= end[0];
++ }
++ HPOSTAMBLE
++}
++EXPORT_SYMBOL(__hsiphash_unaligned);
++#endif
++
++/**
++ * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
++ * @first: first u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
++{
++ HPREAMBLE(4)
++ v3 ^= first;
++ HSIPROUND;
++ v0 ^= first;
++ HPOSTAMBLE
++}
++EXPORT_SYMBOL(hsiphash_1u32);
++
++/**
++ * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
++ * @first: first u32
++ * @second: second u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
++{
++ HPREAMBLE(8)
++ v3 ^= first;
++ HSIPROUND;
++ v0 ^= first;
++ v3 ^= second;
++ HSIPROUND;
++ v0 ^= second;
++ HPOSTAMBLE
++}
++EXPORT_SYMBOL(hsiphash_2u32);
++
++/**
++ * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
++ * @first: first u32
++ * @second: second u32
++ * @third: third u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
++ const hsiphash_key_t *key)
++{
++ HPREAMBLE(12)
++ v3 ^= first;
++ HSIPROUND;
++ v0 ^= first;
++ v3 ^= second;
++ HSIPROUND;
++ v0 ^= second;
++ v3 ^= third;
++ HSIPROUND;
++ v0 ^= third;
++ HPOSTAMBLE
++}
++EXPORT_SYMBOL(hsiphash_3u32);
++
++/**
++ * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
++ * @first: first u32
++ * @second: second u32
++ * @third: third u32
++ * @forth: forth u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
++ const u32 forth, const hsiphash_key_t *key)
++{
++ HPREAMBLE(16)
++ v3 ^= first;
++ HSIPROUND;
++ v0 ^= first;
++ v3 ^= second;
++ HSIPROUND;
++ v0 ^= second;
++ v3 ^= third;
++ HSIPROUND;
++ v0 ^= third;
++ v3 ^= forth;
++ HSIPROUND;
++ v0 ^= forth;
++ HPOSTAMBLE
++}
++EXPORT_SYMBOL(hsiphash_4u32);
++#endif
+diff --git a/lib/test_siphash.c b/lib/test_siphash.c
+index d972acfc15e4a..a6d854d933bff 100644
+--- a/lib/test_siphash.c
++++ b/lib/test_siphash.c
+@@ -7,7 +7,9 @@
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ *
+- * This implementation is specifically for SipHash2-4.
++ * This implementation is specifically for SipHash2-4 for a secure PRF
++ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
++ * hashtables.
+ */
+
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+@@ -18,8 +20,8 @@
+ #include <linux/errno.h>
+ #include <linux/module.h>
+
+-/* Test vectors taken from official reference source available at:
+- * https://131002.net/siphash/siphash24.c
++/* Test vectors taken from reference source available at:
++ * https://github.com/veorq/SipHash
+ */
+
+ static const siphash_key_t test_key_siphash =
+@@ -50,6 +52,64 @@ static const u64 test_vectors_siphash[64] = {
+ 0x958a324ceb064572ULL
+ };
+
++#if BITS_PER_LONG == 64
++static const hsiphash_key_t test_key_hsiphash =
++ {{ 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL }};
++
++static const u32 test_vectors_hsiphash[64] = {
++ 0x050fc4dcU, 0x7d57ca93U, 0x4dc7d44dU,
++ 0xe7ddf7fbU, 0x88d38328U, 0x49533b67U,
++ 0xc59f22a7U, 0x9bb11140U, 0x8d299a8eU,
++ 0x6c063de4U, 0x92ff097fU, 0xf94dc352U,
++ 0x57b4d9a2U, 0x1229ffa7U, 0xc0f95d34U,
++ 0x2a519956U, 0x7d908b66U, 0x63dbd80cU,
++ 0xb473e63eU, 0x8d297d1cU, 0xa6cce040U,
++ 0x2b45f844U, 0xa320872eU, 0xdae6c123U,
++ 0x67349c8cU, 0x705b0979U, 0xca9913a5U,
++ 0x4ade3b35U, 0xef6cd00dU, 0x4ab1e1f4U,
++ 0x43c5e663U, 0x8c21d1bcU, 0x16a7b60dU,
++ 0x7a8ff9bfU, 0x1f2a753eU, 0xbf186b91U,
++ 0xada26206U, 0xa3c33057U, 0xae3a36a1U,
++ 0x7b108392U, 0x99e41531U, 0x3f1ad944U,
++ 0xc8138825U, 0xc28949a6U, 0xfaf8876bU,
++ 0x9f042196U, 0x68b1d623U, 0x8b5114fdU,
++ 0xdf074c46U, 0x12cc86b3U, 0x0a52098fU,
++ 0x9d292f9aU, 0xa2f41f12U, 0x43a71ed0U,
++ 0x73f0bce6U, 0x70a7e980U, 0x243c6d75U,
++ 0xfdb71513U, 0xa67d8a08U, 0xb7e8f148U,
++ 0xf7a644eeU, 0x0f1837f2U, 0x4b6694e0U,
++ 0xb7bbb3a8U
++};
++#else
++static const hsiphash_key_t test_key_hsiphash =
++ {{ 0x03020100U, 0x07060504U }};
++
++static const u32 test_vectors_hsiphash[64] = {
++ 0x5814c896U, 0xe7e864caU, 0xbc4b0e30U,
++ 0x01539939U, 0x7e059ea6U, 0x88e3d89bU,
++ 0xa0080b65U, 0x9d38d9d6U, 0x577999b1U,
++ 0xc839caedU, 0xe4fa32cfU, 0x959246eeU,
++ 0x6b28096cU, 0x66dd9cd6U, 0x16658a7cU,
++ 0xd0257b04U, 0x8b31d501U, 0x2b1cd04bU,
++ 0x06712339U, 0x522aca67U, 0x911bb605U,
++ 0x90a65f0eU, 0xf826ef7bU, 0x62512debU,
++ 0x57150ad7U, 0x5d473507U, 0x1ec47442U,
++ 0xab64afd3U, 0x0a4100d0U, 0x6d2ce652U,
++ 0x2331b6a3U, 0x08d8791aU, 0xbc6dda8dU,
++ 0xe0f6c934U, 0xb0652033U, 0x9b9851ccU,
++ 0x7c46fb7fU, 0x732ba8cbU, 0xf142997aU,
++ 0xfcc9aa1bU, 0x05327eb2U, 0xe110131cU,
++ 0xf9e5e7c0U, 0xa7d708a6U, 0x11795ab1U,
++ 0x65671619U, 0x9f5fff91U, 0xd89c5267U,
++ 0x007783ebU, 0x95766243U, 0xab639262U,
++ 0x9c7e1390U, 0xc368dda6U, 0x38ddc455U,
++ 0xfa13d379U, 0x979ea4e8U, 0x53ecd77eU,
++ 0x2ee80657U, 0x33dbb66aU, 0xae3f0577U,
++ 0x88b4c4ccU, 0x3e7f480bU, 0x74c1ebf8U,
++ 0x87178304U
++};
++#endif
++
+ static int __init siphash_test_init(void)
+ {
+ u8 in[64] __aligned(SIPHASH_ALIGNMENT);
+@@ -70,6 +130,16 @@ static int __init siphash_test_init(void)
+ pr_info("siphash self-test unaligned %u: FAIL\n", i + 1);
+ ret = -EINVAL;
+ }
++ if (hsiphash(in, i, &test_key_hsiphash) !=
++ test_vectors_hsiphash[i]) {
++ pr_info("hsiphash self-test aligned %u: FAIL\n", i + 1);
++ ret = -EINVAL;
++ }
++ if (hsiphash(in_unaligned + 1, i, &test_key_hsiphash) !=
++ test_vectors_hsiphash[i]) {
++ pr_info("hsiphash self-test unaligned %u: FAIL\n", i + 1);
++ ret = -EINVAL;
++ }
+ }
+ if (siphash_1u64(0x0706050403020100ULL, &test_key_siphash) !=
+ test_vectors_siphash[8]) {
+@@ -115,6 +185,28 @@ static int __init siphash_test_init(void)
+ pr_info("siphash self-test 4u32: FAIL\n");
+ ret = -EINVAL;
+ }
++ if (hsiphash_1u32(0x03020100U, &test_key_hsiphash) !=
++ test_vectors_hsiphash[4]) {
++ pr_info("hsiphash self-test 1u32: FAIL\n");
++ ret = -EINVAL;
++ }
++ if (hsiphash_2u32(0x03020100U, 0x07060504U, &test_key_hsiphash) !=
++ test_vectors_hsiphash[8]) {
++ pr_info("hsiphash self-test 2u32: FAIL\n");
++ ret = -EINVAL;
++ }
++ if (hsiphash_3u32(0x03020100U, 0x07060504U,
++ 0x0b0a0908U, &test_key_hsiphash) !=
++ test_vectors_hsiphash[12]) {
++ pr_info("hsiphash self-test 3u32: FAIL\n");
++ ret = -EINVAL;
++ }
++ if (hsiphash_4u32(0x03020100U, 0x07060504U,
++ 0x0b0a0908U, 0x0f0e0d0cU, &test_key_hsiphash) !=
++ test_vectors_hsiphash[16]) {
++ pr_info("hsiphash self-test 4u32: FAIL\n");
++ ret = -EINVAL;
++ }
+ if (!ret)
+ pr_info("self-tests: pass\n");
+ return ret;
+--
+2.20.1
+
--- /dev/null
+From c032e2c764db7e44a95cd4fd51634efdce82b547 Mon Sep 17 00:00:00 2001
+From: Jason Wang <jasowang@redhat.com>
+Date: Wed, 28 Aug 2019 00:10:37 +0100
+Subject: vhost: introduce vhost_exceeds_weight()
+
+commit e82b9b0727ff6d665fff2d326162b460dded554d upstream.
+
+We used to have vhost_exceeds_weight() for vhost-net to:
+
+- prevent vhost kthread from hogging the cpu
+- balance the time spent between TX and RX
+
+This function could be useful for vsock and scsi as well, so move it
+to vhost.c. A device must specify a weight which counts the number of
+requests; it can also specify a byte_weight which counts the number
+of bytes that have been processed.
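+
+In sketch form, a device is then expected to use the new interface roughly
+like this (the two helpers are the ones added by this patch; everything
+else is illustrative pseudo-code):
+
+vhost_dev_init(&d->dev, vqs, nvqs, weight, byte_weight);
+...
+/* in a virtqueue handler: */
+do {
+        /* process one request, accumulating total_len */
+} while (likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
+
+Note that vhost_exceeds_weight() requeues the poll work before returning
+true, so a handler that stops early is rescheduled rather than spinning.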
+
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+[bwh: Backported to 4.4:
+ - Drop changes to vhost_vsock
+ - In vhost_net, both Tx modes are handled in one loop in handle_tx()
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/net.c | 18 +++++-------------
+ drivers/vhost/scsi.c | 9 ++++++++-
+ drivers/vhost/vhost.c | 20 +++++++++++++++++++-
+ drivers/vhost/vhost.h | 6 +++++-
+ 4 files changed, 37 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
+index 38c3120f92be4..20062531f1eaa 100644
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -293,12 +293,6 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
+ rcu_read_unlock_bh();
+ }
+
+-static bool vhost_exceeds_weight(int pkts, int total_len)
+-{
+- return total_len >= VHOST_NET_WEIGHT ||
+- pkts >= VHOST_NET_PKT_WEIGHT;
+-}
+-
+ /* Expects to be always run from workqueue - which acts as
+ * read-size critical section for our kind of RCU. */
+ static void handle_tx(struct vhost_net *net)
+@@ -421,10 +415,9 @@ static void handle_tx(struct vhost_net *net)
+ vhost_zerocopy_signal_used(net, vq);
+ total_len += len;
+ vhost_net_tx_packet(net);
+- if (unlikely(vhost_exceeds_weight(++sent_pkts, total_len))) {
+- vhost_poll_queue(&vq->poll);
++ if (unlikely(vhost_exceeds_weight(vq, ++sent_pkts,
++ total_len)))
+ break;
+- }
+ }
+ out:
+ mutex_unlock(&vq->mutex);
+@@ -645,10 +638,8 @@ static void handle_rx(struct vhost_net *net)
+ if (unlikely(vq_log))
+ vhost_log_write(vq, vq_log, log, vhost_len);
+ total_len += vhost_len;
+- if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) {
+- vhost_poll_queue(&vq->poll);
++ if (unlikely(vhost_exceeds_weight(vq, ++recv_pkts, total_len)))
+ break;
+- }
+ }
+ out:
+ mutex_unlock(&vq->mutex);
+@@ -718,7 +709,8 @@ static int vhost_net_open(struct inode *inode, struct file *f)
+ n->vqs[i].vhost_hlen = 0;
+ n->vqs[i].sock_hlen = 0;
+ }
+- vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
++ vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
++ VHOST_NET_WEIGHT, VHOST_NET_PKT_WEIGHT);
+
+ vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
+ vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
+diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
+index 8fc62a03637a7..47e659eacf17e 100644
+--- a/drivers/vhost/scsi.c
++++ b/drivers/vhost/scsi.c
+@@ -58,6 +58,12 @@
+ #define VHOST_SCSI_PREALLOC_UPAGES 2048
+ #define VHOST_SCSI_PREALLOC_PROT_SGLS 512
+
++/* Max number of requests before requeueing the job.
++ * Using this limit prevents one virtqueue from starving others with
++ * request.
++ */
++#define VHOST_SCSI_WEIGHT 256
++
+ struct vhost_scsi_inflight {
+ /* Wait for the flush operation to finish */
+ struct completion comp;
+@@ -1443,7 +1449,8 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
+ vqs[i] = &vs->vqs[i].vq;
+ vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
+ }
+- vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ);
++ vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ,
++ VHOST_SCSI_WEIGHT, 0);
+
+ vhost_scsi_init_inflight(vs, NULL);
+
+diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
+index 2ed0a356d1d33..0f653f314876e 100644
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -370,8 +370,24 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev)
+ vhost_vq_free_iovecs(dev->vqs[i]);
+ }
+
++bool vhost_exceeds_weight(struct vhost_virtqueue *vq,
++ int pkts, int total_len)
++{
++ struct vhost_dev *dev = vq->dev;
++
++ if ((dev->byte_weight && total_len >= dev->byte_weight) ||
++ pkts >= dev->weight) {
++ vhost_poll_queue(&vq->poll);
++ return true;
++ }
++
++ return false;
++}
++EXPORT_SYMBOL_GPL(vhost_exceeds_weight);
++
+ void vhost_dev_init(struct vhost_dev *dev,
+- struct vhost_virtqueue **vqs, int nvqs)
++ struct vhost_virtqueue **vqs, int nvqs,
++ int weight, int byte_weight)
+ {
+ struct vhost_virtqueue *vq;
+ int i;
+@@ -386,6 +402,8 @@ void vhost_dev_init(struct vhost_dev *dev,
+ spin_lock_init(&dev->work_lock);
+ INIT_LIST_HEAD(&dev->work_list);
+ dev->worker = NULL;
++ dev->weight = weight;
++ dev->byte_weight = byte_weight;
+
+ for (i = 0; i < dev->nvqs; ++i) {
+ vq = dev->vqs[i];
+diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
+index d3f767448a72c..5ac4869705696 100644
+--- a/drivers/vhost/vhost.h
++++ b/drivers/vhost/vhost.h
+@@ -127,9 +127,13 @@ struct vhost_dev {
+ spinlock_t work_lock;
+ struct list_head work_list;
+ struct task_struct *worker;
++ int weight;
++ int byte_weight;
+ };
+
+-void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs);
++bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len);
++void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs,
++ int nvqs, int weight, int byte_weight);
+ long vhost_dev_set_owner(struct vhost_dev *dev);
+ bool vhost_dev_has_owner(struct vhost_dev *dev);
+ long vhost_dev_check_owner(struct vhost_dev *);
+--
+2.20.1
+
--- /dev/null
+From 8dadc8531753e126ba94c3747cd08f63df48c3a9 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?haibinzhang=28=E5=BC=A0=E6=B5=B7=E6=96=8C=29?=
+ <haibinzhang@tencent.com>
+Date: Wed, 28 Aug 2019 00:10:17 +0100
+Subject: vhost-net: set packet weight of tx polling to 2 * vq size
+
+commit a2ac99905f1ea8b15997a6ec39af69aa28a3653b upstream.
+
+handle_tx will delay rx for tens or even hundreds of milliseconds when tx is
+busy polling udp packets with a small length (e.g. a 1-byte udp payload),
+because the VHOST_NET_WEIGHT limit only takes the number of bytes sent into
+account, not the number of packets.
+
+The ping latencies shown below were measured while netperf (UDP_STREAM,
+len=1) ran between two virtual machines and another machine pinged the client:
+
+vq size=256
+Packet-Weight Ping-Latencies(millisecond)
+ min avg max
+Origin 3.319 18.489 57.303
+64 1.643 2.021 2.552
+128 1.825 2.600 3.224
+256 1.997 2.710 4.295
+512 1.860 3.171 4.631
+1024 2.002 4.173 9.056
+2048 2.257 5.650 9.688
+4096 2.093 8.508 15.943
+
+vq size=512
+Packet-Weight Ping-Latencies(millisecond)
+ min avg max
+Origin 6.537 29.177 66.245
+64 2.798 3.614 4.403
+128 2.861 3.820 4.775
+256 3.008 4.018 4.807
+512 3.254 4.523 5.824
+1024 3.079 5.335 7.747
+2048 3.944 8.201 12.762
+4096 4.158 11.057 19.985
+
+The results look pretty consistent, with a small dip at the two VQ sizes.
+Ring size is a hint from the device about the burst size it can tolerate.
+Based on these benchmarks, set the weight to 2 * vq size.
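+
+In sketch form, the tx loop then requeues itself once either the byte
+budget or the per-vq packet budget is spent (as in the hunk below):
+
+#define VHOST_NET_PKT_WEIGHT(vq) ((vq)->num * 2)
+
+if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
+    unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT(vq))) {
+        vhost_poll_queue(&vq->poll);
+        break;
+}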
+
+To evaluate this change, additional tests were done using netperf (RR, TX)
+between two machines with Intel(R) Xeon(R) Gold 6133 CPU @ 2.50GHz, with the
+vq size tweaked through qemu. The results shown below do not show obvious
+changes.
+
+vq size=256 TCP_RR vq size=512 TCP_RR
+size/sessions/+thu%/+normalize% size/sessions/+thu%/+normalize%
+ 1/ 1/ -7%/ -2% 1/ 1/ 0%/ -2%
+ 1/ 4/ +1%/ 0% 1/ 4/ +1%/ 0%
+ 1/ 8/ +1%/ -2% 1/ 8/ 0%/ +1%
+ 64/ 1/ -6%/ 0% 64/ 1/ +7%/ +3%
+ 64/ 4/ 0%/ +2% 64/ 4/ -1%/ +1%
+ 64/ 8/ 0%/ 0% 64/ 8/ -1%/ -2%
+ 256/ 1/ -3%/ -4% 256/ 1/ -4%/ -2%
+ 256/ 4/ +3%/ +4% 256/ 4/ +1%/ +2%
+ 256/ 8/ +2%/ 0% 256/ 8/ +1%/ -1%
+
+vq size=256 UDP_RR vq size=512 UDP_RR
+size/sessions/+thu%/+normalize% size/sessions/+thu%/+normalize%
+ 1/ 1/ -5%/ +1% 1/ 1/ -3%/ -2%
+ 1/ 4/ +4%/ +1% 1/ 4/ -2%/ +2%
+ 1/ 8/ -1%/ -1% 1/ 8/ -1%/ 0%
+ 64/ 1/ -2%/ -3% 64/ 1/ +1%/ +1%
+ 64/ 4/ -5%/ -1% 64/ 4/ +2%/ 0%
+ 64/ 8/ 0%/ -1% 64/ 8/ -2%/ +1%
+ 256/ 1/ +7%/ +1% 256/ 1/ -7%/ 0%
+ 256/ 4/ +1%/ +1% 256/ 4/ -3%/ -4%
+ 256/ 8/ +2%/ +2% 256/ 8/ +1%/ +1%
+
+vq size=256 TCP_STREAM vq size=512 TCP_STREAM
+size/sessions/+thu%/+normalize% size/sessions/+thu%/+normalize%
+ 64/ 1/ 0%/ -3% 64/ 1/ 0%/ 0%
+ 64/ 4/ +3%/ -1% 64/ 4/ -2%/ +4%
+ 64/ 8/ +9%/ -4% 64/ 8/ -1%/ +2%
+ 256/ 1/ +1%/ -4% 256/ 1/ +1%/ +1%
+ 256/ 4/ -1%/ -1% 256/ 4/ -3%/ 0%
+ 256/ 8/ +7%/ +5% 256/ 8/ -3%/ 0%
+ 512/ 1/ +1%/ 0% 512/ 1/ -1%/ -1%
+ 512/ 4/ +1%/ -1% 512/ 4/ 0%/ 0%
+ 512/ 8/ +7%/ -5% 512/ 8/ +6%/ -1%
+1024/ 1/ 0%/ -1% 1024/ 1/ 0%/ +1%
+1024/ 4/ +3%/ 0% 1024/ 4/ +1%/ 0%
+1024/ 8/ +8%/ +5% 1024/ 8/ -1%/ 0%
+2048/ 1/ +2%/ +2% 2048/ 1/ -1%/ 0%
+2048/ 4/ +1%/ 0% 2048/ 4/ 0%/ -1%
+2048/ 8/ -2%/ 0% 2048/ 8/ 5%/ -1%
+4096/ 1/ -2%/ 0% 4096/ 1/ -2%/ 0%
+4096/ 4/ +2%/ 0% 4096/ 4/ 0%/ 0%
+4096/ 8/ +9%/ -2% 4096/ 8/ -5%/ -1%
+
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Haibin Zhang <haibinzhang@tencent.com>
+Signed-off-by: Yunfang Tai <yunfangtai@tencent.com>
+Signed-off-by: Lidong Chen <lidongchen@tencent.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/net.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
+index f463171352245..b8496f713bc62 100644
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -39,6 +39,10 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
+ * Using this limit prevents one virtqueue from starving others. */
+ #define VHOST_NET_WEIGHT 0x80000
+
++/* Max number of packets transferred before requeueing the job.
++ * Using this limit prevents one virtqueue from starving rx. */
++#define VHOST_NET_PKT_WEIGHT(vq) ((vq)->num * 2)
++
+ /* MAX number of TX used buffers for outstanding zerocopy */
+ #define VHOST_MAX_PEND 128
+ #define VHOST_GOODCOPY_LEN 256
+@@ -308,6 +312,7 @@ static void handle_tx(struct vhost_net *net)
+ struct socket *sock;
+ struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
+ bool zcopy, zcopy_used;
++ int sent_pkts = 0;
+
+ mutex_lock(&vq->mutex);
+ sock = vq->private_data;
+@@ -408,7 +413,8 @@ static void handle_tx(struct vhost_net *net)
+ vhost_zerocopy_signal_used(net, vq);
+ total_len += len;
+ vhost_net_tx_packet(net);
+- if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
++ if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
++ unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT(vq))) {
+ vhost_poll_queue(&vq->poll);
+ break;
+ }
+--
+2.20.1
+
--- /dev/null
+From 2bd3544c306beeb4bbc5b9a487c902d22118e539 Mon Sep 17 00:00:00 2001
+From: Jason Wang <jasowang@redhat.com>
+Date: Wed, 28 Aug 2019 00:10:49 +0100
+Subject: vhost: scsi: add weight support
+
+commit c1ea02f15ab5efb3e93fc3144d895410bf79fcf2 upstream.
+
+This patch will check the weight and exit the loop if we exceed the
+weight. This is useful for preventing the scsi kthread from hogging the
+cpu, which is guest triggerable.
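+
+In outline, the request loop becomes bounded like this (a sketch of the
+change below; passing 0 as the byte weight means only the request count
+is checked):
+
+do {
+        head = vhost_get_vq_desc(vq, vq->iov, ...);
+        /* ...set up and queue one SCSI command... */
+} while (likely(!vhost_exceeds_weight(vq, ++c, 0)));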
+
+This addresses CVE-2019-3900.
+
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Stefan Hajnoczi <stefanha@redhat.com>
+Fixes: 057cbf49a1f0 ("tcm_vhost: Initial merge for vhost level target fabric driver")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+[bwh: Backported to 4.4:
+ - Drop changes in vhost_scsi_ctl_handle_vq()
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/scsi.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
+index 47e659eacf17e..269cfdd2958de 100644
+--- a/drivers/vhost/scsi.c
++++ b/drivers/vhost/scsi.c
+@@ -861,7 +861,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
+ u64 tag;
+ u32 exp_data_len, data_direction;
+ unsigned out, in;
+- int head, ret, prot_bytes;
++ int head, ret, prot_bytes, c = 0;
+ size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp);
+ size_t out_size, in_size;
+ u16 lun;
+@@ -880,7 +880,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
+
+ vhost_disable_notify(&vs->dev, vq);
+
+- for (;;) {
++ do {
+ head = vhost_get_vq_desc(vq, vq->iov,
+ ARRAY_SIZE(vq->iov), &out, &in,
+ NULL, NULL);
+@@ -1096,7 +1096,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
+ */
+ INIT_WORK(&cmd->work, vhost_scsi_submission_work);
+ queue_work(vhost_scsi_workqueue, &cmd->work);
+- }
++ } while (likely(!vhost_exceeds_weight(vq, ++c, 0)));
+ out:
+ mutex_unlock(&vq->mutex);
+ }
+--
+2.20.1
+
--- /dev/null
+From 9375fcb85e4d62400b974321991ea7be1503094c Mon Sep 17 00:00:00 2001
+From: Jason Wang <jasowang@redhat.com>
+Date: Wed, 28 Aug 2019 00:10:43 +0100
+Subject: vhost_net: fix possible infinite loop
+
+commit e2412c07f8f3040593dfb88207865a3cd58680c0 upstream.
+
+When the rx buffer is too small for a packet, we will discard the vq
+descriptor and retry it for the next packet:
+
+while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
+ &busyloop_intr))) {
+...
+ /* On overrun, truncate and discard */
+ if (unlikely(headcount > UIO_MAXIOV)) {
+ iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
+ err = sock->ops->recvmsg(sock, &msg,
+ 1, MSG_DONTWAIT | MSG_TRUNC);
+ pr_debug("Discarded rx packet: len %zd\n", sock_len);
+ continue;
+ }
+...
+}
+
+This makes it possible to trigger an infinite while..continue loop
+through the cooperation of two VMs, like:
+
+1) Malicious VM1 allocates a 1-byte rx buffer and tries to slow down the
+   vhost process as much as possible, e.g. by using indirect descriptors
+   or other means.
+2) Malicious VM2 generates packets to VM1 as fast as possible
+
+Fix this by checking against the weight at the end of the RX and TX
+loops (sketched below). This also eliminates other similar cases when:
+
+- userspace is consuming the packets in the meantime
+- a theoretical TOCTOU attack where the guest moves the avail index back
+  and forth to hit the continue after vhost finds the guest just added
+  new buffers
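+
+In sketch form (matching the rx hunk below), the weight check moves into
+the loop condition, so even iterations that hit a continue path are
+bounded:
+
+do {
+        sock_len = peek_head_len(sock->sk);
+        if (!sock_len)
+                break;
+        /* ...receive or discard one packet, accumulating total_len... */
+} while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len)));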
+
+This addresses CVE-2019-3900.
+
+Fixes: d8316f3991d20 ("vhost: fix total length when packets are too short")
+Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+[bwh: Backported to 4.4:
+ - Both Tx modes are handled in one loop in handle_tx()
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/net.c | 19 +++++++++----------
+ 1 file changed, 9 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
+index 20062531f1eaa..1459dc9fd7010 100644
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -326,7 +326,7 @@ static void handle_tx(struct vhost_net *net)
+ hdr_size = nvq->vhost_hlen;
+ zcopy = nvq->ubufs;
+
+- for (;;) {
++ do {
+ /* Release DMAs done buffers first */
+ if (zcopy)
+ vhost_zerocopy_signal_used(net, vq);
+@@ -415,10 +415,7 @@ static void handle_tx(struct vhost_net *net)
+ vhost_zerocopy_signal_used(net, vq);
+ total_len += len;
+ vhost_net_tx_packet(net);
+- if (unlikely(vhost_exceeds_weight(vq, ++sent_pkts,
+- total_len)))
+- break;
+- }
++ } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
+ out:
+ mutex_unlock(&vq->mutex);
+ }
+@@ -560,7 +557,10 @@ static void handle_rx(struct vhost_net *net)
+ vq->log : NULL;
+ mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
+
+- while ((sock_len = peek_head_len(sock->sk))) {
++ do {
++ sock_len = peek_head_len(sock->sk);
++ if (!sock_len)
++ break;
+ sock_len += sock_hlen;
+ vhost_len = sock_len + vhost_hlen;
+ headcount = get_rx_bufs(vq, vq->heads, vhost_len,
+@@ -638,9 +638,8 @@ static void handle_rx(struct vhost_net *net)
+ if (unlikely(vq_log))
+ vhost_log_write(vq, vq_log, log, vhost_len);
+ total_len += vhost_len;
+- if (unlikely(vhost_exceeds_weight(vq, ++recv_pkts, total_len)))
+- break;
+- }
++ } while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len)));
++
+ out:
+ mutex_unlock(&vq->mutex);
+ }
+@@ -710,7 +709,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
+ n->vqs[i].sock_hlen = 0;
+ }
+ vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
+- VHOST_NET_WEIGHT, VHOST_NET_PKT_WEIGHT);
++ VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT);
+
+ vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
+ vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
+--
+2.20.1
+
--- /dev/null
+From feb7f6818e198f75bc874f861625b7ed4168c945 Mon Sep 17 00:00:00 2001
+From: Jason Wang <jasowang@redhat.com>
+Date: Wed, 28 Aug 2019 00:10:30 +0100
+Subject: vhost_net: introduce vhost_exceeds_weight()
+
+commit 272f35cba53d088085e5952fd81d7a133ab90789 upstream.
+
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[bwh: Backported to 4.4: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/net.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
+index c1b5bccab293f..38c3120f92be4 100644
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -293,6 +293,12 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
+ rcu_read_unlock_bh();
+ }
+
++static bool vhost_exceeds_weight(int pkts, int total_len)
++{
++ return total_len >= VHOST_NET_WEIGHT ||
++ pkts >= VHOST_NET_PKT_WEIGHT;
++}
++
+ /* Expects to be always run from workqueue - which acts as
+ * read-size critical section for our kind of RCU. */
+ static void handle_tx(struct vhost_net *net)
+@@ -415,8 +421,7 @@ static void handle_tx(struct vhost_net *net)
+ vhost_zerocopy_signal_used(net, vq);
+ total_len += len;
+ vhost_net_tx_packet(net);
+- if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
+- unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT)) {
++ if (unlikely(vhost_exceeds_weight(++sent_pkts, total_len))) {
+ vhost_poll_queue(&vq->poll);
+ break;
+ }
+@@ -640,8 +645,7 @@ static void handle_rx(struct vhost_net *net)
+ if (unlikely(vq_log))
+ vhost_log_write(vq, vq_log, log, vhost_len);
+ total_len += vhost_len;
+- if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
+- unlikely(++recv_pkts >= VHOST_NET_PKT_WEIGHT)) {
++ if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) {
+ vhost_poll_queue(&vq->poll);
+ break;
+ }
+--
+2.20.1
+
--- /dev/null
+From 8ddf1038b0ef7e392d6a9e42d4d5dfd708147026 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Wed, 28 Aug 2019 00:10:23 +0100
+Subject: vhost_net: use packet weight for rx handler, too
+
+commit db688c24eada63b1efe6d0d7d835e5c3bdd71fd3 upstream.
+
+Similar to commit a2ac99905f1e ("vhost-net: set packet weight of
+tx polling to 2 * vq size"), we need a packet-based limit for
+handle_rx, too - otherwise, under rx flood with small packets,
+tx can be delayed for a very long time, even without busypolling.
+
+The pkt limit applied to handle_rx must be the same as the one applied
+by handle_tx, or we will get unfair scheduling between rx and tx.
+Tying such a limit to the queue length makes it less effective for
+large queue length values and can introduce large process
+scheduler latencies, so a constant value is used - just like
+the existing bytes limit.
+
+The selected limit has been validated with PVP[1] performance
+tests at different queue sizes:
+
+queue size 256 512 1024
+
+baseline 366 354 362
+weight 128 715 723 670
+weight 256 740 745 733
+weight 512 600 460 583
+weight 1024 423 427 418
+
+A packet weight of 256 gives peak performance in all the
+tested scenarios.
+
+No measurable regression in unidirectional performance tests has
+been detected.
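+
+In sketch form, the rx handler then mirrors the existing tx check (as in
+the hunk below):
+
+#define VHOST_NET_PKT_WEIGHT 256
+
+if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
+    unlikely(++recv_pkts >= VHOST_NET_PKT_WEIGHT)) {
+        vhost_poll_queue(&vq->poll);
+        break;
+}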
+
+[1] https://developers.redhat.com/blog/2017/06/05/measuring-and-comparing-open-vswitch-performance/
+
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/net.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
+index b8496f713bc62..c1b5bccab293f 100644
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -40,8 +40,10 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
+ #define VHOST_NET_WEIGHT 0x80000
+
+ /* Max number of packets transferred before requeueing the job.
+- * Using this limit prevents one virtqueue from starving rx. */
+-#define VHOST_NET_PKT_WEIGHT(vq) ((vq)->num * 2)
++ * Using this limit prevents one virtqueue from starving others with small
++ * pkts.
++ */
++#define VHOST_NET_PKT_WEIGHT 256
+
+ /* MAX number of TX used buffers for outstanding zerocopy */
+ #define VHOST_MAX_PEND 128
+@@ -414,7 +416,7 @@ static void handle_tx(struct vhost_net *net)
+ total_len += len;
+ vhost_net_tx_packet(net);
+ if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
+- unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT(vq))) {
++ unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT)) {
+ vhost_poll_queue(&vq->poll);
+ break;
+ }
+@@ -545,6 +547,7 @@ static void handle_rx(struct vhost_net *net)
+ struct socket *sock;
+ struct iov_iter fixup;
+ __virtio16 num_buffers;
++ int recv_pkts = 0;
+
+ mutex_lock(&vq->mutex);
+ sock = vq->private_data;
+@@ -637,7 +640,8 @@ static void handle_rx(struct vhost_net *net)
+ if (unlikely(vq_log))
+ vhost_log_write(vq, vq_log, log, vhost_len);
+ total_len += vhost_len;
+- if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
++ if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
++ unlikely(++recv_pkts >= VHOST_NET_PKT_WEIGHT)) {
+ vhost_poll_queue(&vq->poll);
+ break;
+ }
+--
+2.20.1
+