--
2.12.2
-From 944690cdb5f48d03842365b7359fe090d6c2b1fa Mon Sep 17 00:00:00 2001
-From: Bart Van Assche <bart.vanassche@sandisk.com>
-Date: Tue, 14 Feb 2017 10:56:30 -0800
-Subject: [PATCH 016/251] IB/srp: Avoid that duplicate responses trigger a
- kernel bug
-Content-Length: 1979
-Lines: 54
-
-commit 6cb72bc1b40bb2c1750ee7a5ebade93bed49a5fb upstream.
-
-After srp_process_rsp() returns there is a short window during which
-the scsi_host_find_tag() call will still return a pointer to the SCSI
-command that is being completed. If a duplicate response is received
-during that window, prevent the following call stack from appearing:
-
-BUG: unable to handle kernel NULL pointer dereference at (null)
-IP: srp_recv_done+0x450/0x6b0 [ib_srp]
-Oops: 0000 [#1] SMP
-CPU: 10 PID: 0 Comm: swapper/10 Not tainted 4.10.0-rc7-dbg+ #1
-Call Trace:
- <IRQ>
- __ib_process_cq+0x4b/0xd0 [ib_core]
- ib_poll_handler+0x1d/0x70 [ib_core]
- irq_poll_softirq+0xba/0x120
- __do_softirq+0xba/0x4c0
- irq_exit+0xbe/0xd0
- smp_apic_timer_interrupt+0x38/0x50
- apic_timer_interrupt+0x90/0xa0
- </IRQ>
-RIP: srp_recv_done+0x450/0x6b0 [ib_srp] RSP: ffff88046f483e20
-
-Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
-Cc: Israel Rukshin <israelr@mellanox.com>
-Cc: Max Gurtovoy <maxg@mellanox.com>
-Cc: Laurence Oberman <loberman@redhat.com>
-Cc: Steve Feeley <Steve.Feeley@sandisk.com>
-Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
-Signed-off-by: Doug Ledford <dledford@redhat.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/ulp/srp/ib_srp.c | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
-index 5f0f4fc58f43..57a8a200e741 100644
---- a/drivers/infiniband/ulp/srp/ib_srp.c
-+++ b/drivers/infiniband/ulp/srp/ib_srp.c
-@@ -1795,9 +1795,11 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
- complete(&ch->tsk_mgmt_done);
- } else {
- scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
-- if (scmnd) {
-+ if (scmnd && scmnd->host_scribble) {
- req = (void *)scmnd->host_scribble;
- scmnd = srp_claim_req(ch, req, NULL, scmnd);
-+ } else {
-+ scmnd = NULL;
- }
- if (!scmnd) {
- shost_printk(KERN_ERR, target->scsi_host,
---
-2.12.2
-
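The fix above works because claiming a request clears the driver-private
pointer, so a second (duplicate) response that looks up the same tag finds
nothing left to complete. Below is a minimal userspace sketch of that
double-completion guard; the names are illustrative only, and the real
driver performs the claim under ch->lock rather than single-threaded:

    #include <stdio.h>
    #include <stddef.h>

    struct request { int tag; };

    struct command {
        struct request *req;        /* plays the role of host_scribble */
    };

    /* Only the first completion wins: claiming clears the back-pointer. */
    static struct request *claim_req(struct command *cmd)
    {
        struct request *req = cmd->req;

        cmd->req = NULL;
        return req;
    }

    static void process_rsp(struct command *cmd, int tag)
    {
        if (cmd && cmd->req)
            printf("completing tag %d\n", claim_req(cmd)->tag);
        else
            printf("ignoring duplicate response for tag %d\n", tag);
    }

    int main(void)
    {
        struct request r = { .tag = 42 };
        struct command c = { .req = &r };

        process_rsp(&c, 42);    /* first response completes the command */
        process_rsp(&c, 42);    /* duplicate is detected and dropped */
        return 0;
    }
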
-From 696255449b89af5487bce53b1a65eddedc72aeff Mon Sep 17 00:00:00 2001
-From: Bart Van Assche <bart.vanassche@sandisk.com>
-Date: Tue, 14 Feb 2017 10:56:31 -0800
-Subject: [PATCH 017/251] IB/srp: Fix race conditions related to task
- management
-Content-Length: 5896
-Lines: 169
-
-commit 0a6fdbdeb1c25e31763c1fb333fa2723a7d2aba6 upstream.
-
-Prevent srp_process_rsp() from overwriting the status information
-in ch when the SRP target response timed out and processing of
-another task management function has already started. Also prevent
-the list corruption triggered by issuing multiple task management
-functions concurrently. This patch keeps the following stack
-trace from appearing in the system log:
-
-WARNING: CPU: 8 PID: 9269 at lib/list_debug.c:52 __list_del_entry_valid+0xbc/0xc0
-list_del corruption. prev->next should be ffffc90004bb7b00, but was ffff8804052ecc68
-CPU: 8 PID: 9269 Comm: sg_reset Tainted: G W 4.10.0-rc7-dbg+ #3
-Call Trace:
- dump_stack+0x68/0x93
- __warn+0xc6/0xe0
- warn_slowpath_fmt+0x4a/0x50
- __list_del_entry_valid+0xbc/0xc0
- wait_for_completion_timeout+0x12e/0x170
- srp_send_tsk_mgmt+0x1ef/0x2d0 [ib_srp]
- srp_reset_device+0x5b/0x110 [ib_srp]
- scsi_ioctl_reset+0x1c7/0x290
- scsi_ioctl+0x12a/0x420
- sd_ioctl+0x9d/0x100
- blkdev_ioctl+0x51e/0x9f0
- block_ioctl+0x38/0x40
- do_vfs_ioctl+0x8f/0x700
- SyS_ioctl+0x3c/0x70
- entry_SYSCALL_64_fastpath+0x18/0xad
-
-Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
-Cc: Israel Rukshin <israelr@mellanox.com>
-Cc: Max Gurtovoy <maxg@mellanox.com>
-Cc: Laurence Oberman <loberman@redhat.com>
-Cc: Steve Feeley <Steve.Feeley@sandisk.com>
-Signed-off-by: Doug Ledford <dledford@redhat.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/ulp/srp/ib_srp.c | 45 ++++++++++++++++++++++++-------------
- drivers/infiniband/ulp/srp/ib_srp.h | 1 +
- 2 files changed, 30 insertions(+), 16 deletions(-)
-
-diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
-index 57a8a200e741..e397f1b0af09 100644
---- a/drivers/infiniband/ulp/srp/ib_srp.c
-+++ b/drivers/infiniband/ulp/srp/ib_srp.c
-@@ -1787,12 +1787,17 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
- if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
- spin_lock_irqsave(&ch->lock, flags);
- ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
-+ if (rsp->tag == ch->tsk_mgmt_tag) {
-+ ch->tsk_mgmt_status = -1;
-+ if (be32_to_cpu(rsp->resp_data_len) >= 4)
-+ ch->tsk_mgmt_status = rsp->data[3];
-+ complete(&ch->tsk_mgmt_done);
-+ } else {
-+ shost_printk(KERN_ERR, target->scsi_host,
-+ "Received tsk mgmt response too late for tag %#llx\n",
-+ rsp->tag);
-+ }
- spin_unlock_irqrestore(&ch->lock, flags);
--
-- ch->tsk_mgmt_status = -1;
-- if (be32_to_cpu(rsp->resp_data_len) >= 4)
-- ch->tsk_mgmt_status = rsp->data[3];
-- complete(&ch->tsk_mgmt_done);
- } else {
- scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
- if (scmnd && scmnd->host_scribble) {
-@@ -2471,19 +2476,18 @@ srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
- }
-
- static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
-- u8 func)
-+ u8 func, u8 *status)
- {
- struct srp_target_port *target = ch->target;
- struct srp_rport *rport = target->rport;
- struct ib_device *dev = target->srp_host->srp_dev->dev;
- struct srp_iu *iu;
- struct srp_tsk_mgmt *tsk_mgmt;
-+ int res;
-
- if (!ch->connected || target->qp_in_error)
- return -1;
-
-- init_completion(&ch->tsk_mgmt_done);
--
- /*
- * Lock the rport mutex to avoid that srp_create_ch_ib() is
- * invoked while a task management function is being sent.
-@@ -2506,10 +2510,16 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
-
- tsk_mgmt->opcode = SRP_TSK_MGMT;
- int_to_scsilun(lun, &tsk_mgmt->lun);
-- tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
- tsk_mgmt->tsk_mgmt_func = func;
- tsk_mgmt->task_tag = req_tag;
-
-+ spin_lock_irq(&ch->lock);
-+ ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
-+ tsk_mgmt->tag = ch->tsk_mgmt_tag;
-+ spin_unlock_irq(&ch->lock);
-+
-+ init_completion(&ch->tsk_mgmt_done);
-+
- ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
- DMA_TO_DEVICE);
- if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
-@@ -2518,13 +2528,15 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
-
- return -1;
- }
-+ res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
-+ msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
-+ if (res > 0 && status)
-+ *status = ch->tsk_mgmt_status;
- mutex_unlock(&rport->mutex);
-
-- if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
-- msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
-- return -1;
-+ WARN_ON_ONCE(res < 0);
-
-- return 0;
-+ return res > 0 ? 0 : -1;
- }
-
- static int srp_abort(struct scsi_cmnd *scmnd)
-@@ -2550,7 +2562,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
- shost_printk(KERN_ERR, target->scsi_host,
- "Sending SRP abort for tag %#x\n", tag);
- if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
-- SRP_TSK_ABORT_TASK) == 0)
-+ SRP_TSK_ABORT_TASK, NULL) == 0)
- ret = SUCCESS;
- else if (target->rport->state == SRP_RPORT_LOST)
- ret = FAST_IO_FAIL;
-@@ -2568,14 +2580,15 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
- struct srp_target_port *target = host_to_target(scmnd->device->host);
- struct srp_rdma_ch *ch;
- int i;
-+ u8 status;
-
- shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
-
- ch = &target->ch[0];
- if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
-- SRP_TSK_LUN_RESET))
-+ SRP_TSK_LUN_RESET, &status))
- return FAILED;
-- if (ch->tsk_mgmt_status)
-+ if (status)
- return FAILED;
-
- for (i = 0; i < target->ch_count; i++) {
-diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
-index f6af531f9f32..109eea94d0f9 100644
---- a/drivers/infiniband/ulp/srp/ib_srp.h
-+++ b/drivers/infiniband/ulp/srp/ib_srp.h
-@@ -168,6 +168,7 @@ struct srp_rdma_ch {
- int max_ti_iu_len;
- int comp_vector;
-
-+ u64 tsk_mgmt_tag;
- struct completion tsk_mgmt_done;
- u8 tsk_mgmt_status;
- bool connected;
---
-2.12.2
-
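The core of the fix is that the task management tag is now generated and
checked under the same lock, so a response that arrives after a timeout,
belonging to an older task management function, can be recognized and
dropped. A standalone sketch of that scheme follows; names are
illustrative (the driver uses ch->lock and SRP_TAG_TSK_MGMT), and this is
a model of the idea, not the driver code:

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TAG_TSK_MGMT (1ULL << 63)   /* stand-in for SRP_TAG_TSK_MGMT */

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static uint64_t tsk_mgmt_tag;

    /* Allocate a fresh tag for each task management function. */
    static uint64_t issue_tsk_mgmt(void)
    {
        uint64_t tag;

        pthread_mutex_lock(&lock);
        tag = tsk_mgmt_tag = (tsk_mgmt_tag + 1) | TAG_TSK_MGMT;
        pthread_mutex_unlock(&lock);
        return tag;
    }

    /* Accept a response only if it matches the most recently issued tag. */
    static void process_rsp(uint64_t rsp_tag)
    {
        pthread_mutex_lock(&lock);
        if (rsp_tag == tsk_mgmt_tag)
            printf("tag %#llx: completing waiter\n",
                   (unsigned long long)rsp_tag);
        else
            printf("tag %#llx: response too late, ignored\n",
                   (unsigned long long)rsp_tag);
        pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        uint64_t old = issue_tsk_mgmt();    /* this TMF times out... */
        uint64_t cur = issue_tsk_mgmt();    /* ...and a new one starts */

        process_rsp(old);   /* late response for the old TMF: dropped */
        process_rsp(cur);   /* response for the current TMF: accepted */
        return 0;
    }
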
-From ca739e3fd7dc803d526ea5bb9b80c0d07fbca55f Mon Sep 17 00:00:00 2001
-From: Nicholas Bellinger <nab@linux-iscsi.org>
-Date: Wed, 22 Feb 2017 22:06:32 -0800
-Subject: [PATCH 020/251] target: Fix NULL dereference during LUN lookup +
- active I/O shutdown
-Content-Length: 6768
-Lines: 191
-
-commit bd4e2d2907fa23a11d46217064ecf80470ddae10 upstream.
-
-When transport_clear_lun_ref() is shutting down a se_lun via
-configfs with new I/O in-flight, it's possible to trigger a
-NULL pointer dereference in transport_lookup_cmd_lun() because
-percpu_ref_get() doesn't do any __PERCPU_REF_DEAD checking
-before incrementing lun->lun_ref.count after lun->lun_ref has
-switched to atomic_t mode.
-
-This results in a NULL pointer dereference as LUN shutdown
-code in core_tpg_remove_lun() continues running after the
-existing ->release() -> core_tpg_lun_ref_release() callback
-completes, and clears the RCU protected se_lun->lun_se_dev
-pointer.
-
-During the Oops, the state of lun->lun_ref in the process
-which triggered the NULL pointer dereference looks like
-the following on v4.1.y stable code:
-
-struct se_lun {
- lun_link_magic = 4294932337,
- lun_status = TRANSPORT_LUN_STATUS_FREE,
-
- .....
-
- lun_se_dev = 0x0,
- lun_sep = 0x0,
-
- .....
-
- lun_ref = {
- count = {
- counter = 1
- },
- percpu_count_ptr = 3,
- release = 0xffffffffa02fa1e0 <core_tpg_lun_ref_release>,
- confirm_switch = 0x0,
- force_atomic = false,
- rcu = {
- next = 0xffff88154fa1a5d0,
- func = 0xffffffff8137c4c0 <percpu_ref_switch_to_atomic_rcu>
- }
- }
-}
-
-To address this bug, use percpu_ref_tryget_live() to ensure that
-once __PERCPU_REF_DEAD is visible on all CPUs and ->lun_ref
-has switched to atomic_t, all new I/Os will fail to obtain
-a new lun->lun_ref reference.
-
-Also use an explicit percpu_ref_kill_and_confirm() callback
-to block on ->lun_ref_comp to allow the first stage and
-associated RCU grace period to complete, and then block on
-->lun_shutdown_comp waiting for the final percpu_ref_put()
-to drop the last reference via transport_lun_remove_cmd()
-before continuing with core_tpg_remove_lun() shutdown.
-
-Reported-by: Rob Millner <rlm@daterainc.com>
-Tested-by: Rob Millner <rlm@daterainc.com>
-Cc: Rob Millner <rlm@daterainc.com>
-Tested-by: Vaibhav Tandon <vst@datera.io>
-Cc: Vaibhav Tandon <vst@datera.io>
-Tested-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
-Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/target/target_core_device.c | 10 ++++++++--
- drivers/target/target_core_tpg.c | 3 ++-
- drivers/target/target_core_transport.c | 31 ++++++++++++++++++++++++++++++-
- include/target/target_core_base.h | 1 +
- 4 files changed, 41 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
-index 356c80fbb304..bb6a6c35324a 100644
---- a/drivers/target/target_core_device.c
-+++ b/drivers/target/target_core_device.c
-@@ -77,12 +77,16 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun)
- &deve->read_bytes);
-
- se_lun = rcu_dereference(deve->se_lun);
-+
-+ if (!percpu_ref_tryget_live(&se_lun->lun_ref)) {
-+ se_lun = NULL;
-+ goto out_unlock;
-+ }
-+
- se_cmd->se_lun = rcu_dereference(deve->se_lun);
- se_cmd->pr_res_key = deve->pr_res_key;
- se_cmd->orig_fe_lun = unpacked_lun;
- se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD;
--
-- percpu_ref_get(&se_lun->lun_ref);
- se_cmd->lun_ref_active = true;
-
- if ((se_cmd->data_direction == DMA_TO_DEVICE) &&
-@@ -96,6 +100,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun)
- goto ref_dev;
- }
- }
-+out_unlock:
- rcu_read_unlock();
-
- if (!se_lun) {
-@@ -826,6 +831,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
- xcopy_lun = &dev->xcopy_lun;
- rcu_assign_pointer(xcopy_lun->lun_se_dev, dev);
- init_completion(&xcopy_lun->lun_ref_comp);
-+ init_completion(&xcopy_lun->lun_shutdown_comp);
- INIT_LIST_HEAD(&xcopy_lun->lun_deve_list);
- INIT_LIST_HEAD(&xcopy_lun->lun_dev_link);
- mutex_init(&xcopy_lun->lun_tg_pt_md_mutex);
-diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
-index 028854cda97b..2794c6ec5c3c 100644
---- a/drivers/target/target_core_tpg.c
-+++ b/drivers/target/target_core_tpg.c
-@@ -539,7 +539,7 @@ static void core_tpg_lun_ref_release(struct percpu_ref *ref)
- {
- struct se_lun *lun = container_of(ref, struct se_lun, lun_ref);
-
-- complete(&lun->lun_ref_comp);
-+ complete(&lun->lun_shutdown_comp);
- }
-
- int core_tpg_register(
-@@ -666,6 +666,7 @@ struct se_lun *core_tpg_alloc_lun(
- lun->lun_link_magic = SE_LUN_LINK_MAGIC;
- atomic_set(&lun->lun_acl_count, 0);
- init_completion(&lun->lun_ref_comp);
-+ init_completion(&lun->lun_shutdown_comp);
- INIT_LIST_HEAD(&lun->lun_deve_list);
- INIT_LIST_HEAD(&lun->lun_dev_link);
- atomic_set(&lun->lun_tg_pt_secondary_offline, 0);
-diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
-index befe22744802..df2059984e14 100644
---- a/drivers/target/target_core_transport.c
-+++ b/drivers/target/target_core_transport.c
-@@ -2680,10 +2680,39 @@ void target_wait_for_sess_cmds(struct se_session *se_sess)
- }
- EXPORT_SYMBOL(target_wait_for_sess_cmds);
-
-+static void target_lun_confirm(struct percpu_ref *ref)
-+{
-+ struct se_lun *lun = container_of(ref, struct se_lun, lun_ref);
-+
-+ complete(&lun->lun_ref_comp);
-+}
-+
- void transport_clear_lun_ref(struct se_lun *lun)
- {
-- percpu_ref_kill(&lun->lun_ref);
-+ /*
-+ * Mark the percpu-ref as DEAD, switch to atomic_t mode, drop
-+ * the initial reference and schedule confirm kill to be
-+ * executed after one full RCU grace period has completed.
-+ */
-+ percpu_ref_kill_and_confirm(&lun->lun_ref, target_lun_confirm);
-+ /*
-+ * The first completion waits for percpu_ref_switch_to_atomic_rcu()
-+ * to call target_lun_confirm after lun->lun_ref has been marked
-+ * as __PERCPU_REF_DEAD on all CPUs, and switches to atomic_t
-+ * mode so that percpu_ref_tryget_live() lookup of lun->lun_ref
-+ * fails for all new incoming I/O.
-+ */
- wait_for_completion(&lun->lun_ref_comp);
-+ /*
-+ * The second completion waits for percpu_ref_put_many() to
-+ * invoke ->release() after lun->lun_ref has switched to
-+ * atomic_t mode, and lun->lun_ref.count has reached zero.
-+ *
-+ * At this point all target-core lun->lun_ref references have
-+ * been dropped via transport_lun_remove_cmd(), and it's safe
-+ * to proceed with the remaining LUN shutdown.
-+ */
-+ wait_for_completion(&lun->lun_shutdown_comp);
- }
-
- static bool
-diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
-index 800fe16cc36f..ed66414b91f0 100644
---- a/include/target/target_core_base.h
-+++ b/include/target/target_core_base.h
-@@ -740,6 +740,7 @@ struct se_lun {
- struct config_group lun_group;
- struct se_port_stat_grps port_stat_grps;
- struct completion lun_ref_comp;
-+ struct completion lun_shutdown_comp;
- struct percpu_ref lun_ref;
- struct list_head lun_dev_link;
- struct hlist_node link;
---
-2.12.2
-
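The essence of the lookup-side change is the difference between
percpu_ref_get(), which increments unconditionally, and
percpu_ref_tryget_live(), which refuses once shutdown has begun. Below is
a simplified, single-threaded model of those semantics; it is an
illustration only, not the kernel's percpu_ref implementation (which keeps
per-CPU counters and real memory ordering):

    #include <stdbool.h>
    #include <stdio.h>

    struct ref {
        long count;
        bool dead;              /* stands in for __PERCPU_REF_DEAD */
    };

    /* Old lookup path: increments with no liveness check. */
    static void ref_get(struct ref *r)
    {
        r->count++;
    }

    /* New lookup path: refuses new references once shutdown has begun. */
    static bool ref_tryget_live(struct ref *r)
    {
        if (r->dead)
            return false;
        r->count++;
        return true;
    }

    int main(void)
    {
        struct ref lun_ref = { .count = 1, .dead = false };

        lun_ref.dead = true;    /* transport_clear_lun_ref() starts */

        ref_get(&lun_ref);      /* old code: succeeds and races shutdown */
        printf("percpu_ref_get after kill: count=%ld (the bug)\n",
               lun_ref.count);

        if (!ref_tryget_live(&lun_ref))
            printf("percpu_ref_tryget_live after kill: refused (the fix)\n");
        return 0;
    }
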
-From 0d80ac62b609bce00b78a656b7cdde2d8f587345 Mon Sep 17 00:00:00 2001
-From: Alex Deucher <alexander.deucher@amd.com>
-Date: Fri, 10 Feb 2017 00:00:52 -0500
-Subject: [PATCH 025/251] drm/amdgpu: add more cases to DCE11 possible crtc
- mask setup
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-Content-Length: 1129
-Lines: 38
-
-commit 4ce3bd45b351633f2a0512c587f7fcba2ce044e8 upstream.
-
-Add cases for ASICs with 3 and 5 CRTCs, removing an artificial
-limitation on those ASICs.
-
-Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=99744
-
-Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
-Reviewed-by: Christian König <christian.koenig@amd.com>
-Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
-index c161eeda417b..267749a94c5a 100644
---- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
-+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
-@@ -3704,9 +3704,15 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev,
- default:
- encoder->possible_crtcs = 0x3;
- break;
-+ case 3:
-+ encoder->possible_crtcs = 0x7;
-+ break;
- case 4:
- encoder->possible_crtcs = 0xf;
- break;
-+ case 5:
-+ encoder->possible_crtcs = 0x1f;
-+ break;
- case 6:
- encoder->possible_crtcs = 0x3f;
- break;
---
-2.12.2
-
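Each case in the switch above is just the mask with the n low bits set:
0x3, 0x7, 0xf, 0x1f, 0x3f for 2 through 6 CRTCs. The driver keeps the
explicit switch, but the mask could equally be computed, as this
illustrative snippet shows:

    #include <stdio.h>

    static unsigned int crtc_mask(unsigned int num_crtc)
    {
        return (1u << num_crtc) - 1;    /* the n low bits set */
    }

    int main(void)
    {
        for (unsigned int n = 2; n <= 6; n++)
            printf("%u crtcs -> possible_crtcs = %#x\n", n, crtc_mask(n));
        return 0;
    }
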
-From 8b787652386e26c7974092f11bd477126b0d53ce Mon Sep 17 00:00:00 2001
-From: "Y.C. Chen" <yc_chen@aspeedtech.com>
-Date: Wed, 22 Feb 2017 15:10:50 +1100
-Subject: [PATCH 026/251] drm/ast: Fix test for VGA enabled
-Content-Length: 1240
-Lines: 38
-
-commit 905f21a49d388de3e99438235f3301cabf0c0ef4 upstream.
-
-The test to see if VGA was already enabled performs an unnecessary
-second read from a register that may or may not have been initialized
-to a valid value. Remove the second read.
-
-Signed-off-by: Y.C. Chen <yc_chen@aspeedtech.com>
-Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-Acked-by: Joel Stanley <joel@jms.id.au>
-Tested-by: Y.C. Chen <yc_chen@aspeedtech.com>
-Signed-off-by: Dave Airlie <airlied@redhat.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/ast/ast_post.c | 8 ++------
- 1 file changed, 2 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c
-index 810c51d92b99..4e8aaab5dd52 100644
---- a/drivers/gpu/drm/ast/ast_post.c
-+++ b/drivers/gpu/drm/ast/ast_post.c
-@@ -58,13 +58,9 @@ bool ast_is_vga_enabled(struct drm_device *dev)
- /* TODO 1180 */
- } else {
- ch = ast_io_read8(ast, AST_IO_VGA_ENABLE_PORT);
-- if (ch) {
-- ast_open_key(ast);
-- ch = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0xff);
-- return ch & 0x04;
-- }
-+ return !!(ch & 0x01);
- }
-- return 0;
-+ return false;
- }
-
- static const u8 extreginfo[] = { 0x0f, 0x04, 0x1c, 0xff };
---
-2.12.2
-
-From 93eab4f5259485e9cad0339a298b6da1dd2e6e40 Mon Sep 17 00:00:00 2001
-From: "Y.C. Chen" <yc_chen@aspeedtech.com>
-Date: Wed, 22 Feb 2017 15:14:19 +1100
-Subject: [PATCH 027/251] drm/ast: Call open_key before enable_mmio in POST
- code
-Content-Length: 1014
-Lines: 30
-
-commit 9bb92f51558f2ef5f56c257bdcea0588f31d857e upstream.
-
-open_key enables access to the registers used by enable_mmio, so it
-must be called first.
-
-Signed-off-by: Y.C. Chen <yc_chen@aspeedtech.com>
-Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-Acked-by: Joel Stanley <joel@jms.id.au>
-Tested-by: Y.C. Chen <yc_chen@aspeedtech.com>
-Signed-off-by: Dave Airlie <airlied@redhat.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/ast/ast_post.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c
-index 4e8aaab5dd52..50836e549867 100644
---- a/drivers/gpu/drm/ast/ast_post.c
-+++ b/drivers/gpu/drm/ast/ast_post.c
-@@ -371,8 +371,8 @@ void ast_post_gpu(struct drm_device *dev)
- pci_write_config_dword(ast->dev->pdev, 0x04, reg);
-
- ast_enable_vga(dev);
-- ast_enable_mmio(dev);
- ast_open_key(ast);
-+ ast_enable_mmio(dev);
- ast_set_def_ext_reg(dev);
-
- if (ast->chip == AST2300 || ast->chip == AST2400)
---
-2.12.2
-
-From b9cfd5517b309513e50d80b89eaae98a82a2c3b1 Mon Sep 17 00:00:00 2001
-From: "Y.C. Chen" <yc_chen@aspeedtech.com>
-Date: Thu, 23 Feb 2017 15:52:33 +0800
-Subject: [PATCH 028/251] drm/ast: Fix AST2400 POST failure without BMC FW or
- VBIOS
-Content-Length: 2034
-Lines: 70
-
-commit 3856081eede297b617560b85e948cfb00bb395ec upstream.
-
-The current POST code for the AST2300/2400 family doesn't work properly
-if the chip hasn't previously been initialized by either the BMC's own
-firmware or the VBIOS. This fixes it.
-
-Signed-off-by: Y.C. Chen <yc_chen@aspeedtech.com>
-Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-Tested-by: Y.C. Chen <yc_chen@aspeedtech.com>
-Acked-by: Joel Stanley <joel@jms.id.au>
-Signed-off-by: Dave Airlie <airlied@redhat.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/ast/ast_post.c | 38 +++++++++++++++++++++++++++++++++++---
- 1 file changed, 35 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c
-index 50836e549867..30672a3df8a9 100644
---- a/drivers/gpu/drm/ast/ast_post.c
-+++ b/drivers/gpu/drm/ast/ast_post.c
-@@ -1626,12 +1626,44 @@ static void ast_init_dram_2300(struct drm_device *dev)
- temp |= 0x73;
- ast_write32(ast, 0x12008, temp);
-
-+ param.dram_freq = 396;
- param.dram_type = AST_DDR3;
-+ temp = ast_mindwm(ast, 0x1e6e2070);
- if (temp & 0x01000000)
- param.dram_type = AST_DDR2;
-- param.dram_chipid = ast->dram_type;
-- param.dram_freq = ast->mclk;
-- param.vram_size = ast->vram_size;
-+ switch (temp & 0x18000000) {
-+ case 0:
-+ param.dram_chipid = AST_DRAM_512Mx16;
-+ break;
-+ default:
-+ case 0x08000000:
-+ param.dram_chipid = AST_DRAM_1Gx16;
-+ break;
-+ case 0x10000000:
-+ param.dram_chipid = AST_DRAM_2Gx16;
-+ break;
-+ case 0x18000000:
-+ param.dram_chipid = AST_DRAM_4Gx16;
-+ break;
-+ }
-+ switch (temp & 0x0c) {
-+ default:
-+ case 0x00:
-+ param.vram_size = AST_VIDMEM_SIZE_8M;
-+ break;
-+
-+ case 0x04:
-+ param.vram_size = AST_VIDMEM_SIZE_16M;
-+ break;
-+
-+ case 0x08:
-+ param.vram_size = AST_VIDMEM_SIZE_32M;
-+ break;
-+
-+ case 0x0c:
-+ param.vram_size = AST_VIDMEM_SIZE_64M;
-+ break;
-+ }
-
- if (param.dram_type == AST_DDR3) {
- get_ddr3_info(ast, ¶m);
---
-2.12.2
-
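The switch statements above decode the DRAM parameters from bitfields of
the hardware strap register at 0x1e6e2070 instead of trusting values
cached at probe time, which are bogus when neither the BMC firmware nor
the VBIOS has run. A sketch of the same decode using shifts and masks;
the field positions follow the switch cases in the patch, and the names
and example value are illustrative:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t strap = 0x09000008;    /* example readout of 0x1e6e2070 */

        const char *type = (strap & 0x01000000) ? "DDR2" : "DDR3";
        static const char *const chip[] = {
            "512Mx16", "1Gx16", "2Gx16", "4Gx16"    /* bits 28:27 */
        };
        static const unsigned int vram_mb[] = { 8, 16, 32, 64 };  /* bits 3:2 */

        printf("dram_type   = %s\n", type);
        printf("dram_chipid = AST_DRAM_%s\n", chip[(strap >> 27) & 3]);
        printf("vram_size   = %u MiB\n", vram_mb[(strap >> 2) & 3]);
        return 0;
    }
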
-From 36fd36b900b9382af54a1e49a81cd99663b83eda Mon Sep 17 00:00:00 2001
-From: Tomeu Vizoso <tomeu.vizoso@collabora.com>
-Date: Mon, 20 Feb 2017 16:25:45 +0100
-Subject: [PATCH 029/251] drm/edid: Add EDID_QUIRK_FORCE_8BPC quirk for Rotel
- RSX-1058
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-Content-Length: 1596
-Lines: 44
-
-commit 36fc579761b50784b63dafd0f2e796b659e0f5ee upstream.
-
-The Rotel RSX-1058 is a receiver with 4 HDMI inputs and an HDMI output,
-all HDMI 1.1.
-
-When a sink that supports deep color is connected to the output, the
-receiver will send EDIDs that advertise this capability, even if it
-isn't possible with HDMI versions earlier than 1.3.
-
-Currently the kernel is assuming that deep color is possible and the
-sink displays an error.
-
-This quirk will make sure that deep color isn't used with this
-particular receiver.
-
-Fixes: 7a0baa623446 ("Revert "drm/i915: Disable 12bpc hdmi for now"")
-Signed-off-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
-Link: http://patchwork.freedesktop.org/patch/msgid/20170220152545.13153-1-tomeu.vizoso@collabora.com
-Cc: Matt Horan <matt@matthoran.com>
-Tested-by: Matt Horan <matt@matthoran.com>
-Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99869
-Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
-Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/drm_edid.c | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
-index 8c9ac021608f..cc1e16fd7e76 100644
---- a/drivers/gpu/drm/drm_edid.c
-+++ b/drivers/gpu/drm/drm_edid.c
-@@ -144,6 +144,9 @@ static struct edid_quirk {
-
- /* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */
- { "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC },
-+
-+ /* Rotel RSX-1058 forwards sink's EDID but only does HDMI 1.1*/
-+ { "ETR", 13896, EDID_QUIRK_FORCE_8BPC },
- };
-
- /*
---
-2.12.2
-
-From 59fc34fc69066bfabf8bed21f4ce5bf312e68bb3 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
-Date: Wed, 25 Jan 2017 17:21:31 +0900
-Subject: [PATCH 030/251] drm/ttm: Make sure BOs being swapped out are
- cacheable
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-Content-Length: 1580
-Lines: 44
-
-commit 239ac65fa5ffab71adf66e642750f940e7241d99 upstream.
-
-The current caching state may not be tt_cached, even though the
-placement contains TTM_PL_FLAG_CACHED, because placement can contain
-multiple caching flags. Trying to swap out such a BO would trip up the
-
- BUG_ON(ttm->caching_state != tt_cached);
-
-in ttm_tt_swapout.
-
-Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
-Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
-Reviewed-by: Christian König <christian.koenig@amd.com>
-Reviewed-by: Sinclair Yeh <syeh@vmware.com>
-Signed-off-by: Christian König <christian.koenig@amd.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/ttm/ttm_bo.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
-index 4ae8b56b1847..037c38bb5333 100644
---- a/drivers/gpu/drm/ttm/ttm_bo.c
-+++ b/drivers/gpu/drm/ttm/ttm_bo.c
-@@ -1621,7 +1621,6 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
- struct ttm_buffer_object *bo;
- int ret = -EBUSY;
- int put_count;
-- uint32_t swap_placement = (TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM);
-
- spin_lock(&glob->lru_lock);
- list_for_each_entry(bo, &glob->swap_lru, swap) {
-@@ -1657,7 +1656,8 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
- if (unlikely(ret != 0))
- goto out;
-
-- if ((bo->mem.placement & swap_placement) != swap_placement) {
-+ if (bo->mem.mem_type != TTM_PL_SYSTEM ||
-+ bo->ttm->caching_state != tt_cached) {
- struct ttm_mem_reg evict_mem;
-
- evict_mem = bo->mem;
---
-2.12.2
-
-From bb5b96344ed378a1d5b8cf3bd149bb86919f3b9f Mon Sep 17 00:00:00 2001
-From: Dan Carpenter <dan.carpenter@oracle.com>
-Date: Wed, 8 Feb 2017 02:46:01 +0300
-Subject: [PATCH 031/251] drm/atomic: fix an error code in mode_fixup()
-Content-Length: 1297
-Lines: 34
-
-commit f9ad86e42d0303eeb8e0d41bb208153022ebd9d2 upstream.
-
-Having "ret" be a bool type works for everything except
-ret = funcs->atomic_check(). The other functions all return zero on
-error but ->atomic_check() returns negative error codes. We want to
-propagate the error code but instead we return 1.
-
-I found this bug with static analysis and I don't know if it affects
-run time.
-
-Fixes: 4cd4df8080a3 ("drm/atomic: Add ->atomic_check() to encoder helpers")
-Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
-Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-Link: http://patchwork.freedesktop.org/patch/msgid/20170207234601.GA23981@mwanda
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/drm_atomic_helper.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
-index 1ac29d703c12..ea443fafb934 100644
---- a/drivers/gpu/drm/drm_atomic_helper.c
-+++ b/drivers/gpu/drm/drm_atomic_helper.c
-@@ -265,7 +265,7 @@ mode_fixup(struct drm_atomic_state *state)
- struct drm_connector *connector;
- struct drm_connector_state *conn_state;
- int i;
-- bool ret;
-+ int ret;
-
- for_each_crtc_in_state(state, crtc, crtc_state, i) {
- if (!crtc_state->mode_changed &&
---
-2.12.2
-
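This bug class is worth a standalone demonstration: assigning a negative
errno to a bool collapses it to true, so returning it through an
int-returning function yields 1 instead of the error code. The snippet
below uses stub names, not the DRM code; it compiles and prints both
behaviors:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    static int atomic_check_stub(void)
    {
        return -EINVAL;         /* ->atomic_check() style error */
    }

    static int mode_fixup_buggy(void)
    {
        bool ret = atomic_check_stub(); /* -EINVAL truncated to true */

        return ret;             /* returns 1, not -EINVAL */
    }

    static int mode_fixup_fixed(void)
    {
        int ret = atomic_check_stub();

        return ret;             /* propagates -EINVAL */
    }

    int main(void)
    {
        printf("buggy: %d, fixed: %d\n",
               mode_fixup_buggy(), mode_fixup_fixed());
        return 0;
    }
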
-From 7952b6490bbce45e078c8c0e669df7a0a8f8948a Mon Sep 17 00:00:00 2001
-From: Hans de Goede <hdegoede@redhat.com>
-Date: Fri, 2 Dec 2016 15:29:04 +0100
-Subject: [PATCH 033/251] drm/i915/dsi: Do not clear DPOUNIT_CLOCK_GATE_DISABLE
- from vlv_init_display_clock_gating
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-Content-Length: 2606
-Lines: 62
-
-commit bb98e72adaf9d19719aba35f802d4836f5d5176c upstream.
-
-On my Cherrytrail CUBE iwork8 Air tablet PIPE-A would get stuck on loading
-i915 at boot on 1 out of every 3 boots, resulting in a non-functional LCD.
-Once the i915 driver has successfully loaded, the panel can be disabled /
-enabled without hitting this issue.
-
-The hang is caused by vlv_init_display_clock_gating() clearing
-the DPOUNIT_CLOCK_GATE_DISABLE bit in DSPCLK_GATE_D when called from
-chv_pipe_power_well_ops.enable() on driver load, while a pipe enabled
-by the BIOS is still driving the DSI LCD.
-
-Clearing this bit while DSI is in use is a known issue and
-intel_dsi_pre_enable() / intel_dsi_post_disable() already set / clear it
-as appropriate.
-
-This commit modifies vlv_init_display_clock_gating() to leave the
-DPOUNIT_CLOCK_GATE_DISABLE bit alone fixing the pipe getting stuck.
-
-Changes in v2:
--Replace PIPE-A with "a pipe" or "the pipe" in the commit msg and
-comment
-
-Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97330
-Signed-off-by: Hans de Goede <hdegoede@redhat.com>
-Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
-Link: http://patchwork.freedesktop.org/patch/msgid/20161202142904.25613-1-hdegoede@redhat.com
-Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
-(cherry picked from commit 721d484563e1a51ada760089c490cbc47e909756)
-Signed-off-by: Jani Nikula <jani.nikula@intel.com>
-Signed-off-by: River Zhou <riverzhou2000@163.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/i915/intel_pm.c | 13 ++++++++++++-
- 1 file changed, 12 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
-index 3f802163f7d4..e7c18519274a 100644
---- a/drivers/gpu/drm/i915/intel_pm.c
-+++ b/drivers/gpu/drm/i915/intel_pm.c
-@@ -6803,7 +6803,18 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
-
- static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv)
- {
-- I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
-+ u32 val;
-+
-+ /*
-+ * On driver load, a pipe may be active and driving a DSI display.
-+ * Preserve DPOUNIT_CLOCK_GATE_DISABLE to avoid the pipe getting stuck
-+ * (and never recovering) in this case. intel_dsi_post_disable() will
-+ * clear it when we turn off the display.
-+ */
-+ val = I915_READ(DSPCLK_GATE_D);
-+ val &= DPOUNIT_CLOCK_GATE_DISABLE;
-+ val |= VRHUNIT_CLOCK_GATE_DISABLE;
-+ I915_WRITE(DSPCLK_GATE_D, val);
-
- /*
- * Disable trickle feed and enable pnd deadline calculation
---
-2.12.2
-
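The fix above is a read-modify-write that preserves a single bit across
the register update instead of blindly overwriting it. A minimal
illustration with a plain variable standing in for the register; the bit
positions below are illustrative, not the real i915 mask values:

    #include <stdint.h>
    #include <stdio.h>

    #define VRHUNIT_CLOCK_GATE_DISABLE (1u << 28)   /* illustrative values, */
    #define DPOUNIT_CLOCK_GATE_DISABLE (1u << 4)    /* not the i915 masks */

    static uint32_t dspclk_gate_d = DPOUNIT_CLOCK_GATE_DISABLE; /* set by BIOS */

    int main(void)
    {
        uint32_t val;

        /* Old code, a blind write, would clear the DPOUNIT bit:
         *     dspclk_gate_d = VRHUNIT_CLOCK_GATE_DISABLE;
         */

        /* New code: keep only the bit that must survive, then set ours. */
        val = dspclk_gate_d;
        val &= DPOUNIT_CLOCK_GATE_DISABLE;
        val |= VRHUNIT_CLOCK_GATE_DISABLE;
        dspclk_gate_d = val;

        printf("DSPCLK_GATE_D = %#x (DPOUNIT bit preserved)\n", dspclk_gate_d);
        return 0;
    }
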
From 804a935963a91acd1764ba914f825dd2a29c5871 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 15 Mar 2017 09:57:56 +0800
Subject: [PATCH 037/251] Linux 4.4.54
+Status: RO
Content-Length: 301
Lines: 18
--
2.12.2
-From 5e45d834f762312e3031a8b6bba3bc2b1f9481ec Mon Sep 17 00:00:00 2001
-From: Arnd Bergmann <arnd@arndb.de>
-Date: Mon, 16 Jan 2017 14:20:54 +0100
-Subject: [PATCH 049/251] cpmac: remove hopeless #warning
-Content-Length: 1108
-Lines: 32
+From 2e4aff2405af6a4573299dee361a44903c9bb717 Mon Sep 17 00:00:00 2001
+From: Ralf Baechle <ralf@linux-mips.org>
+Date: Tue, 20 Sep 2016 14:33:01 +0200
+Subject: [PATCH 051/251] MIPS: DEC: Avoid la pseudo-instruction in delay slots
+Content-Length: 2448
+Lines: 81
+
+commit 3021773c7c3e75e20b693931a19362681e744ea9 upstream.
-commit d43e6fb4ac4abfe4ef7c102833ed02330ad701e0 upstream.
+When expanding the la or dla pseudo-instruction in a delay slot the GNU
+assembler will complain should the pseudo-instruction expand to multiple
+actual instructions, since only the first of them will be in the delay
+slot leading to the pseudo-instruction being only partially executed if
+the branch is taken. Use of PTR_LA in the dec int-handler.S leads to
+such warnings:
-The #warning was present 10 years ago when the driver first got merged.
-As the platform is rather obsolete by now, it seems very unlikely that
-the warning will cause anyone to fix the code properly.
+ arch/mips/dec/int-handler.S: Assembler messages:
+ arch/mips/dec/int-handler.S:149: Warning: macro instruction expanded into multiple instructions in a branch delay slot
+ arch/mips/dec/int-handler.S:198: Warning: macro instruction expanded into multiple instructions in a branch delay slot
-kernelci.org reports the warning for every build in the meantime, so
-I think it's better to just turn it into a code comment to reduce
-noise.
+Avoid this by open coding the PTR_LA macros.
-Signed-off-by: Arnd Bergmann <arnd@arndb.de>
-Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
- drivers/net/ethernet/ti/cpmac.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
-index d52ea3008946..7e8bce46e6b4 100644
---- a/drivers/net/ethernet/ti/cpmac.c
-+++ b/drivers/net/ethernet/ti/cpmac.c
-@@ -1237,7 +1237,7 @@ int cpmac_init(void)
- goto fail_alloc;
- }
-
--#warning FIXME: unhardcode gpio&reset bits
-+ /* FIXME: unhardcode gpio&reset bits */
- ar7_gpio_disable(26);
- ar7_gpio_disable(27);
- ar7_device_reset(AR7_RESET_BIT_CPMAC_LO);
---
-2.12.2
-
-From 5fad17434465a9e9ddddfb38a162e9e2e53e33a1 Mon Sep 17 00:00:00 2001
-From: Arnd Bergmann <arnd@arndb.de>
-Date: Thu, 25 Aug 2016 15:17:08 -0700
-Subject: [PATCH 050/251] mm: memcontrol: avoid unused function warning
-Content-Length: 2551
-Lines: 79
-
-commit 358c07fcc3b60ab08d77f1684de8bd81bcf49a1a upstream.
-
-A bugfix in v4.8-rc2 introduced a harmless warning when
-CONFIG_MEMCG_SWAP is disabled but CONFIG_MEMCG is enabled:
-
- mm/memcontrol.c:4085:27: error: 'mem_cgroup_id_get_online' defined but not used [-Werror=unused-function]
- static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
-
-This moves the function inside the #ifdef block that hides the
-calling function, to avoid the warning.
-
-Fixes: 1f47b61fb407 ("mm: memcontrol: fix swap counter leak on swapout from offline cgroup")
-Link: http://lkml.kernel.org/r/20160824113733.2776701-1-arnd@arndb.de
-Signed-off-by: Arnd Bergmann <arnd@arndb.de>
-Acked-by: Michal Hocko <mhocko@suse.com>
-Acked-by: Vladimir Davydov <vdavydov@virtuozzo.com>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- mm/memcontrol.c | 36 ++++++++++++++++++------------------
- 1 file changed, 18 insertions(+), 18 deletions(-)
-
-diff --git a/mm/memcontrol.c b/mm/memcontrol.c
-index 43eefe9d834c..e25b93a4267d 100644
---- a/mm/memcontrol.c
-+++ b/mm/memcontrol.c
-@@ -4150,24 +4150,6 @@ static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
- atomic_add(n, &memcg->id.ref);
- }
-
--static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
--{
-- while (!atomic_inc_not_zero(&memcg->id.ref)) {
-- /*
-- * The root cgroup cannot be destroyed, so it's refcount must
-- * always be >= 1.
-- */
-- if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
-- VM_BUG_ON(1);
-- break;
-- }
-- memcg = parent_mem_cgroup(memcg);
-- if (!memcg)
-- memcg = root_mem_cgroup;
-- }
-- return memcg;
--}
--
- static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
- {
- if (atomic_sub_and_test(n, &memcg->id.ref)) {
-@@ -5751,6 +5733,24 @@ static int __init mem_cgroup_init(void)
- subsys_initcall(mem_cgroup_init);
-
- #ifdef CONFIG_MEMCG_SWAP
-+static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
-+{
-+ while (!atomic_inc_not_zero(&memcg->id.ref)) {
-+ /*
-+ * The root cgroup cannot be destroyed, so it's refcount must
-+ * always be >= 1.
-+ */
-+ if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
-+ VM_BUG_ON(1);
-+ break;
-+ }
-+ memcg = parent_mem_cgroup(memcg);
-+ if (!memcg)
-+ memcg = root_mem_cgroup;
-+ }
-+ return memcg;
-+}
-+
- /**
- * mem_cgroup_swapout - transfer a memsw charge to swap
- * @page: page whose memsw charge to transfer
---
-2.12.2
-
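The warning arises because a static function's only caller is compiled
out, leaving the definition unused; moving the definition under the same
#ifdef as the caller removes it. A self-contained illustration of the
placement rule (build with -Wunused-function, with and without
-DUSE_HELPER):

    #include <stdio.h>

    /* If helper() were defined outside this #ifdef while caller() is its
     * only user, building without -DUSE_HELPER would emit
     * -Wunused-function. Keeping the definition and the caller under the
     * same guard, as the patch does, avoids that. */
    #ifdef USE_HELPER
    static int helper(void)
    {
        return 42;
    }

    static void caller(void)
    {
        printf("%d\n", helper());
    }
    #endif

    int main(void)
    {
    #ifdef USE_HELPER
        caller();
    #endif
        return 0;
    }
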
-From 2e4aff2405af6a4573299dee361a44903c9bb717 Mon Sep 17 00:00:00 2001
-From: Ralf Baechle <ralf@linux-mips.org>
-Date: Tue, 20 Sep 2016 14:33:01 +0200
-Subject: [PATCH 051/251] MIPS: DEC: Avoid la pseudo-instruction in delay slots
-Content-Length: 2448
-Lines: 81
-
-commit 3021773c7c3e75e20b693931a19362681e744ea9 upstream.
-
-When expanding the la or dla pseudo-instruction in a delay slot the GNU
-assembler will complain should the pseudo-instruction expand to multiple
-actual instructions, since only the first of them will be in the delay
-slot leading to the pseudo-instruction being only partially executed if
-the branch is taken. Use of PTR_LA in the dec int-handler.S leads to
-such warnings:
-
- arch/mips/dec/int-handler.S: Assembler messages:
- arch/mips/dec/int-handler.S:149: Warning: macro instruction expanded into multiple instructions in a branch delay slot
- arch/mips/dec/int-handler.S:198: Warning: macro instruction expanded into multiple instructions in a branch delay slot
-
-Avoid this by open coding the PTR_LA macros.
-
-Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/mips/dec/int-handler.S | 40 ++++++++++++++++++++++++++++++++++++++--
- 1 file changed, 38 insertions(+), 2 deletions(-)
+ arch/mips/dec/int-handler.S | 40 ++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 38 insertions(+), 2 deletions(-)
diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S
index 8c6f508e59de..554d1da97743 100644
--
2.12.2
-From 074893495b72c043a108797ffd6297db3e4af1dc Mon Sep 17 00:00:00 2001
-From: Rik van Riel <riel@redhat.com>
-Date: Wed, 28 Sep 2016 22:55:54 -0400
-Subject: [PATCH 053/251] tracing: Add #undef to fix compile error
-Content-Length: 1319
-Lines: 35
-
-commit bf7165cfa23695c51998231c4efa080fe1d3548d upstream.
-
-There are several trace include files that define TRACE_INCLUDE_FILE.
-
-Include several of them in the same .c file (as I currently have in
-some code I am working on), and the compile will blow up with a
-"warning: "TRACE_INCLUDE_FILE" redefined #define TRACE_INCLUDE_FILE syscalls"
-
-Every other include file in include/trace/events/ avoids that issue
-by having a #undef TRACE_INCLUDE_FILE before the #define; syscalls.h
-should have one, too.
-
-Link: http://lkml.kernel.org/r/20160928225554.13bd7ac6@annuminas.surriel.com
-
-Fixes: b8007ef74222 ("tracing: Separate raw syscall from syscall tracer")
-Signed-off-by: Rik van Riel <riel@redhat.com>
-Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- include/trace/events/syscalls.h | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h
-index 14e49c798135..b35533b94277 100644
---- a/include/trace/events/syscalls.h
-+++ b/include/trace/events/syscalls.h
-@@ -1,5 +1,6 @@
- #undef TRACE_SYSTEM
- #define TRACE_SYSTEM raw_syscalls
-+#undef TRACE_INCLUDE_FILE
- #define TRACE_INCLUDE_FILE syscalls
-
- #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ)
---
-2.12.2
-
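The one-line guard added above is the standard idiom for a macro that
several headers define to different values: #undef it before redefining.
A plain-preprocessor illustration, treating TRACE_INCLUDE_FILE as an
ordinary macro rather than the tracepoint machinery:

    #include <stdio.h>

    #define TRACE_INCLUDE_FILE irq  /* set by a previously included header */

    /* Without this #undef, the redefinition below draws
     * "warning: 'TRACE_INCLUDE_FILE' redefined". */
    #undef TRACE_INCLUDE_FILE
    #define TRACE_INCLUDE_FILE syscalls

    #define STR_(x) #x
    #define STR(x) STR_(x)

    int main(void)
    {
        printf("TRACE_INCLUDE_FILE = %s\n", STR(TRACE_INCLUDE_FILE));
        return 0;
    }
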
From 2ca39d1300152e70977797c3e39c105adfcc0e0b Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Date: Tue, 14 Feb 2017 14:46:42 +0530
Subject: [PATCH 054/251] powerpc: Emulation support for load/store
instructions on LE
+Status: RO
Content-Length: 3197
Lines: 106
Date: Thu, 2 Mar 2017 15:23:42 +0100
Subject: [PATCH 068/251] KVM: s390: Fix guest migration for huge guests
resulting in panic
+Status: RO
Content-Length: 1904
Lines: 58
--
2.12.2
-From a084aeef5633db4f649b699785f79676cb71ba6c Mon Sep 17 00:00:00 2001
-From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
-Date: Tue, 7 Feb 2017 18:09:14 +0100
-Subject: [PATCH 069/251] s390/kdump: Use "LINUX" ELF note name instead of
- "CORE"
-Content-Length: 3784
-Lines: 108
-
-commit a4a81d8eebdc1d209d034f62a082a5131e4242b5 upstream.
-
-binutils/libbfd (bfd/elf.c) enforces that all s390 specific ELF
-notes, e.g. NT_S390_PREFIX or NT_S390_CTRS, have "LINUX" specified
-as the note name. Otherwise the notes are ignored.
-
-For /proc/vmcore we currently use "CORE" for these notes.
-
-Up to now this has not been a real problem because the dump analysis tool
-"crash" does not check the note name. But it will break all programs that
-use libbfd for processing ELF notes.
-
-So fix this and use "LINUX" for all s390 specific notes to comply with
-libbfd.
-
-Reported-by: Philipp Rudo <prudo@linux.vnet.ibm.com>
-Reviewed-by: Philipp Rudo <prudo@linux.vnet.ibm.com>
-Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
-Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/s390/kernel/crash_dump.c | 18 ++++++++++--------
- 1 file changed, 10 insertions(+), 8 deletions(-)
-
-diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
-index 171e09bb8ea2..f7c3a61040bd 100644
---- a/arch/s390/kernel/crash_dump.c
-+++ b/arch/s390/kernel/crash_dump.c
-@@ -23,6 +23,8 @@
- #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
- #define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y))))
-
-+#define LINUX_NOTE_NAME "LINUX"
-+
- static struct memblock_region oldmem_region;
-
- static struct memblock_type oldmem_type = {
-@@ -312,7 +314,7 @@ static void *nt_fpregset(void *ptr, struct save_area *sa)
- static void *nt_s390_timer(void *ptr, struct save_area *sa)
- {
- return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer),
-- KEXEC_CORE_NOTE_NAME);
-+ LINUX_NOTE_NAME);
- }
-
- /*
-@@ -321,7 +323,7 @@ static void *nt_s390_timer(void *ptr, struct save_area *sa)
- static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa)
- {
- return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp,
-- sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME);
-+ sizeof(sa->clk_cmp), LINUX_NOTE_NAME);
- }
-
- /*
-@@ -330,7 +332,7 @@ static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa)
- static void *nt_s390_tod_preg(void *ptr, struct save_area *sa)
- {
- return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg,
-- sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME);
-+ sizeof(sa->tod_reg), LINUX_NOTE_NAME);
- }
-
- /*
-@@ -339,7 +341,7 @@ static void *nt_s390_tod_preg(void *ptr, struct save_area *sa)
- static void *nt_s390_ctrs(void *ptr, struct save_area *sa)
- {
- return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs,
-- sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME);
-+ sizeof(sa->ctrl_regs), LINUX_NOTE_NAME);
- }
-
- /*
-@@ -348,7 +350,7 @@ static void *nt_s390_ctrs(void *ptr, struct save_area *sa)
- static void *nt_s390_prefix(void *ptr, struct save_area *sa)
- {
- return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg,
-- sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME);
-+ sizeof(sa->pref_reg), LINUX_NOTE_NAME);
- }
-
- /*
-@@ -357,7 +359,7 @@ static void *nt_s390_prefix(void *ptr, struct save_area *sa)
- static void *nt_s390_vx_high(void *ptr, __vector128 *vx_regs)
- {
- return nt_init(ptr, NT_S390_VXRS_HIGH, &vx_regs[16],
-- 16 * sizeof(__vector128), KEXEC_CORE_NOTE_NAME);
-+ 16 * sizeof(__vector128), LINUX_NOTE_NAME);
- }
-
- /*
-@@ -370,12 +372,12 @@ static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs)
- int i;
-
- note = (Elf64_Nhdr *)ptr;
-- note->n_namesz = strlen(KEXEC_CORE_NOTE_NAME) + 1;
-+ note->n_namesz = strlen(LINUX_NOTE_NAME) + 1;
- note->n_descsz = 16 * 8;
- note->n_type = NT_S390_VXRS_LOW;
- len = sizeof(Elf64_Nhdr);
-
-- memcpy(ptr + len, KEXEC_CORE_NOTE_NAME, note->n_namesz);
-+ memcpy(ptr + len, LINUX_NOTE_NAME, note->n_namesz);
- len = roundup(len + note->n_namesz, 4);
-
- ptr += len;
---
-2.12.2
-
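For reference, an ELF note is an Elf64_Nhdr followed by a 4-byte-aligned
name and descriptor; the patch only changes which name string is emitted.
A userspace sketch of building one such note, simplified from the
kernel's nt_init() and using a fake payload (NT_S390_PREFIX is hardcoded
as 0x305 here rather than taken from a header):

    #include <elf.h>
    #include <stdio.h>
    #include <string.h>

    #define LINUX_NOTE_NAME "LINUX"
    #define ROUNDUP4(x) (((x) + 3) & ~3u)

    /* Emit one note into ptr: header, padded name, padded descriptor. */
    static size_t emit_note(void *ptr, int type, const void *desc, size_t dlen)
    {
        Elf64_Nhdr *note = ptr;
        char *p = ptr;
        size_t len = sizeof(*note);

        note->n_namesz = strlen(LINUX_NOTE_NAME) + 1;
        note->n_descsz = dlen;
        note->n_type = type;

        memcpy(p + len, LINUX_NOTE_NAME, note->n_namesz);
        len = ROUNDUP4(len + note->n_namesz);
        memcpy(p + len, desc, dlen);
        return ROUNDUP4(len + dlen);
    }

    int main(void)
    {
        unsigned char buf[64];
        unsigned long long prefix = 0x1000;     /* fake note payload */
        size_t n = emit_note(buf, 0x305 /* NT_S390_PREFIX */, &prefix,
                             sizeof(prefix));

        printf("emitted %zu-byte \"%s\" note\n", n, LINUX_NOTE_NAME);
        return 0;
    }
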
From 28ec98bc2e4a175b60f45d505e715a33b93dd077 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 18 Mar 2017 19:10:23 +0800
--
2.12.2
-From 0c0be310ba29e4a053e8aac934aebe590c5da909 Mon Sep 17 00:00:00 2001
-From: Florian Westphal <fw@strlen.de>
-Date: Thu, 18 Feb 2016 15:03:24 +0100
-Subject: [PATCH 074/251] netlink: remove mmapped netlink support
-Content-Length: 42335
-Lines: 1432
-
-commit d1b4c689d4130bcfd3532680b64db562300716b6 upstream.
-
-mmapped netlink has a number of unresolved issues:
-
-- TX zerocopy support had to be disabled more than a year ago via
- commit 4682a0358639b29cf ("netlink: Always copy on mmap TX.")
- because the content of the mmapped area can change after netlink
- attribute validation but before message processing.
-
-- RX support was implemented mainly to speed up nfqueue dumping packet
- payload to userspace. However, since commit ae08ce0021087a5d812d2
- ("netfilter: nfnetlink_queue: zero copy support") we avoid one copy
- with the socket-based interface too (via the skb_zerocopy helper).
-
-The other problem is that skbs attached to a mmaped netlink socket
-behave differently from normal skbs:
-
-- they don't have a shinfo area, so all functions that use skb_shinfo()
-(e.g. skb_clone) cannot be used.
-
-- reserving headroom prevents userspace from seeing the content as
-it expects message to start at skb->head.
-See for instance
-commit aa3a022094fa ("netlink: not trim skb for mmaped socket when dump").
-
-- skbs handed e.g. to netlink_ack must have non-NULL skb->sk, else we
-crash because it needs the sk to check if a tx ring is attached.
-
-This is also not obvious, and leads to non-intuitive bug fixes such as 7c7bdf359
-("netfilter: nfnetlink: use original skbuff when acking batches").
-
-mmaped netlink also didn't play nicely with the skb_zerocopy helper
-used by nfqueue and openvswitch. Daniel Borkmann fixed this via
-commit 6bb0fef489f6 ("netlink, mmap: fix edge-case leakages in nf queue
-zero-copy")' but at the cost of also needing to provide remaining
-length to the allocation function.
-
-nfqueue also has problems when used with mmaped rx netlink:
-- mmaped netlink doesn't allow use of nfqueue batch verdict messages.
- Problem is that in the mmap case, the allocation time also determines
- the ordering in which the frame will be seen by userspace (A
- allocating before B means that A is located in an earlier ring slot,
- but this also means that B might get a lower sequence number than A,
- since the seqno is decided later). To fix this we would need to extend the
- spinlocked region to also cover the allocation and message setup which
- isn't desirable.
-- nfqueue can now be configured to queue large (GSO) skbs to userspace.
- Queueing GSO packets is faster than having to force a software segmentation
- in the kernel, so this is a desirable option. However, with a mmap based
- ring one has to use 64kb per ring slot element, else mmap has to fall back
- to the socket path (NL_MMAP_STATUS_COPY) for all large packets.
-
-To use the mmap interface, userspace not only has to probe for mmap netlink
-support, it also has to implement a recv/socket receive path in order to
-handle messages that exceed the size of an rx ring element.
-
-Cc: Daniel Borkmann <daniel@iogearbox.net>
-Cc: Ken-ichirou MATSUZAWA <chamaken@gmail.com>
-Cc: Pablo Neira Ayuso <pablo@netfilter.org>
-Cc: Patrick McHardy <kaber@trash.net>
-Cc: Thomas Graf <tgraf@suug.ch>
-Signed-off-by: Florian Westphal <fw@strlen.de>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Cc: Shi Yuejie <shiyuejie@outlook.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- Documentation/networking/netlink_mmap.txt | 332 -------------
- include/uapi/linux/netlink.h | 4 +
- include/uapi/linux/netlink_diag.h | 2 +
- net/netlink/Kconfig | 9 -
- net/netlink/af_netlink.c | 751 +-----------------------------
- net/netlink/af_netlink.h | 15 -
- net/netlink/diag.c | 39 --
- 7 files changed, 14 insertions(+), 1138 deletions(-)
- delete mode 100644 Documentation/networking/netlink_mmap.txt
-
-diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt
-deleted file mode 100644
-index 54f10478e8e3..000000000000
---- a/Documentation/networking/netlink_mmap.txt
-+++ /dev/null
-@@ -1,332 +0,0 @@
--This file documents how to use memory mapped I/O with netlink.
--
--Author: Patrick McHardy <kaber@trash.net>
--
--Overview
----------
--
--Memory mapped netlink I/O can be used to increase throughput and decrease
--overhead of unicast receive and transmit operations. Some netlink subsystems
--require high throughput, these are mainly the netfilter subsystems
--nfnetlink_queue and nfnetlink_log, but it can also help speed up large
--dump operations of f.i. the routing database.
--
--Memory mapped netlink I/O uses two circular ring buffers for RX and TX which
--are mapped into the processes address space.
--
--The RX ring is used by the kernel to directly construct netlink messages into
--user-space memory without copying them as done with regular socket I/O,
--additionally as long as the ring contains messages no recvmsg() or poll()
--syscalls have to be issued by user-space to get more messages.
--
--The TX ring is used to process messages directly from user-space memory, the
--kernel processes all messages contained in the ring using a single sendmsg()
--call.
--
--Usage overview
----------------
--
--In order to use memory mapped netlink I/O, user-space needs three main changes:
--
--- ring setup
--- conversion of the RX path to get messages from the ring instead of recvmsg()
--- conversion of the TX path to construct messages into the ring
--
--Ring setup is done using setsockopt() to provide the ring parameters to the
--kernel, then a call to mmap() to map the ring into the processes address space:
--
--- setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, ¶ms, sizeof(params));
--- setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, ¶ms, sizeof(params));
--- ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)
--
--Usage of either ring is optional, but even if only the RX ring is used the
--mapping still needs to be writable in order to update the frame status after
--processing.
--
--Conversion of the reception path involves calling poll() on the file
--descriptor, once the socket is readable the frames from the ring are
--processed in order until no more messages are available, as indicated by
--a status word in the frame header.
--
--On kernel side, in order to make use of memory mapped I/O on receive, the
--originating netlink subsystem needs to support memory mapped I/O, otherwise
--it will use an allocated socket buffer as usual and the contents will be
--copied to the ring on transmission, nullifying most of the performance gains.
--Dumps of kernel databases automatically support memory mapped I/O.
--
--Conversion of the transmit path involves changing message construction to
--use memory from the TX ring instead of (usually) a buffer declared on the
--stack and setting up the frame header appropriately. Optionally poll() can
--be used to wait for free frames in the TX ring.
--
--Structures and definitions for using memory mapped I/O are contained in
--<linux/netlink.h>.
--
--RX and TX rings
------------------
--
--Each ring contains a number of continuous memory blocks, containing frames of
--fixed size dependent on the parameters used for ring setup.
--
--Ring: [ block 0 ]
-- [ frame 0 ]
-- [ frame 1 ]
-- [ block 1 ]
-- [ frame 2 ]
-- [ frame 3 ]
-- ...
-- [ block n ]
-- [ frame 2 * n ]
-- [ frame 2 * n + 1 ]
--
--The blocks are only visible to the kernel, from the point of view of user-space
--the ring just contains the frames in a continuous memory zone.
--
--The ring parameters used for setting up the ring are defined as follows:
--
--struct nl_mmap_req {
-- unsigned int nm_block_size;
-- unsigned int nm_block_nr;
-- unsigned int nm_frame_size;
-- unsigned int nm_frame_nr;
--};
--
--Frames are grouped into blocks, where each block is a continuous region of memory
--and holds nm_block_size / nm_frame_size frames. The total number of frames in
--the ring is nm_frame_nr. The following invariants hold:
--
--- frames_per_block = nm_block_size / nm_frame_size
--
--- nm_frame_nr = frames_per_block * nm_block_nr
--
--Some parameters are constrained, specifically:
--
--- nm_block_size must be a multiple of the architecture's memory page size.
-- The getpagesize() function can be used to get the page size.
--
--- nm_frame_size must be equal to or larger than NL_MMAP_HDRLEN, IOW a frame must be
-- able to hold at least the frame header
--
--- nm_frame_size must be smaller than or equal to nm_block_size
--
--- nm_frame_size must be a multiple of NL_MMAP_MSG_ALIGNMENT
--
--- nm_frame_nr must equal the actual number of frames as specified above.
--
--When the kernel can't allocate physically continuous memory for a ring block,
--it will fall back to use physically discontinuous memory. This might affect
--performance negatively, in order to avoid this the nm_frame_size parameter
--should be chosen to be as small as possible for the required frame size and
--the number of blocks should be increased instead.
--
--Ring frames
--------------
--
--Each frame contains a frame header, consisting of a synchronization word and some
--meta-data, and the message itself.
--
--Frame: [ header message ]
--
--The frame header is defined as follows:
--
--struct nl_mmap_hdr {
-- unsigned int nm_status;
-- unsigned int nm_len;
-- __u32 nm_group;
-- /* credentials */
-- __u32 nm_pid;
-- __u32 nm_uid;
-- __u32 nm_gid;
--};
--
--- nm_status is used for synchronizing processing between the kernel and user-
-- space and specifies ownership of the frame as well as the operation to perform
--
--- nm_len contains the length of the message contained in the data area
--
--- nm_group specifies the destination multicast group of the message
--
--- nm_pid, nm_uid and nm_gid contain the netlink pid, UID and GID of the sending
-- process. These values correspond to the data available using SOCK_PASSCRED in
-- the SCM_CREDENTIALS cmsg.
--
--The possible values in the status word are:
--
--- NL_MMAP_STATUS_UNUSED:
-- RX ring: frame belongs to the kernel and contains no message
-- for user-space. Appropriate action is to invoke poll()
-- to wait for new messages.
--
-- TX ring: frame belongs to user-space and can be used for
-- message construction.
--
--- NL_MMAP_STATUS_RESERVED:
-- RX ring only: frame is currently used by the kernel for message
-- construction and contains no valid message yet.
-- Appropriate action is to invoke poll() to wait for
-- new messages.
--
--- NL_MMAP_STATUS_VALID:
-- RX ring: frame contains a valid message. Appropriate action is
-- to process the message and release the frame back to
-- the kernel by setting the status to
-- NL_MMAP_STATUS_UNUSED or queue the frame by setting the
-- status to NL_MMAP_STATUS_SKIP.
--
-- TX ring: the frame contains a valid message from user-space to
-- be processed by the kernel. After completing processing
-- the kernel will release the frame back to user-space by
-- setting the status to NL_MMAP_STATUS_UNUSED.
--
--- NL_MMAP_STATUS_COPY:
-- RX ring only: a message is ready to be processed but could not be
-- stored in the ring, either because it exceeded the
-- frame size or because the originating subsystem does
-- not support memory mapped I/O. Appropriate action is
-- to invoke recvmsg() to receive the message and release
-- the frame back to the kernel by setting the status to
-- NL_MMAP_STATUS_UNUSED.
--
--- NL_MMAP_STATUS_SKIP:
-- RX ring only: user-space queued the message for later processing, but
-- processed some messages following it in the ring. The
-- kernel should skip this frame when looking for unused
-- frames.
--
--The data area of a frame begins at an offset of NL_MMAP_HDRLEN relative to the
--frame header.
--
--TX limitations
----------------
--
--As of Jan 2015 the message is always copied from the ring frame to an
--allocated buffer due to unresolved security concerns.
--See commit 4682a0358639b29cf ("netlink: Always copy on mmap TX.").
--
--Example
---------
--
--Ring setup:
--
-- unsigned int block_size = 16 * getpagesize();
-- struct nl_mmap_req req = {
-- .nm_block_size = block_size,
-- .nm_block_nr = 64,
-- .nm_frame_size = 16384,
-- .nm_frame_nr = 64 * block_size / 16384,
-- };
-- unsigned int ring_size;
-- void *rx_ring, *tx_ring;
--
-- /* Configure ring parameters */
-- if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
-- exit(1);
-- if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
-- exit(1)
--
-- /* Calculate size of each individual ring */
-- ring_size = req.nm_block_nr * req.nm_block_size;
--
-- /* Map RX/TX rings. The TX ring is located after the RX ring */
-- rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE,
-- MAP_SHARED, fd, 0);
-- if ((long)rx_ring == -1L)
-- exit(1);
--	tx_ring = rx_ring + ring_size;
--
--Message reception:
--
--This example assumes that parameters from the ring setup, such as frame_size
--and ring_size, are available.
--
-- unsigned int frame_offset = 0;
-- struct nl_mmap_hdr *hdr;
-- struct nlmsghdr *nlh;
-- unsigned char buf[16384];
-- ssize_t len;
--
-- while (1) {
-- struct pollfd pfds[1];
--
-- pfds[0].fd = fd;
-- pfds[0].events = POLLIN | POLLERR;
-- pfds[0].revents = 0;
--
--		if (poll(pfds, 1, -1) < 0 && errno != EINTR)
-- exit(1);
--
-- /* Check for errors. Error handling omitted */
-- if (pfds[0].revents & POLLERR)
-- <handle error>
--
-- /* If no new messages, poll again */
-- if (!(pfds[0].revents & POLLIN))
-- continue;
--
-- /* Process all frames */
-- while (1) {
-- /* Get next frame header */
-- hdr = rx_ring + frame_offset;
--
-- if (hdr->nm_status == NL_MMAP_STATUS_VALID) {
-- /* Regular memory mapped frame */
-- nlh = (void *)hdr + NL_MMAP_HDRLEN;
-- len = hdr->nm_len;
--
-- /* Release empty message immediately. May happen
-- * on error during message construction.
-- */
-- if (len == 0)
-- goto release;
-- } else if (hdr->nm_status == NL_MMAP_STATUS_COPY) {
-- /* Frame queued to socket receive queue */
-- len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
-- if (len <= 0)
-- break;
--				nlh = (struct nlmsghdr *)buf;
-- } else
-- /* No more messages to process, continue polling */
-- break;
--
-- process_msg(nlh);
--release:
-- /* Release frame back to the kernel */
-- hdr->nm_status = NL_MMAP_STATUS_UNUSED;
--
-- /* Advance frame offset to next frame */
-- frame_offset = (frame_offset + frame_size) % ring_size;
-- }
-- }
--
--Message transmission:
--
--This example assumes that parameters from the ring setup, such as frame_size
--and ring_size, are available. A single message is constructed and transmitted;
--to send multiple messages at once, they would be constructed in consecutive
--frames before a final call to sendto(). A batching sketch follows the example.
--
-- unsigned int frame_offset = 0;
-- struct nl_mmap_hdr *hdr;
-- struct nlmsghdr *nlh;
-- struct sockaddr_nl addr = {
-- .nl_family = AF_NETLINK,
-- };
--
-- hdr = tx_ring + frame_offset;
-- if (hdr->nm_status != NL_MMAP_STATUS_UNUSED)
-- /* No frame available. Use poll() to avoid. */
-- exit(1);
--
-- nlh = (void *)hdr + NL_MMAP_HDRLEN;
--
-- /* Build message */
-- build_message(nlh);
--
-- /* Fill frame header: length and status need to be set */
-- hdr->nm_len = nlh->nlmsg_len;
-- hdr->nm_status = NL_MMAP_STATUS_VALID;
--
-- if (sendto(fd, NULL, 0, 0, &addr, sizeof(addr)) < 0)
-- exit(1);
--
-- /* Advance frame offset to next frame */
-- frame_offset = (frame_offset + frame_size) % ring_size;
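--
--As a hedged illustration (not part of the original document), a batched
--variant could fill several consecutive frames before the single sendto()
--call; build_message(), frame_size and ring_size are assumed from the
--previous examples:
--
--	unsigned int i, n = 8;	/* number of messages to batch (example value) */
--
--	for (i = 0; i < n; i++) {
--		hdr = tx_ring + frame_offset;
--		if (hdr->nm_status != NL_MMAP_STATUS_UNUSED)
--			break;	/* ring full, send what has been built so far */
--		nlh = (void *)hdr + NL_MMAP_HDRLEN;
--		build_message(nlh);
--		hdr->nm_len = nlh->nlmsg_len;
--		hdr->nm_status = NL_MMAP_STATUS_VALID;
--		frame_offset = (frame_offset + frame_size) % ring_size;
--	}
--
--	/* A single call hands all VALID frames to the kernel */
--	if (sendto(fd, NULL, 0, 0, &addr, sizeof(addr)) < 0)
--		exit(1);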
-diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
-index f095155d8749..0dba4e4ed2be 100644
---- a/include/uapi/linux/netlink.h
-+++ b/include/uapi/linux/netlink.h
-@@ -107,8 +107,10 @@ struct nlmsgerr {
- #define NETLINK_PKTINFO 3
- #define NETLINK_BROADCAST_ERROR 4
- #define NETLINK_NO_ENOBUFS 5
-+#ifndef __KERNEL__
- #define NETLINK_RX_RING 6
- #define NETLINK_TX_RING 7
-+#endif
- #define NETLINK_LISTEN_ALL_NSID 8
- #define NETLINK_LIST_MEMBERSHIPS 9
- #define NETLINK_CAP_ACK 10
-@@ -134,6 +136,7 @@ struct nl_mmap_hdr {
- __u32 nm_gid;
- };
-
-+#ifndef __KERNEL__
- enum nl_mmap_status {
- NL_MMAP_STATUS_UNUSED,
- NL_MMAP_STATUS_RESERVED,
-@@ -145,6 +148,7 @@ enum nl_mmap_status {
- #define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO
- #define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT)
- #define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr))
-+#endif
-
- #define NET_MAJOR 36 /* Major 36 is reserved for networking */
-
-diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h
-index f2159d30d1f5..d79399394b46 100644
---- a/include/uapi/linux/netlink_diag.h
-+++ b/include/uapi/linux/netlink_diag.h
-@@ -48,6 +48,8 @@ enum {
-
- #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */
- #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */
-+#ifndef __KERNEL__
- #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */
-+#endif
-
- #endif
-diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig
-index 2c5e95e9bfbd..5d6e8c05b3d4 100644
---- a/net/netlink/Kconfig
-+++ b/net/netlink/Kconfig
-@@ -2,15 +2,6 @@
- # Netlink Sockets
- #
-
--config NETLINK_MMAP
-- bool "NETLINK: mmaped IO"
-- ---help---
-- This option enables support for memory mapped netlink IO. This
-- reduces overhead by avoiding copying data between kernel- and
-- userspace.
--
-- If unsure, say N.
--
- config NETLINK_DIAG
- tristate "NETLINK: socket monitoring interface"
- default n
-diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
-index 360700a2f46c..8e33019d8e7b 100644
---- a/net/netlink/af_netlink.c
-+++ b/net/netlink/af_netlink.c
-@@ -225,7 +225,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
-
- dev_hold(dev);
-
-- if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head))
-+ if (is_vmalloc_addr(skb->head))
- nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
- else
- nskb = skb_clone(skb, GFP_ATOMIC);
-@@ -300,610 +300,8 @@ static void netlink_rcv_wake(struct sock *sk)
- wake_up_interruptible(&nlk->wait);
- }
-
--#ifdef CONFIG_NETLINK_MMAP
--static bool netlink_rx_is_mmaped(struct sock *sk)
--{
-- return nlk_sk(sk)->rx_ring.pg_vec != NULL;
--}
--
--static bool netlink_tx_is_mmaped(struct sock *sk)
--{
-- return nlk_sk(sk)->tx_ring.pg_vec != NULL;
--}
--
--static __pure struct page *pgvec_to_page(const void *addr)
--{
-- if (is_vmalloc_addr(addr))
-- return vmalloc_to_page(addr);
-- else
-- return virt_to_page(addr);
--}
--
--static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
--{
-- unsigned int i;
--
-- for (i = 0; i < len; i++) {
-- if (pg_vec[i] != NULL) {
-- if (is_vmalloc_addr(pg_vec[i]))
-- vfree(pg_vec[i]);
-- else
-- free_pages((unsigned long)pg_vec[i], order);
-- }
-- }
-- kfree(pg_vec);
--}
--
--static void *alloc_one_pg_vec_page(unsigned long order)
--{
-- void *buffer;
-- gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
-- __GFP_NOWARN | __GFP_NORETRY;
--
-- buffer = (void *)__get_free_pages(gfp_flags, order);
-- if (buffer != NULL)
-- return buffer;
--
-- buffer = vzalloc((1 << order) * PAGE_SIZE);
-- if (buffer != NULL)
-- return buffer;
--
-- gfp_flags &= ~__GFP_NORETRY;
-- return (void *)__get_free_pages(gfp_flags, order);
--}
--
--static void **alloc_pg_vec(struct netlink_sock *nlk,
-- struct nl_mmap_req *req, unsigned int order)
--{
-- unsigned int block_nr = req->nm_block_nr;
-- unsigned int i;
-- void **pg_vec;
--
-- pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
-- if (pg_vec == NULL)
-- return NULL;
--
-- for (i = 0; i < block_nr; i++) {
-- pg_vec[i] = alloc_one_pg_vec_page(order);
-- if (pg_vec[i] == NULL)
-- goto err1;
-- }
--
-- return pg_vec;
--err1:
-- free_pg_vec(pg_vec, order, block_nr);
-- return NULL;
--}
--
--
--static void
--__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
-- unsigned int order)
--{
-- struct netlink_sock *nlk = nlk_sk(sk);
-- struct sk_buff_head *queue;
-- struct netlink_ring *ring;
--
-- queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
-- ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
--
-- spin_lock_bh(&queue->lock);
--
-- ring->frame_max = req->nm_frame_nr - 1;
-- ring->head = 0;
-- ring->frame_size = req->nm_frame_size;
-- ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
--
-- swap(ring->pg_vec_len, req->nm_block_nr);
-- swap(ring->pg_vec_order, order);
-- swap(ring->pg_vec, pg_vec);
--
-- __skb_queue_purge(queue);
-- spin_unlock_bh(&queue->lock);
--
-- WARN_ON(atomic_read(&nlk->mapped));
--
-- if (pg_vec)
-- free_pg_vec(pg_vec, order, req->nm_block_nr);
--}
--
--static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
-- bool tx_ring)
--{
-- struct netlink_sock *nlk = nlk_sk(sk);
-- struct netlink_ring *ring;
-- void **pg_vec = NULL;
-- unsigned int order = 0;
--
-- ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
--
-- if (atomic_read(&nlk->mapped))
-- return -EBUSY;
-- if (atomic_read(&ring->pending))
-- return -EBUSY;
--
-- if (req->nm_block_nr) {
-- if (ring->pg_vec != NULL)
-- return -EBUSY;
--
-- if ((int)req->nm_block_size <= 0)
-- return -EINVAL;
-- if (!PAGE_ALIGNED(req->nm_block_size))
-- return -EINVAL;
-- if (req->nm_frame_size < NL_MMAP_HDRLEN)
-- return -EINVAL;
-- if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
-- return -EINVAL;
--
-- ring->frames_per_block = req->nm_block_size /
-- req->nm_frame_size;
-- if (ring->frames_per_block == 0)
-- return -EINVAL;
-- if (ring->frames_per_block * req->nm_block_nr !=
-- req->nm_frame_nr)
-- return -EINVAL;
--
-- order = get_order(req->nm_block_size);
-- pg_vec = alloc_pg_vec(nlk, req, order);
-- if (pg_vec == NULL)
-- return -ENOMEM;
-- } else {
-- if (req->nm_frame_nr)
-- return -EINVAL;
-- }
--
-- mutex_lock(&nlk->pg_vec_lock);
-- if (atomic_read(&nlk->mapped) == 0) {
-- __netlink_set_ring(sk, req, tx_ring, pg_vec, order);
-- mutex_unlock(&nlk->pg_vec_lock);
-- return 0;
-- }
--
-- mutex_unlock(&nlk->pg_vec_lock);
--
-- if (pg_vec)
-- free_pg_vec(pg_vec, order, req->nm_block_nr);
--
-- return -EBUSY;
--}
--
--static void netlink_mm_open(struct vm_area_struct *vma)
--{
-- struct file *file = vma->vm_file;
-- struct socket *sock = file->private_data;
-- struct sock *sk = sock->sk;
--
-- if (sk)
-- atomic_inc(&nlk_sk(sk)->mapped);
--}
--
--static void netlink_mm_close(struct vm_area_struct *vma)
--{
-- struct file *file = vma->vm_file;
-- struct socket *sock = file->private_data;
-- struct sock *sk = sock->sk;
--
-- if (sk)
-- atomic_dec(&nlk_sk(sk)->mapped);
--}
--
--static const struct vm_operations_struct netlink_mmap_ops = {
-- .open = netlink_mm_open,
-- .close = netlink_mm_close,
--};
--
--static int netlink_mmap(struct file *file, struct socket *sock,
-- struct vm_area_struct *vma)
--{
-- struct sock *sk = sock->sk;
-- struct netlink_sock *nlk = nlk_sk(sk);
-- struct netlink_ring *ring;
-- unsigned long start, size, expected;
-- unsigned int i;
-- int err = -EINVAL;
--
-- if (vma->vm_pgoff)
-- return -EINVAL;
--
-- mutex_lock(&nlk->pg_vec_lock);
--
-- expected = 0;
-- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
-- if (ring->pg_vec == NULL)
-- continue;
-- expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
-- }
--
-- if (expected == 0)
-- goto out;
--
-- size = vma->vm_end - vma->vm_start;
-- if (size != expected)
-- goto out;
--
-- start = vma->vm_start;
-- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
-- if (ring->pg_vec == NULL)
-- continue;
--
-- for (i = 0; i < ring->pg_vec_len; i++) {
-- struct page *page;
-- void *kaddr = ring->pg_vec[i];
-- unsigned int pg_num;
--
-- for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
-- page = pgvec_to_page(kaddr);
-- err = vm_insert_page(vma, start, page);
-- if (err < 0)
-- goto out;
-- start += PAGE_SIZE;
-- kaddr += PAGE_SIZE;
-- }
-- }
-- }
--
-- atomic_inc(&nlk->mapped);
-- vma->vm_ops = &netlink_mmap_ops;
-- err = 0;
--out:
-- mutex_unlock(&nlk->pg_vec_lock);
-- return err;
--}
--
--static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
--{
--#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
-- struct page *p_start, *p_end;
--
-- /* First page is flushed through netlink_{get,set}_status */
-- p_start = pgvec_to_page(hdr + PAGE_SIZE);
-- p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
-- while (p_start <= p_end) {
-- flush_dcache_page(p_start);
-- p_start++;
-- }
--#endif
--}
--
--static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
--{
-- smp_rmb();
-- flush_dcache_page(pgvec_to_page(hdr));
-- return hdr->nm_status;
--}
--
--static void netlink_set_status(struct nl_mmap_hdr *hdr,
-- enum nl_mmap_status status)
--{
-- smp_mb();
-- hdr->nm_status = status;
-- flush_dcache_page(pgvec_to_page(hdr));
--}
--
--static struct nl_mmap_hdr *
--__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
--{
-- unsigned int pg_vec_pos, frame_off;
--
-- pg_vec_pos = pos / ring->frames_per_block;
-- frame_off = pos % ring->frames_per_block;
--
-- return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
--}
--
--static struct nl_mmap_hdr *
--netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
-- enum nl_mmap_status status)
--{
-- struct nl_mmap_hdr *hdr;
--
-- hdr = __netlink_lookup_frame(ring, pos);
-- if (netlink_get_status(hdr) != status)
-- return NULL;
--
-- return hdr;
--}
--
--static struct nl_mmap_hdr *
--netlink_current_frame(const struct netlink_ring *ring,
-- enum nl_mmap_status status)
--{
-- return netlink_lookup_frame(ring, ring->head, status);
--}
--
--static void netlink_increment_head(struct netlink_ring *ring)
--{
-- ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
--}
--
--static void netlink_forward_ring(struct netlink_ring *ring)
--{
-- unsigned int head = ring->head;
-- const struct nl_mmap_hdr *hdr;
--
-- do {
-- hdr = __netlink_lookup_frame(ring, ring->head);
-- if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
-- break;
-- if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
-- break;
-- netlink_increment_head(ring);
-- } while (ring->head != head);
--}
--
--static bool netlink_has_valid_frame(struct netlink_ring *ring)
--{
-- unsigned int head = ring->head, pos = head;
-- const struct nl_mmap_hdr *hdr;
--
-- do {
-- hdr = __netlink_lookup_frame(ring, pos);
-- if (hdr->nm_status == NL_MMAP_STATUS_VALID)
-- return true;
-- pos = pos != 0 ? pos - 1 : ring->frame_max;
-- } while (pos != head);
--
-- return false;
--}
--
--static bool netlink_dump_space(struct netlink_sock *nlk)
--{
-- struct netlink_ring *ring = &nlk->rx_ring;
-- struct nl_mmap_hdr *hdr;
-- unsigned int n;
--
-- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
-- if (hdr == NULL)
-- return false;
--
-- n = ring->head + ring->frame_max / 2;
-- if (n > ring->frame_max)
-- n -= ring->frame_max;
--
-- hdr = __netlink_lookup_frame(ring, n);
--
-- return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
--}
--
--static unsigned int netlink_poll(struct file *file, struct socket *sock,
-- poll_table *wait)
--{
-- struct sock *sk = sock->sk;
-- struct netlink_sock *nlk = nlk_sk(sk);
-- unsigned int mask;
-- int err;
--
-- if (nlk->rx_ring.pg_vec != NULL) {
-- /* Memory mapped sockets don't call recvmsg(), so flow control
-- * for dumps is performed here. A dump is allowed to continue
-- * if at least half the ring is unused.
-- */
-- while (nlk->cb_running && netlink_dump_space(nlk)) {
-- err = netlink_dump(sk);
-- if (err < 0) {
-- sk->sk_err = -err;
-- sk->sk_error_report(sk);
-- break;
-- }
-- }
-- netlink_rcv_wake(sk);
-- }
--
-- mask = datagram_poll(file, sock, wait);
--
-- /* We could already have received frames in the normal receive
-- * queue, that will show up as NL_MMAP_STATUS_COPY in the ring,
-- * so if mask contains pollin/etc already, there's no point
-- * walking the ring.
-- */
-- if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) {
-- spin_lock_bh(&sk->sk_receive_queue.lock);
-- if (nlk->rx_ring.pg_vec) {
-- if (netlink_has_valid_frame(&nlk->rx_ring))
-- mask |= POLLIN | POLLRDNORM;
-- }
-- spin_unlock_bh(&sk->sk_receive_queue.lock);
-- }
--
-- spin_lock_bh(&sk->sk_write_queue.lock);
-- if (nlk->tx_ring.pg_vec) {
-- if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
-- mask |= POLLOUT | POLLWRNORM;
-- }
-- spin_unlock_bh(&sk->sk_write_queue.lock);
--
-- return mask;
--}
--
--static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
--{
-- return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
--}
--
--static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
-- struct netlink_ring *ring,
-- struct nl_mmap_hdr *hdr)
--{
-- unsigned int size;
-- void *data;
--
-- size = ring->frame_size - NL_MMAP_HDRLEN;
-- data = (void *)hdr + NL_MMAP_HDRLEN;
--
-- skb->head = data;
-- skb->data = data;
-- skb_reset_tail_pointer(skb);
-- skb->end = skb->tail + size;
-- skb->len = 0;
--
-- skb->destructor = netlink_skb_destructor;
-- NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
-- NETLINK_CB(skb).sk = sk;
--}
--
--static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
-- u32 dst_portid, u32 dst_group,
-- struct scm_cookie *scm)
--{
-- struct netlink_sock *nlk = nlk_sk(sk);
-- struct netlink_ring *ring;
-- struct nl_mmap_hdr *hdr;
-- struct sk_buff *skb;
-- unsigned int maxlen;
-- int err = 0, len = 0;
--
-- mutex_lock(&nlk->pg_vec_lock);
--
-- ring = &nlk->tx_ring;
-- maxlen = ring->frame_size - NL_MMAP_HDRLEN;
--
-- do {
-- unsigned int nm_len;
--
-- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
-- if (hdr == NULL) {
-- if (!(msg->msg_flags & MSG_DONTWAIT) &&
-- atomic_read(&nlk->tx_ring.pending))
-- schedule();
-- continue;
-- }
--
-- nm_len = ACCESS_ONCE(hdr->nm_len);
-- if (nm_len > maxlen) {
-- err = -EINVAL;
-- goto out;
-- }
--
-- netlink_frame_flush_dcache(hdr, nm_len);
--
-- skb = alloc_skb(nm_len, GFP_KERNEL);
-- if (skb == NULL) {
-- err = -ENOBUFS;
-- goto out;
-- }
-- __skb_put(skb, nm_len);
-- memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
-- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
--
-- netlink_increment_head(ring);
--
-- NETLINK_CB(skb).portid = nlk->portid;
-- NETLINK_CB(skb).dst_group = dst_group;
-- NETLINK_CB(skb).creds = scm->creds;
--
-- err = security_netlink_send(sk, skb);
-- if (err) {
-- kfree_skb(skb);
-- goto out;
-- }
--
-- if (unlikely(dst_group)) {
-- atomic_inc(&skb->users);
-- netlink_broadcast(sk, skb, dst_portid, dst_group,
-- GFP_KERNEL);
-- }
-- err = netlink_unicast(sk, skb, dst_portid,
-- msg->msg_flags & MSG_DONTWAIT);
-- if (err < 0)
-- goto out;
-- len += err;
--
-- } while (hdr != NULL ||
-- (!(msg->msg_flags & MSG_DONTWAIT) &&
-- atomic_read(&nlk->tx_ring.pending)));
--
-- if (len > 0)
-- err = len;
--out:
-- mutex_unlock(&nlk->pg_vec_lock);
-- return err;
--}
--
--static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
--{
-- struct nl_mmap_hdr *hdr;
--
-- hdr = netlink_mmap_hdr(skb);
-- hdr->nm_len = skb->len;
-- hdr->nm_group = NETLINK_CB(skb).dst_group;
-- hdr->nm_pid = NETLINK_CB(skb).creds.pid;
-- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
-- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
-- netlink_frame_flush_dcache(hdr, hdr->nm_len);
-- netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
--
-- NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
-- kfree_skb(skb);
--}
--
--static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
--{
-- struct netlink_sock *nlk = nlk_sk(sk);
-- struct netlink_ring *ring = &nlk->rx_ring;
-- struct nl_mmap_hdr *hdr;
--
-- spin_lock_bh(&sk->sk_receive_queue.lock);
-- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
-- if (hdr == NULL) {
-- spin_unlock_bh(&sk->sk_receive_queue.lock);
-- kfree_skb(skb);
-- netlink_overrun(sk);
-- return;
-- }
-- netlink_increment_head(ring);
-- __skb_queue_tail(&sk->sk_receive_queue, skb);
-- spin_unlock_bh(&sk->sk_receive_queue.lock);
--
-- hdr->nm_len = skb->len;
-- hdr->nm_group = NETLINK_CB(skb).dst_group;
-- hdr->nm_pid = NETLINK_CB(skb).creds.pid;
-- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
-- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
-- netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
--}
--
--#else /* CONFIG_NETLINK_MMAP */
--#define netlink_rx_is_mmaped(sk) false
--#define netlink_tx_is_mmaped(sk) false
--#define netlink_mmap sock_no_mmap
--#define netlink_poll datagram_poll
--#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0
--#endif /* CONFIG_NETLINK_MMAP */
--
- static void netlink_skb_destructor(struct sk_buff *skb)
- {
--#ifdef CONFIG_NETLINK_MMAP
-- struct nl_mmap_hdr *hdr;
-- struct netlink_ring *ring;
-- struct sock *sk;
--
-- /* If a packet from the kernel to userspace was freed because of an
-- * error without being delivered to userspace, the kernel must reset
-- * the status. In the direction userspace to kernel, the status is
-- * always reset here after the packet was processed and freed.
-- */
-- if (netlink_skb_is_mmaped(skb)) {
-- hdr = netlink_mmap_hdr(skb);
-- sk = NETLINK_CB(skb).sk;
--
-- if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
-- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
-- ring = &nlk_sk(sk)->tx_ring;
-- } else {
-- if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
-- hdr->nm_len = 0;
-- netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
-- }
-- ring = &nlk_sk(sk)->rx_ring;
-- }
--
-- WARN_ON(atomic_read(&ring->pending) == 0);
-- atomic_dec(&ring->pending);
-- sock_put(sk);
--
-- skb->head = NULL;
-- }
--#endif
- if (is_vmalloc_addr(skb->head)) {
- if (!skb->cloned ||
- !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
-@@ -936,18 +334,6 @@ static void netlink_sock_destruct(struct sock *sk)
- }
-
- skb_queue_purge(&sk->sk_receive_queue);
--#ifdef CONFIG_NETLINK_MMAP
-- if (1) {
-- struct nl_mmap_req req;
--
-- memset(&req, 0, sizeof(req));
-- if (nlk->rx_ring.pg_vec)
-- __netlink_set_ring(sk, &req, false, NULL, 0);
-- memset(&req, 0, sizeof(req));
-- if (nlk->tx_ring.pg_vec)
-- __netlink_set_ring(sk, &req, true, NULL, 0);
-- }
--#endif /* CONFIG_NETLINK_MMAP */
-
- if (!sock_flag(sk, SOCK_DEAD)) {
- printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
-@@ -1201,9 +587,6 @@ static int __netlink_create(struct net *net, struct socket *sock,
- mutex_init(nlk->cb_mutex);
- }
- init_waitqueue_head(&nlk->wait);
--#ifdef CONFIG_NETLINK_MMAP
-- mutex_init(&nlk->pg_vec_lock);
--#endif
-
- sk->sk_destruct = netlink_sock_destruct;
- sk->sk_protocol = protocol;
-@@ -1745,8 +1128,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
- nlk = nlk_sk(sk);
-
- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-- test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
-- !netlink_skb_is_mmaped(skb)) {
-+ test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
- DECLARE_WAITQUEUE(wait, current);
- if (!*timeo) {
- if (!ssk || netlink_is_kernel(ssk))
-@@ -1784,14 +1166,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
-
- netlink_deliver_tap(skb);
-
--#ifdef CONFIG_NETLINK_MMAP
-- if (netlink_skb_is_mmaped(skb))
-- netlink_queue_mmaped_skb(sk, skb);
-- else if (netlink_rx_is_mmaped(sk))
-- netlink_ring_set_copied(sk, skb);
-- else
--#endif /* CONFIG_NETLINK_MMAP */
-- skb_queue_tail(&sk->sk_receive_queue, skb);
-+ skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk);
- return len;
- }
-@@ -1815,9 +1190,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
- int delta;
-
- WARN_ON(skb->sk != NULL);
-- if (netlink_skb_is_mmaped(skb))
-- return skb;
--
- delta = skb->end - skb->tail;
- if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
- return skb;
-@@ -1897,71 +1269,6 @@ struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
- unsigned int ldiff, u32 dst_portid,
- gfp_t gfp_mask)
- {
--#ifdef CONFIG_NETLINK_MMAP
-- unsigned int maxlen, linear_size;
-- struct sock *sk = NULL;
-- struct sk_buff *skb;
-- struct netlink_ring *ring;
-- struct nl_mmap_hdr *hdr;
--
-- sk = netlink_getsockbyportid(ssk, dst_portid);
-- if (IS_ERR(sk))
-- goto out;
--
-- ring = &nlk_sk(sk)->rx_ring;
-- /* fast-path without atomic ops for common case: non-mmaped receiver */
-- if (ring->pg_vec == NULL)
-- goto out_put;
--
-- /* We need to account the full linear size needed as a ring
-- * slot cannot have non-linear parts.
-- */
-- linear_size = size + ldiff;
-- if (ring->frame_size - NL_MMAP_HDRLEN < linear_size)
-- goto out_put;
--
-- skb = alloc_skb_head(gfp_mask);
-- if (skb == NULL)
-- goto err1;
--
-- spin_lock_bh(&sk->sk_receive_queue.lock);
-- /* check again under lock */
-- if (ring->pg_vec == NULL)
-- goto out_free;
--
-- /* check again under lock */
-- maxlen = ring->frame_size - NL_MMAP_HDRLEN;
-- if (maxlen < linear_size)
-- goto out_free;
--
-- netlink_forward_ring(ring);
-- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
-- if (hdr == NULL)
-- goto err2;
--
-- netlink_ring_setup_skb(skb, sk, ring, hdr);
-- netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
-- atomic_inc(&ring->pending);
-- netlink_increment_head(ring);
--
-- spin_unlock_bh(&sk->sk_receive_queue.lock);
-- return skb;
--
--err2:
-- kfree_skb(skb);
-- spin_unlock_bh(&sk->sk_receive_queue.lock);
-- netlink_overrun(sk);
--err1:
-- sock_put(sk);
-- return NULL;
--
--out_free:
-- kfree_skb(skb);
-- spin_unlock_bh(&sk->sk_receive_queue.lock);
--out_put:
-- sock_put(sk);
--out:
--#endif
- return alloc_skb(size, gfp_mask);
- }
- EXPORT_SYMBOL_GPL(__netlink_alloc_skb);
-@@ -2242,8 +1549,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
- if (level != SOL_NETLINK)
- return -ENOPROTOOPT;
-
-- if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
-- optlen >= sizeof(int) &&
-+ if (optlen >= sizeof(int) &&
- get_user(val, (unsigned int __user *)optval))
- return -EFAULT;
-
-@@ -2296,25 +1602,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
- }
- err = 0;
- break;
--#ifdef CONFIG_NETLINK_MMAP
-- case NETLINK_RX_RING:
-- case NETLINK_TX_RING: {
-- struct nl_mmap_req req;
--
-- /* Rings might consume more memory than queue limits, require
-- * CAP_NET_ADMIN.
-- */
-- if (!capable(CAP_NET_ADMIN))
-- return -EPERM;
-- if (optlen < sizeof(req))
-- return -EINVAL;
-- if (copy_from_user(&req, optval, sizeof(req)))
-- return -EFAULT;
-- err = netlink_set_ring(sk, &req,
-- optname == NETLINK_TX_RING);
-- break;
-- }
--#endif /* CONFIG_NETLINK_MMAP */
- case NETLINK_LISTEN_ALL_NSID:
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
- return -EPERM;
-@@ -2484,18 +1771,6 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
- smp_rmb();
- }
-
-- /* It's a really convoluted way for userland to ask for mmaped
-- * sendmsg(), but that's what we've got...
-- */
-- if (netlink_tx_is_mmaped(sk) &&
-- iter_is_iovec(&msg->msg_iter) &&
-- msg->msg_iter.nr_segs == 1 &&
-- msg->msg_iter.iov->iov_base == NULL) {
-- err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
-- &scm);
-- goto out;
-- }
--
- err = -EMSGSIZE;
- if (len > sk->sk_sndbuf - 32)
- goto out;
-@@ -2812,8 +2087,7 @@ static int netlink_dump(struct sock *sk)
- goto errout_skb;
- }
-
-- if (!netlink_rx_is_mmaped(sk) &&
-- atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
-+ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
- goto errout_skb;
-
- /* NLMSG_GOODSIZE is small to avoid high order allocations being
-@@ -2902,16 +2176,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
- struct netlink_sock *nlk;
- int ret;
-
-- /* Memory mapped dump requests need to be copied to avoid looping
-- * on the pending state in netlink_mmap_sendmsg() while the CB hold
-- * a reference to the skb.
-- */
-- if (netlink_skb_is_mmaped(skb)) {
-- skb = skb_copy(skb, GFP_KERNEL);
-- if (skb == NULL)
-- return -ENOBUFS;
-- } else
-- atomic_inc(&skb->users);
-+ atomic_inc(&skb->users);
-
- sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
- if (sk == NULL) {
-@@ -3255,7 +2520,7 @@ static const struct proto_ops netlink_ops = {
- .socketpair = sock_no_socketpair,
- .accept = sock_no_accept,
- .getname = netlink_getname,
-- .poll = netlink_poll,
-+ .poll = datagram_poll,
- .ioctl = sock_no_ioctl,
- .listen = sock_no_listen,
- .shutdown = sock_no_shutdown,
-@@ -3263,7 +2528,7 @@ static const struct proto_ops netlink_ops = {
- .getsockopt = netlink_getsockopt,
- .sendmsg = netlink_sendmsg,
- .recvmsg = netlink_recvmsg,
-- .mmap = netlink_mmap,
-+ .mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage,
- };
-
-diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
-index df32cb92d9fc..ea4600aea6b0 100644
---- a/net/netlink/af_netlink.h
-+++ b/net/netlink/af_netlink.h
-@@ -45,12 +45,6 @@ struct netlink_sock {
- int (*netlink_bind)(struct net *net, int group);
- void (*netlink_unbind)(struct net *net, int group);
- struct module *module;
--#ifdef CONFIG_NETLINK_MMAP
-- struct mutex pg_vec_lock;
-- struct netlink_ring rx_ring;
-- struct netlink_ring tx_ring;
-- atomic_t mapped;
--#endif /* CONFIG_NETLINK_MMAP */
-
- struct rhash_head node;
- struct rcu_head rcu;
-@@ -62,15 +56,6 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk)
- return container_of(sk, struct netlink_sock, sk);
- }
-
--static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb)
--{
--#ifdef CONFIG_NETLINK_MMAP
-- return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
--#else
-- return false;
--#endif /* CONFIG_NETLINK_MMAP */
--}
--
- struct netlink_table {
- struct rhashtable hash;
- struct hlist_head mc_list;
-diff --git a/net/netlink/diag.c b/net/netlink/diag.c
-index 3ee63a3cff30..8dd836a8dd60 100644
---- a/net/netlink/diag.c
-+++ b/net/netlink/diag.c
-@@ -8,41 +8,6 @@
-
- #include "af_netlink.h"
-
--#ifdef CONFIG_NETLINK_MMAP
--static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type,
-- struct sk_buff *nlskb)
--{
-- struct netlink_diag_ring ndr;
--
-- ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
-- ndr.ndr_block_nr = ring->pg_vec_len;
-- ndr.ndr_frame_size = ring->frame_size;
-- ndr.ndr_frame_nr = ring->frame_max + 1;
--
-- return nla_put(nlskb, nl_type, sizeof(ndr), &ndr);
--}
--
--static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
--{
-- struct netlink_sock *nlk = nlk_sk(sk);
-- int ret;
--
-- mutex_lock(&nlk->pg_vec_lock);
-- ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb);
-- if (!ret)
-- ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING,
-- nlskb);
-- mutex_unlock(&nlk->pg_vec_lock);
--
-- return ret;
--}
--#else
--static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
--{
-- return 0;
--}
--#endif
--
- static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
- {
- struct netlink_sock *nlk = nlk_sk(sk);
-@@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
- sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
- goto out_nlmsg_trim;
-
-- if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) &&
-- sk_diag_put_rings_cfg(sk, skb))
-- goto out_nlmsg_trim;
--
- nlmsg_end(skb, nlh);
- return 0;
-
---
-2.12.2
-
-From 51a219a1371ed26ce45acc8209d6064257d00f70 Mon Sep 17 00:00:00 2001
-From: Matthias Schiffer <mschiffer@universe-factory.net>
-Date: Thu, 23 Feb 2017 17:19:41 +0100
-Subject: [PATCH 075/251] vxlan: correctly validate VXLAN ID against
- VXLAN_N_VID
-Content-Length: 915
-Lines: 29
-
-[ Upstream commit 4e37d6911f36545b286d15073f6f2222f840e81c ]
-
-The incorrect check caused an off-by-one error: the maximum VID 0xffffff
-was unusable.
-
-Fixes: d342894c5d2f ("vxlan: virtual extensible lan")
-Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
-Acked-by: Jiri Benc <jbenc@redhat.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/net/vxlan.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
-index 6fa8e165878e..590750ab6564 100644
---- a/drivers/net/vxlan.c
-+++ b/drivers/net/vxlan.c
-@@ -2600,7 +2600,7 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
-
- if (data[IFLA_VXLAN_ID]) {
- __u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
-- if (id >= VXLAN_VID_MASK)
-+ if (id >= VXLAN_N_VID)
- return -ERANGE;
- }
-
---
-2.12.2
-
-From f1b3aae1f1bfdbec1956670aa3aa28d25f88d4b3 Mon Sep 17 00:00:00 2001
-From: David Forster <dforster@brocade.com>
-Date: Fri, 24 Feb 2017 14:20:32 +0000
-Subject: [PATCH 076/251] vti6: return GRE_KEY for vti6
-Content-Length: 884
-Lines: 29
-
-[ Upstream commit 7dcdf941cdc96692ab99fd790c8cc68945514851 ]
-
-Align vti6 with vti by returning GRE_KEY flag. This enables iproute2
-to display tunnel keys on "ip -6 tunnel show"
-
-Signed-off-by: David Forster <dforster@brocade.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/ipv6/ip6_vti.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
-index 0a8610b33d79..bdcc4d9cedd3 100644
---- a/net/ipv6/ip6_vti.c
-+++ b/net/ipv6/ip6_vti.c
-@@ -680,6 +680,10 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
- u->link = p->link;
- u->i_key = p->i_key;
- u->o_key = p->o_key;
-+ if (u->i_key)
-+ u->i_flags |= GRE_KEY;
-+ if (u->o_key)
-+ u->o_flags |= GRE_KEY;
- u->proto = p->proto;
-
- memcpy(u->name, p->name, sizeof(u->name));
---
-2.12.2
-
-From 354f79125f12bcd7352704e770c0b10c4a4b424e Mon Sep 17 00:00:00 2001
-From: Julian Anastasov <ja@ssi.bg>
-Date: Sun, 26 Feb 2017 17:14:35 +0200
-Subject: [PATCH 077/251] ipv4: mask tos for input route
-Content-Length: 916
-Lines: 31
-
-[ Upstream commit 6e28099d38c0e50d62c1afc054e37e573adf3d21 ]
-
-Restore the lost masking of TOS in input route code to
-allow ip rules to match it properly.
-
-Problem [1] noticed by Shmulik Ladkani <shmulik.ladkani@gmail.com>
-
-[1] http://marc.info/?t=137331755300040&r=1&w=2
-
-Fixes: 89aef8921bfb ("ipv4: Delete routing cache.")
-Signed-off-by: Julian Anastasov <ja@ssi.bg>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/ipv4/route.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/net/ipv4/route.c b/net/ipv4/route.c
-index ef2f527a119b..da4d68d78590 100644
---- a/net/ipv4/route.c
-+++ b/net/ipv4/route.c
-@@ -1958,6 +1958,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- {
- int res;
-
-+ tos &= IPTOS_RT_MASK;
- rcu_read_lock();
-
- /* Multicast recognition logic is moved from route cache to here.
---
-2.12.2
-
-From 2cd0afc64e333f2ef62444300418883cff0e79da Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Paul=20H=C3=BCber?= <phueber@kernsp.in>
-Date: Sun, 26 Feb 2017 17:58:19 +0100
-Subject: [PATCH 078/251] l2tp: avoid use-after-free caused by
- l2tp_ip_backlog_recv
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-Content-Length: 923
-Lines: 28
-
-[ Upstream commit 51fb60eb162ab84c5edf2ae9c63cf0b878e5547e ]
-
-l2tp_ip_backlog_recv may not return -1 if the packet gets dropped.
-The return value is passed up to ip_local_deliver_finish, which treats
-negative values as an IP protocol number for resubmission.
-
-Signed-off-by: Paul Hüber <phueber@kernsp.in>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/l2tp/l2tp_ip.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
-index 445b7cd0826a..48ab93842322 100644
---- a/net/l2tp/l2tp_ip.c
-+++ b/net/l2tp/l2tp_ip.c
-@@ -383,7 +383,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
- drop:
- IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS);
- kfree_skb(skb);
-- return -1;
-+ return 0;
- }
-
- /* Userspace will call sendmsg() on the tunnel socket to send L2TP
---
-2.12.2
-
-From f331d6445a3e4013428b06169acf3ae33614e69b Mon Sep 17 00:00:00 2001
-From: Alexander Potapenko <glider@google.com>
-Date: Wed, 1 Mar 2017 12:57:20 +0100
-Subject: [PATCH 079/251] net: don't call strlen() on the user buffer in
- packet_bind_spkt()
-Content-Length: 3957
-Lines: 104
-
-[ Upstream commit 540e2894f7905538740aaf122bd8e0548e1c34a4 ]
-
-KMSAN (KernelMemorySanitizer, a new error detection tool) reports use of
-uninitialized memory in packet_bind_spkt():
-Acked-by: Eric Dumazet <edumazet@google.com>
-
-==================================================================
-BUG: KMSAN: use of unitialized memory
-CPU: 0 PID: 1074 Comm: packet Not tainted 4.8.0-rc6+ #1891
-Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
-01/01/2011
- 0000000000000000 ffff88006b6dfc08 ffffffff82559ae8 ffff88006b6dfb48
- ffffffff818a7c91 ffffffff85b9c870 0000000000000092 ffffffff85b9c550
- 0000000000000000 0000000000000092 00000000ec400911 0000000000000002
-Call Trace:
- [< inline >] __dump_stack lib/dump_stack.c:15
- [<ffffffff82559ae8>] dump_stack+0x238/0x290 lib/dump_stack.c:51
- [<ffffffff818a6626>] kmsan_report+0x276/0x2e0 mm/kmsan/kmsan.c:1003
- [<ffffffff818a783b>] __msan_warning+0x5b/0xb0
-mm/kmsan/kmsan_instr.c:424
- [< inline >] strlen lib/string.c:484
- [<ffffffff8259b58d>] strlcpy+0x9d/0x200 lib/string.c:144
- [<ffffffff84b2eca4>] packet_bind_spkt+0x144/0x230
-net/packet/af_packet.c:3132
- [<ffffffff84242e4d>] SYSC_bind+0x40d/0x5f0 net/socket.c:1370
- [<ffffffff84242a22>] SyS_bind+0x82/0xa0 net/socket.c:1356
- [<ffffffff8515991b>] entry_SYSCALL_64_fastpath+0x13/0x8f
-arch/x86/entry/entry_64.o:?
-chained origin: 00000000eba00911
- [<ffffffff810bb787>] save_stack_trace+0x27/0x50
-arch/x86/kernel/stacktrace.c:67
- [< inline >] kmsan_save_stack_with_flags mm/kmsan/kmsan.c:322
- [< inline >] kmsan_save_stack mm/kmsan/kmsan.c:334
- [<ffffffff818a59f8>] kmsan_internal_chain_origin+0x118/0x1e0
-mm/kmsan/kmsan.c:527
- [<ffffffff818a7773>] __msan_set_alloca_origin4+0xc3/0x130
-mm/kmsan/kmsan_instr.c:380
- [<ffffffff84242b69>] SYSC_bind+0x129/0x5f0 net/socket.c:1356
- [<ffffffff84242a22>] SyS_bind+0x82/0xa0 net/socket.c:1356
- [<ffffffff8515991b>] entry_SYSCALL_64_fastpath+0x13/0x8f
-arch/x86/entry/entry_64.o:?
-origin description: ----address@SYSC_bind (origin=00000000eb400911)
-==================================================================
-(the line numbers are relative to 4.8-rc6, but the bug persists
-upstream)
-
-This report appears when I run the following program as root:
-
-=====================================
- #include <string.h>
- #include <sys/socket.h>
- #include <netpacket/packet.h>
- #include <net/ethernet.h>
-
- int main() {
- struct sockaddr addr;
- memset(&addr, 0xff, sizeof(addr));
- addr.sa_family = AF_PACKET;
- int fd = socket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ALL));
- bind(fd, &addr, sizeof(addr));
- return 0;
- }
-=====================================
-
-This happens because addr.sa_data copied from the userspace is not
-zero-terminated, and copying it with strlcpy() in packet_bind_spkt()
-results in calling strlen() on the kernel copy of that non-terminated
-buffer.
-
-Signed-off-by: Alexander Potapenko <glider@google.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/packet/af_packet.c | 8 ++++++--
- 1 file changed, 6 insertions(+), 2 deletions(-)
-
-diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
-index d805cd577a60..3975ac809934 100644
---- a/net/packet/af_packet.c
-+++ b/net/packet/af_packet.c
-@@ -3021,7 +3021,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
- int addr_len)
- {
- struct sock *sk = sock->sk;
-- char name[15];
-+ char name[sizeof(uaddr->sa_data) + 1];
-
- /*
- * Check legality
-@@ -3029,7 +3029,11 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
-
- if (addr_len != sizeof(struct sockaddr))
- return -EINVAL;
-- strlcpy(name, uaddr->sa_data, sizeof(name));
-+ /* uaddr->sa_data comes from the userspace, it's not guaranteed to be
-+ * zero-terminated.
-+ */
-+ memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
-+ name[sizeof(uaddr->sa_data)] = 0;
-
- return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
- }
---
-2.12.2
-
-From a70c328597045be2962098916c88ddd172caa054 Mon Sep 17 00:00:00 2001
-From: Eric Dumazet <edumazet@google.com>
-Date: Wed, 1 Mar 2017 14:28:39 -0800
-Subject: [PATCH 080/251] net: net_enable_timestamp() can be called from irq
- contexts
-Content-Length: 2734
-Lines: 92
-
-[ Upstream commit 13baa00ad01bb3a9f893e3a08cbc2d072fc0c15d ]
-
-It is now very clear that silly TCP listeners might play with
-enabling/disabling timestamping while new children are added
-to their accept queue.
-
-This means net_enable_timestamp() can be called from BH context
-while the current state of the static key is not enabled.
-
-Let's play safe and allow all contexts.
-
-The work queue is scheduled only under the problematic cases,
-which are the static key enable/disable transition, to not slow down
-critical paths.
-
-This extends and improves what we did in commit 5fa8bbda38c6 ("net: use
-a work queue to defer net_disable_timestamp() work")
-
-Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context")
-Signed-off-by: Eric Dumazet <edumazet@google.com>
-Reported-by: Dmitry Vyukov <dvyukov@google.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/core/dev.c | 35 +++++++++++++++++++++++++++++++----
- 1 file changed, 31 insertions(+), 4 deletions(-)
-
-diff --git a/net/core/dev.c b/net/core/dev.c
-index 08215a85c742..48399d8ce614 100644
---- a/net/core/dev.c
-+++ b/net/core/dev.c
-@@ -1677,27 +1677,54 @@ EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
- static struct static_key netstamp_needed __read_mostly;
- #ifdef HAVE_JUMP_LABEL
- static atomic_t netstamp_needed_deferred;
-+static atomic_t netstamp_wanted;
- static void netstamp_clear(struct work_struct *work)
- {
- int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
-+ int wanted;
-
-- while (deferred--)
-- static_key_slow_dec(&netstamp_needed);
-+ wanted = atomic_add_return(deferred, &netstamp_wanted);
-+ if (wanted > 0)
-+ static_key_enable(&netstamp_needed);
-+ else
-+ static_key_disable(&netstamp_needed);
- }
- static DECLARE_WORK(netstamp_work, netstamp_clear);
- #endif
-
- void net_enable_timestamp(void)
- {
-+#ifdef HAVE_JUMP_LABEL
-+ int wanted;
-+
-+ while (1) {
-+ wanted = atomic_read(&netstamp_wanted);
-+ if (wanted <= 0)
-+ break;
-+ if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted)
-+ return;
-+ }
-+ atomic_inc(&netstamp_needed_deferred);
-+ schedule_work(&netstamp_work);
-+#else
- static_key_slow_inc(&netstamp_needed);
-+#endif
- }
- EXPORT_SYMBOL(net_enable_timestamp);
-
- void net_disable_timestamp(void)
- {
- #ifdef HAVE_JUMP_LABEL
-- /* net_disable_timestamp() can be called from non process context */
-- atomic_inc(&netstamp_needed_deferred);
-+ int wanted;
-+
-+ while (1) {
-+ wanted = atomic_read(&netstamp_wanted);
-+ if (wanted <= 1)
-+ break;
-+ if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted)
-+ return;
-+ }
-+ atomic_dec(&netstamp_needed_deferred);
- schedule_work(&netstamp_work);
- #else
- static_key_slow_dec(&netstamp_needed);
---
-2.12.2
-
-From 9216632bf4a0bafdc998d1c68b37b70446775900 Mon Sep 17 00:00:00 2001
-From: Arnaldo Carvalho de Melo <acme@redhat.com>
-Date: Wed, 1 Mar 2017 16:35:07 -0300
-Subject: [PATCH 081/251] dccp: Unlock sock before calling sk_free()
-Content-Length: 3158
-Lines: 77
-
-[ Upstream commit d5afb6f9b6bb2c57bd0c05e76e12489dc0d037d9 ]
-
-The code where sk_clone() came from created a new socket and locked it,
-but then, on the error path didn't unlock it.
-
-This problem stayed there for a long while, till b0691c8ee7c2 ("net:
-Unlock sock before calling sk_free()") fixed it, but unfortunately the
-callers of sk_clone() (now sk_clone_locked()) were not audited and the
-one in dccp_create_openreq_child() remained.
-
-Now in the age of the syzkaller fuzzer, this was finally uncovered, as
-reported by Dmitry:
-
- ---- 8< ----
-
-I've got the following report while running syzkaller fuzzer on
-86292b33d4b7 ("Merge branch 'akpm' (patches from Andrew)")
-
- [ BUG: held lock freed! ]
- 4.10.0+ #234 Not tainted
- -------------------------
- syz-executor6/6898 is freeing memory
- ffff88006286cac0-ffff88006286d3b7, with a lock still held there!
- (slock-AF_INET6){+.-...}, at: [<ffffffff8362c2c9>] spin_lock
- include/linux/spinlock.h:299 [inline]
- (slock-AF_INET6){+.-...}, at: [<ffffffff8362c2c9>]
- sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504
- 5 locks held by syz-executor6/6898:
- #0: (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff839a34b4>] lock_sock
- include/net/sock.h:1460 [inline]
- #0: (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff839a34b4>]
- inet_stream_connect+0x44/0xa0 net/ipv4/af_inet.c:681
- #1: (rcu_read_lock){......}, at: [<ffffffff83bc1c2a>]
- inet6_csk_xmit+0x12a/0x5d0 net/ipv6/inet6_connection_sock.c:126
- #2: (rcu_read_lock){......}, at: [<ffffffff8369b424>] __skb_unlink
- include/linux/skbuff.h:1767 [inline]
- #2: (rcu_read_lock){......}, at: [<ffffffff8369b424>] __skb_dequeue
- include/linux/skbuff.h:1783 [inline]
- #2: (rcu_read_lock){......}, at: [<ffffffff8369b424>]
- process_backlog+0x264/0x730 net/core/dev.c:4835
- #3: (rcu_read_lock){......}, at: [<ffffffff83aeb5c0>]
- ip6_input_finish+0x0/0x1700 net/ipv6/ip6_input.c:59
- #4: (slock-AF_INET6){+.-...}, at: [<ffffffff8362c2c9>] spin_lock
- include/linux/spinlock.h:299 [inline]
- #4: (slock-AF_INET6){+.-...}, at: [<ffffffff8362c2c9>]
- sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504
-
-Fix it just like was done by b0691c8ee7c2 ("net: Unlock sock before calling
-sk_free()").
-
-Reported-by: Dmitry Vyukov <dvyukov@google.com>
-Cc: Cong Wang <xiyou.wangcong@gmail.com>
-Cc: Eric Dumazet <edumazet@google.com>
-Cc: Gerrit Renker <gerrit@erg.abdn.ac.uk>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/20170301153510.GE15145@kernel.org
-Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/dccp/minisocks.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
-index 1994f8af646b..e314caa39176 100644
---- a/net/dccp/minisocks.c
-+++ b/net/dccp/minisocks.c
-@@ -122,6 +122,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk,
- /* It is still raw copy of parent, so invalidate
- * destructor and make plain sk_free() */
- newsk->sk_destruct = NULL;
-+ bh_unlock_sock(newsk);
- sk_free(newsk);
- return NULL;
- }
---
-2.12.2
-
From 2681a7853ad73bfebc3a683765a496bb283c6648 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 3 Mar 2017 14:08:21 -0800
--- /dev/null
+From 0c0be310ba29e4a053e8aac934aebe590c5da909 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Thu, 18 Feb 2016 15:03:24 +0100
+Subject: netlink: remove mmapped netlink support
+
+From: Florian Westphal <fw@strlen.de>
+
+commit d1b4c689d4130bcfd3532680b64db562300716b6 upstream.
+
+mmapped netlink has a number of unresolved issues:
+
+- TX zerocopy support had to be disabled more than a year ago via
+ commit 4682a0358639b29cf ("netlink: Always copy on mmap TX.")
+ because the content of the mmapped area can change after netlink
+ attribute validation but before message processing.
+
+- RX support was implemented mainly to speed up nfqueue dumping packet
+ payload to userspace. However, since commit ae08ce0021087a5d812d2
+ ("netfilter: nfnetlink_queue: zero copy support") we avoid one copy
+ with the socket-based interface too (via the skb_zerocopy helper).
+
+The other problem is that skbs attached to a mmaped netlink socket
+behave differently from normal skbs:
+
+- they don't have a shinfo area, so all functions that use skb_shinfo()
+(e.g. skb_clone) cannot be used.
+
+- reserving headroom prevents userspace from seeing the content as
+it expects message to start at skb->head.
+See for instance
+commit aa3a022094fa ("netlink: not trim skb for mmaped socket when dump").
+
+- skbs handed e.g. to netlink_ack must have non-NULL skb->sk, else we
+crash because it needs the sk to check if a tx ring is attached.
+
+This is not obvious and leads to non-intuitive bug fixes such as 7c7bdf359
+("netfilter: nfnetlink: use original skbuff when acking batches").
+
+mmaped netlink also didn't play nicely with the skb_zerocopy helper
+used by nfqueue and openvswitch. Daniel Borkmann fixed this via
+commit 6bb0fef489f6 ("netlink, mmap: fix edge-case leakages in nf queue
+zero-copy")' but at the cost of also needing to provide remaining
+length to the allocation function.
+
+nfqueue also has problems when used with mmaped rx netlink:
+- mmaped netlink doesn't allow use of nfqueue batch verdict messages.
+  The problem is that in the mmap case, the allocation time also
+  determines the ordering in which the frame will be seen by userspace
+  (A allocating before B means that A is located in an earlier ring
+  slot, but B might still get a lower sequence number than A, since the
+  seqno is decided later). To fix this we would need to extend the
+  spinlocked region to also cover the allocation and message setup,
+  which isn't desirable.
+- nfqueue can now be configured to queue large (GSO) skbs to userspace.
+ Queing GSO packets is faster than having to force a software segmentation
+ in the kernel, so this is a desirable option. However, with a mmap based
+ ring one has to use 64kb per ring slot element, else mmap has to fall back
+ to the socket path (NL_MMAP_STATUS_COPY) for all large packets.
+
+To use the mmap interface, userspace not only has to probe for mmap netlink
+support, it also has to implement a recv/socket receive path in order to
+handle messages that exceed the size of an rx ring element.
+
+Cc: Daniel Borkmann <daniel@iogearbox.net>
+Cc: Ken-ichirou MATSUZAWA <chamaken@gmail.com>
+Cc: Pablo Neira Ayuso <pablo@netfilter.org>
+Cc: Patrick McHardy <kaber@trash.net>
+Cc: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Cc: Shi Yuejie <shiyuejie@outlook.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/networking/netlink_mmap.txt | 339 -------------
+ include/uapi/linux/netlink.h | 4
+ include/uapi/linux/netlink_diag.h | 2
+ net/netlink/Kconfig | 9
+ net/netlink/af_netlink.c | 732 ------------------------------
+ net/netlink/af_netlink.h | 15
+ net/netlink/diag.c | 39 -
+ 7 files changed, 15 insertions(+), 1125 deletions(-)
+
+--- a/Documentation/networking/netlink_mmap.txt
++++ /dev/null
+@@ -1,339 +0,0 @@
+-This file documents how to use memory mapped I/O with netlink.
+-
+-Author: Patrick McHardy <kaber@trash.net>
+-
+-Overview
+---------
+-
+-Memory mapped netlink I/O can be used to increase throughput and decrease
+-overhead of unicast receive and transmit operations. Some netlink subsystems
+-require high throughput; these are mainly the netfilter subsystems
+-nfnetlink_queue and nfnetlink_log, but it can also help speed up large
+-dump operations of e.g. the routing database.
+-
+-Memory mapped netlink I/O uses two circular ring buffers for RX and TX, which
+-are mapped into the process's address space.
+-
+-The RX ring is used by the kernel to directly construct netlink messages into
+-user-space memory without copying them as done with regular socket I/O.
+-Additionally, as long as the ring contains messages, no recvmsg() or poll()
+-syscalls have to be issued by user-space to get more messages.
+-
+-The TX ring is used to process messages directly from user-space memory; the
+-kernel processes all messages contained in the ring using a single sendmsg()
+-call.
+-
+-Usage overview
+---------------
+-
+-In order to use memory mapped netlink I/O, user-space needs three main changes:
+-
+-- ring setup
+-- conversion of the RX path to get messages from the ring instead of recvmsg()
+-- conversion of the TX path to construct messages into the ring
+-
+-Ring setup is done using setsockopt() to provide the ring parameters to the
+-kernel, followed by a call to mmap() to map the ring into the process's
+-address space:
+-
+-- setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &params, sizeof(params));
+-- setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &params, sizeof(params));
+-- ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)
+-
+-Usage of either ring is optional, but even if only the RX ring is used the
+-mapping still needs to be writable in order to update the frame status after
+-processing.
+-
+-Conversion of the reception path involves calling poll() on the file
+-descriptor; once the socket is readable, the frames from the ring are
+-processed in order until no more messages are available, as indicated by
+-a status word in the frame header.
+-
+-On the kernel side, in order to make use of memory mapped I/O on receive, the
+-originating netlink subsystem needs to support memory mapped I/O; otherwise
+-it will use an allocated socket buffer as usual and the contents will be
+-copied to the ring on transmission, nullifying most of the performance gains.
+-Dumps of kernel databases automatically support memory mapped I/O.
+-
+-Conversion of the transmit path involves changing message construction to
+-use memory from the TX ring instead of (usually) a buffer declared on the
+-stack and setting up the frame header appropriately. Optionally, poll() can
+-be used to wait for free frames in the TX ring, as sketched below.
+-
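+-As a hedged illustration (not part of the original document), waiting for a
+-free TX frame could look like this; fd is the netlink socket from the ring
+-setup:
+-
+-	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
+-
+-	/* Block until the kernel reports at least one free TX ring frame */
+-	if (poll(&pfd, 1, -1) < 0)
+-		exit(1);
+-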
+-Structures and definitions for using memory mapped I/O are contained in
+-<linux/netlink.h>.
+-
+-RX and TX rings
+-----------------
+-
+-Each ring contains a number of contiguous memory blocks, containing frames of
+-a fixed size that depends on the parameters used for ring setup.
+-
+-Ring: [ block 0 ]
+- [ frame 0 ]
+- [ frame 1 ]
+- [ block 1 ]
+- [ frame 2 ]
+- [ frame 3 ]
+- ...
+- [ block n ]
+- [ frame 2 * n ]
+- [ frame 2 * n + 1 ]
+-
+-The blocks are only visible to the kernel; from the point of view of user-space
+-the ring just contains the frames in a contiguous memory zone.
+-
+-The ring parameters used for setting up the ring are defined as follows:
+-
+-struct nl_mmap_req {
+- unsigned int nm_block_size;
+- unsigned int nm_block_nr;
+- unsigned int nm_frame_size;
+- unsigned int nm_frame_nr;
+-};
+-
+-Frames are grouped into blocks, where each block is a contiguous region of memory
+-and holds nm_block_size / nm_frame_size frames. The total number of frames in
+-the ring is nm_frame_nr. The following invariants hold:
+-
+-- frames_per_block = nm_block_size / nm_frame_size
+-
+-- nm_frame_nr = frames_per_block * nm_block_nr
+-
+-Some parameters are constrained, specifically:
+-
+-- nm_block_size must be a multiple of the architecture's memory page size.
+- The getpagesize() function can be used to get the page size.
+-
+-- nm_frame_size must be equal to or larger than NL_MMAP_HDRLEN; in other
+-  words, a frame must be able to hold at least the frame header
+-
+-- nm_frame_size must be smaller than or equal to nm_block_size
+-
+-- nm_frame_size must be a multiple of NL_MMAP_MSG_ALIGNMENT
+-
+-- nm_frame_nr must equal the actual number of frames as specified above.
+-
+-When the kernel can't allocate physically contiguous memory for a ring block,
+-it will fall back to using physically non-contiguous memory. This might affect
+-performance negatively; to avoid this, the nm_frame_size parameter should be
+-chosen to be as small as possible for the required frame size, and the number
+-of blocks should be increased instead. A sketch of a consistent parameter set
+-follows.
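+-
+-As a hedged illustration (not part of the original document), the invariants
+-above can be encoded directly when filling in struct nl_mmap_req; the frame
+-and block sizes are arbitrary example values:
+-
+-	unsigned int frame_size = 16384;	/* multiple of NL_MMAP_MSG_ALIGNMENT */
+-	unsigned int block_size = 16 * getpagesize(); /* multiple of the page size */
+-	unsigned int block_nr = 64;
+-	struct nl_mmap_req req = {
+-		.nm_block_size	= block_size,
+-		.nm_block_nr	= block_nr,
+-		.nm_frame_size	= frame_size,
+-		/* nm_frame_nr = frames_per_block * nm_block_nr */
+-		.nm_frame_nr	= (block_size / frame_size) * block_nr,
+-	};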
+-
+-Ring frames
+-------------
+-
+-Each frame contains a frame header, consisting of a synchronization word and
+-some meta-data, and the message itself.
+-
+-Frame: [ header message ]
+-
+-The frame header is defined as follows:
+-
+-struct nl_mmap_hdr {
+- unsigned int nm_status;
+- unsigned int nm_len;
+- __u32 nm_group;
+- /* credentials */
+- __u32 nm_pid;
+- __u32 nm_uid;
+- __u32 nm_gid;
+-};
+-
+-- nm_status is used for synchronizing processing between the kernel and user-
+- space and specifies ownership of the frame as well as the operation to perform
+-
+-- nm_len contains the length of the message contained in the data area
+-
+-- nm_group specifies the destination multicast group of the message
+-
+-- nm_pid, nm_uid and nm_gid contain the netlink pid, UID and GID of the sending
+- process. These values correspond to the data available using SOCK_PASSCRED in
+- the SCM_CREDENTIALS cmsg.
+-
+-The possible values in the status word are:
+-
+-- NL_MMAP_STATUS_UNUSED:
+- RX ring: frame belongs to the kernel and contains no message
+-		for user-space. Appropriate action is to invoke poll()
+- to wait for new messages.
+-
+- TX ring: frame belongs to user-space and can be used for
+- message construction.
+-
+-- NL_MMAP_STATUS_RESERVED:
+- RX ring only: frame is currently used by the kernel for message
+- construction and contains no valid message yet.
+- Appropriate action is to invoke poll() to wait for
+- new messages.
+-
+-- NL_MMAP_STATUS_VALID:
+-	RX ring:	frame contains a valid message. Appropriate action is
+- to process the message and release the frame back to
+- the kernel by setting the status to
+- NL_MMAP_STATUS_UNUSED or queue the frame by setting the
+- status to NL_MMAP_STATUS_SKIP.
+-
+- TX ring: the frame contains a valid message from user-space to
+- be processed by the kernel. After completing processing
+- the kernel will release the frame back to user-space by
+- setting the status to NL_MMAP_STATUS_UNUSED.
+-
+-- NL_MMAP_STATUS_COPY:
+- RX ring only: a message is ready to be processed but could not be
+- stored in the ring, either because it exceeded the
+- frame size or because the originating subsystem does
+- not support memory mapped I/O. Appropriate action is
+- to invoke recvmsg() to receive the message and release
+- the frame back to the kernel by setting the status to
+- NL_MMAP_STATUS_UNUSED.
+-
+-- NL_MMAP_STATUS_SKIP:
+- RX ring only: user-space queued the message for later processing, but
+- processed some messages following it in the ring. The
+- kernel should skip this frame when looking for unused
+- frames.
+-
+-The data area of a frame begins at an offset of NL_MMAP_HDRLEN relative to the
+-frame header.
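+-
+-Since the header always sits at the start of a frame and the message payload
+-begins NL_MMAP_HDRLEN bytes into it, both can be located with small accessors
+-such as the following sketch (the helper names are illustrative only):
+-
+-        /* Illustrative accessors for the frame layout described above */
+-        static struct nl_mmap_hdr *frame_hdr(void *ring, unsigned int frame_offset)
+-        {
+-                return (struct nl_mmap_hdr *)((char *)ring + frame_offset);
+-        }
+-
+-        static struct nlmsghdr *frame_msg(struct nl_mmap_hdr *hdr)
+-        {
+-                return (struct nlmsghdr *)((char *)hdr + NL_MMAP_HDRLEN);
+-        }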
+-
+-TX limitations
+---------------
+-
+-Kernel processing usually involves validation of the message received from
+-user-space, then processing its contents. The kernel must ensure that
+-user-space is not able to modify the message contents after they have been
+-validated. In order to do so, the message is copied from the ring frame
+-to an allocated buffer if either of these conditions is false:
+-
+-- only a single mapping of the ring exists
+-- the file descriptor is not shared between processes
+-
+-This means that for threaded programs, the kernel will fall back to copying.
+-
+-Example
+--------
+-
+-Ring setup:
+-
+- unsigned int block_size = 16 * getpagesize();
+- struct nl_mmap_req req = {
+- .nm_block_size = block_size,
+- .nm_block_nr = 64,
+- .nm_frame_size = 16384,
+- .nm_frame_nr = 64 * block_size / 16384,
+- };
+- unsigned int ring_size;
+- void *rx_ring, *tx_ring;
+-
+- /* Configure ring parameters */
+- if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
+- exit(1);
+- if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
+-        exit(1);
+-
+- /* Calculate size of each individual ring */
+- ring_size = req.nm_block_nr * req.nm_block_size;
+-
+- /* Map RX/TX rings. The TX ring is located after the RX ring */
+- rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE,
+- MAP_SHARED, fd, 0);
+- if ((long)rx_ring == -1L)
+- exit(1);
+-    tx_ring = rx_ring + ring_size;
+-
+-Message reception:
+-
+-This example assumes that the ring parameters from the ring setup above
+-(frame_size and ring_size) are available.
+-
+- unsigned int frame_offset = 0;
+- struct nl_mmap_hdr *hdr;
+- struct nlmsghdr *nlh;
+- unsigned char buf[16384];
+- ssize_t len;
+-
+- while (1) {
+- struct pollfd pfds[1];
+-
+- pfds[0].fd = fd;
+- pfds[0].events = POLLIN | POLLERR;
+- pfds[0].revents = 0;
+-
+-        if (poll(pfds, 1, -1) < 0 && errno != EINTR)
+- exit(1);
+-
+- /* Check for errors. Error handling omitted */
+- if (pfds[0].revents & POLLERR)
+- <handle error>
+-
+- /* If no new messages, poll again */
+- if (!(pfds[0].revents & POLLIN))
+- continue;
+-
+- /* Process all frames */
+- while (1) {
+- /* Get next frame header */
+- hdr = rx_ring + frame_offset;
+-
+- if (hdr->nm_status == NL_MMAP_STATUS_VALID) {
+- /* Regular memory mapped frame */
+- nlh = (void *)hdr + NL_MMAP_HDRLEN;
+- len = hdr->nm_len;
+-
+- /* Release empty message immediately. May happen
+- * on error during message construction.
+- */
+- if (len == 0)
+- goto release;
+- } else if (hdr->nm_status == NL_MMAP_STATUS_COPY) {
+- /* Frame queued to socket receive queue */
+- len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
+- if (len <= 0)
+- break;
+-                nlh = (struct nlmsghdr *)buf;
+- } else
+- /* No more messages to process, continue polling */
+- break;
+-
+- process_msg(nlh);
+-release:
+- /* Release frame back to the kernel */
+- hdr->nm_status = NL_MMAP_STATUS_UNUSED;
+-
+- /* Advance frame offset to next frame */
+- frame_offset = (frame_offset + frame_size) % ring_size;
+- }
+- }
+-
+-Message transmission:
+-
+-This example assumes that the ring parameters from the ring setup above are
+-available. A single message is constructed and transmitted; to send multiple
+-messages at once, they would be constructed in consecutive frames before a
+-final call to sendto(), as shown in the sketch after this example.
+-
+- unsigned int frame_offset = 0;
+- struct nl_mmap_hdr *hdr;
+- struct nlmsghdr *nlh;
+- struct sockaddr_nl addr = {
+- .nl_family = AF_NETLINK,
+- };
+-
+- hdr = tx_ring + frame_offset;
+- if (hdr->nm_status != NL_MMAP_STATUS_UNUSED)
+-        /* No frame available. Use poll() to wait for one. */
+- exit(1);
+-
+- nlh = (void *)hdr + NL_MMAP_HDRLEN;
+-
+- /* Build message */
+- build_message(nlh);
+-
+- /* Fill frame header: length and status need to be set */
+- hdr->nm_len = nlh->nlmsg_len;
+- hdr->nm_status = NL_MMAP_STATUS_VALID;
+-
+-    if (sendto(fd, NULL, 0, 0, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+- exit(1);
+-
+- /* Advance frame offset to next frame */
+- frame_offset = (frame_offset + frame_size) % ring_size;
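+-
+-The batching variant mentioned above could look like the following sketch. It
+-reuses frame_offset, hdr, nlh, addr, fd, frame_size, ring_size, tx_ring and
+-build_message() from the example above; batch_count is a hypothetical,
+-application-chosen limit.
+-
+-        unsigned int i;
+-
+-        /* Fill consecutive TX frames, then notify the kernel once */
+-        for (i = 0; i < batch_count; i++) {
+-                hdr = tx_ring + frame_offset;
+-                if (hdr->nm_status != NL_MMAP_STATUS_UNUSED)
+-                        break;  /* ring full, send what was queued so far */
+-
+-                nlh = (void *)hdr + NL_MMAP_HDRLEN;
+-                build_message(nlh);
+-
+-                hdr->nm_len = nlh->nlmsg_len;
+-                hdr->nm_status = NL_MMAP_STATUS_VALID;
+-                frame_offset = (frame_offset + frame_size) % ring_size;
+-        }
+-
+-        /* A single sendto() submits all frames marked VALID */
+-        if (sendto(fd, NULL, 0, 0, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+-                exit(1);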
+--- a/include/uapi/linux/netlink.h
++++ b/include/uapi/linux/netlink.h
+@@ -106,8 +106,10 @@ struct nlmsgerr {
+ #define NETLINK_PKTINFO 3
+ #define NETLINK_BROADCAST_ERROR 4
+ #define NETLINK_NO_ENOBUFS 5
++#ifndef __KERNEL__
+ #define NETLINK_RX_RING 6
+ #define NETLINK_TX_RING 7
++#endif
+
+ struct nl_pktinfo {
+ __u32 group;
+@@ -130,6 +132,7 @@ struct nl_mmap_hdr {
+ __u32 nm_gid;
+ };
+
++#ifndef __KERNEL__
+ enum nl_mmap_status {
+ NL_MMAP_STATUS_UNUSED,
+ NL_MMAP_STATUS_RESERVED,
+@@ -141,6 +144,7 @@ enum nl_mmap_status {
+ #define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO
+ #define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT)
+ #define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr))
++#endif
+
+ #define NET_MAJOR 36 /* Major 36 is reserved for networking */
+
+--- a/include/uapi/linux/netlink_diag.h
++++ b/include/uapi/linux/netlink_diag.h
+@@ -48,6 +48,8 @@ enum {
+
+ #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */
+ #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */
++#ifndef __KERNEL__
+ #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */
++#endif
+
+ #endif
+--- a/net/netlink/Kconfig
++++ b/net/netlink/Kconfig
+@@ -2,15 +2,6 @@
+ # Netlink Sockets
+ #
+
+-config NETLINK_MMAP
+- bool "NETLINK: mmaped IO"
+- ---help---
+- This option enables support for memory mapped netlink IO. This
+- reduces overhead by avoiding copying data between kernel- and
+- userspace.
+-
+- If unsure, say N.
+-
+ config NETLINK_DIAG
+ tristate "NETLINK: socket monitoring interface"
+ default n
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -234,7 +234,7 @@ static int __netlink_deliver_tap_skb(str
+
+ dev_hold(dev);
+
+- if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head))
++ if (is_vmalloc_addr(skb->head))
+ nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
+ else
+ nskb = skb_clone(skb, GFP_ATOMIC);
+@@ -308,599 +308,8 @@ static void netlink_rcv_wake(struct sock
+ wake_up_interruptible(&nlk->wait);
+ }
+
+-#ifdef CONFIG_NETLINK_MMAP
+-static bool netlink_rx_is_mmaped(struct sock *sk)
+-{
+- return nlk_sk(sk)->rx_ring.pg_vec != NULL;
+-}
+-
+-static bool netlink_tx_is_mmaped(struct sock *sk)
+-{
+- return nlk_sk(sk)->tx_ring.pg_vec != NULL;
+-}
+-
+-static __pure struct page *pgvec_to_page(const void *addr)
+-{
+- if (is_vmalloc_addr(addr))
+- return vmalloc_to_page(addr);
+- else
+- return virt_to_page(addr);
+-}
+-
+-static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
+-{
+- unsigned int i;
+-
+- for (i = 0; i < len; i++) {
+- if (pg_vec[i] != NULL) {
+- if (is_vmalloc_addr(pg_vec[i]))
+- vfree(pg_vec[i]);
+- else
+- free_pages((unsigned long)pg_vec[i], order);
+- }
+- }
+- kfree(pg_vec);
+-}
+-
+-static void *alloc_one_pg_vec_page(unsigned long order)
+-{
+- void *buffer;
+- gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
+- __GFP_NOWARN | __GFP_NORETRY;
+-
+- buffer = (void *)__get_free_pages(gfp_flags, order);
+- if (buffer != NULL)
+- return buffer;
+-
+- buffer = vzalloc((1 << order) * PAGE_SIZE);
+- if (buffer != NULL)
+- return buffer;
+-
+- gfp_flags &= ~__GFP_NORETRY;
+- return (void *)__get_free_pages(gfp_flags, order);
+-}
+-
+-static void **alloc_pg_vec(struct netlink_sock *nlk,
+- struct nl_mmap_req *req, unsigned int order)
+-{
+- unsigned int block_nr = req->nm_block_nr;
+- unsigned int i;
+- void **pg_vec;
+-
+- pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
+- if (pg_vec == NULL)
+- return NULL;
+-
+- for (i = 0; i < block_nr; i++) {
+- pg_vec[i] = alloc_one_pg_vec_page(order);
+- if (pg_vec[i] == NULL)
+- goto err1;
+- }
+-
+- return pg_vec;
+-err1:
+- free_pg_vec(pg_vec, order, block_nr);
+- return NULL;
+-}
+-
+-
+-static void
+-__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
+- unsigned int order)
+-{
+- struct netlink_sock *nlk = nlk_sk(sk);
+- struct sk_buff_head *queue;
+- struct netlink_ring *ring;
+-
+- queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+- ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+-
+- spin_lock_bh(&queue->lock);
+-
+- ring->frame_max = req->nm_frame_nr - 1;
+- ring->head = 0;
+- ring->frame_size = req->nm_frame_size;
+- ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
+-
+- swap(ring->pg_vec_len, req->nm_block_nr);
+- swap(ring->pg_vec_order, order);
+- swap(ring->pg_vec, pg_vec);
+-
+- __skb_queue_purge(queue);
+- spin_unlock_bh(&queue->lock);
+-
+- WARN_ON(atomic_read(&nlk->mapped));
+-
+- if (pg_vec)
+- free_pg_vec(pg_vec, order, req->nm_block_nr);
+-}
+-
+-static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
+- bool tx_ring)
+-{
+- struct netlink_sock *nlk = nlk_sk(sk);
+- struct netlink_ring *ring;
+- void **pg_vec = NULL;
+- unsigned int order = 0;
+-
+- ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+-
+- if (atomic_read(&nlk->mapped))
+- return -EBUSY;
+- if (atomic_read(&ring->pending))
+- return -EBUSY;
+-
+- if (req->nm_block_nr) {
+- if (ring->pg_vec != NULL)
+- return -EBUSY;
+-
+- if ((int)req->nm_block_size <= 0)
+- return -EINVAL;
+- if (!PAGE_ALIGNED(req->nm_block_size))
+- return -EINVAL;
+- if (req->nm_frame_size < NL_MMAP_HDRLEN)
+- return -EINVAL;
+- if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
+- return -EINVAL;
+-
+- ring->frames_per_block = req->nm_block_size /
+- req->nm_frame_size;
+- if (ring->frames_per_block == 0)
+- return -EINVAL;
+- if (ring->frames_per_block * req->nm_block_nr !=
+- req->nm_frame_nr)
+- return -EINVAL;
+-
+- order = get_order(req->nm_block_size);
+- pg_vec = alloc_pg_vec(nlk, req, order);
+- if (pg_vec == NULL)
+- return -ENOMEM;
+- } else {
+- if (req->nm_frame_nr)
+- return -EINVAL;
+- }
+-
+- mutex_lock(&nlk->pg_vec_lock);
+- if (atomic_read(&nlk->mapped) == 0) {
+- __netlink_set_ring(sk, req, tx_ring, pg_vec, order);
+- mutex_unlock(&nlk->pg_vec_lock);
+- return 0;
+- }
+-
+- mutex_unlock(&nlk->pg_vec_lock);
+-
+- if (pg_vec)
+- free_pg_vec(pg_vec, order, req->nm_block_nr);
+-
+- return -EBUSY;
+-}
+-
+-static void netlink_mm_open(struct vm_area_struct *vma)
+-{
+- struct file *file = vma->vm_file;
+- struct socket *sock = file->private_data;
+- struct sock *sk = sock->sk;
+-
+- if (sk)
+- atomic_inc(&nlk_sk(sk)->mapped);
+-}
+-
+-static void netlink_mm_close(struct vm_area_struct *vma)
+-{
+- struct file *file = vma->vm_file;
+- struct socket *sock = file->private_data;
+- struct sock *sk = sock->sk;
+-
+- if (sk)
+- atomic_dec(&nlk_sk(sk)->mapped);
+-}
+-
+-static const struct vm_operations_struct netlink_mmap_ops = {
+- .open = netlink_mm_open,
+- .close = netlink_mm_close,
+-};
+-
+-static int netlink_mmap(struct file *file, struct socket *sock,
+- struct vm_area_struct *vma)
+-{
+- struct sock *sk = sock->sk;
+- struct netlink_sock *nlk = nlk_sk(sk);
+- struct netlink_ring *ring;
+- unsigned long start, size, expected;
+- unsigned int i;
+- int err = -EINVAL;
+-
+- if (vma->vm_pgoff)
+- return -EINVAL;
+-
+- mutex_lock(&nlk->pg_vec_lock);
+-
+- expected = 0;
+- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
+- if (ring->pg_vec == NULL)
+- continue;
+- expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
+- }
+-
+- if (expected == 0)
+- goto out;
+-
+- size = vma->vm_end - vma->vm_start;
+- if (size != expected)
+- goto out;
+-
+- start = vma->vm_start;
+- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
+- if (ring->pg_vec == NULL)
+- continue;
+-
+- for (i = 0; i < ring->pg_vec_len; i++) {
+- struct page *page;
+- void *kaddr = ring->pg_vec[i];
+- unsigned int pg_num;
+-
+- for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
+- page = pgvec_to_page(kaddr);
+- err = vm_insert_page(vma, start, page);
+- if (err < 0)
+- goto out;
+- start += PAGE_SIZE;
+- kaddr += PAGE_SIZE;
+- }
+- }
+- }
+-
+- atomic_inc(&nlk->mapped);
+- vma->vm_ops = &netlink_mmap_ops;
+- err = 0;
+-out:
+- mutex_unlock(&nlk->pg_vec_lock);
+- return err;
+-}
+-
+-static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
+-{
+-#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
+- struct page *p_start, *p_end;
+-
+- /* First page is flushed through netlink_{get,set}_status */
+- p_start = pgvec_to_page(hdr + PAGE_SIZE);
+- p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
+- while (p_start <= p_end) {
+- flush_dcache_page(p_start);
+- p_start++;
+- }
+-#endif
+-}
+-
+-static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
+-{
+- smp_rmb();
+- flush_dcache_page(pgvec_to_page(hdr));
+- return hdr->nm_status;
+-}
+-
+-static void netlink_set_status(struct nl_mmap_hdr *hdr,
+- enum nl_mmap_status status)
+-{
+- smp_mb();
+- hdr->nm_status = status;
+- flush_dcache_page(pgvec_to_page(hdr));
+-}
+-
+-static struct nl_mmap_hdr *
+-__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
+-{
+- unsigned int pg_vec_pos, frame_off;
+-
+- pg_vec_pos = pos / ring->frames_per_block;
+- frame_off = pos % ring->frames_per_block;
+-
+- return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
+-}
+-
+-static struct nl_mmap_hdr *
+-netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
+- enum nl_mmap_status status)
+-{
+- struct nl_mmap_hdr *hdr;
+-
+- hdr = __netlink_lookup_frame(ring, pos);
+- if (netlink_get_status(hdr) != status)
+- return NULL;
+-
+- return hdr;
+-}
+-
+-static struct nl_mmap_hdr *
+-netlink_current_frame(const struct netlink_ring *ring,
+- enum nl_mmap_status status)
+-{
+- return netlink_lookup_frame(ring, ring->head, status);
+-}
+-
+-static struct nl_mmap_hdr *
+-netlink_previous_frame(const struct netlink_ring *ring,
+- enum nl_mmap_status status)
+-{
+- unsigned int prev;
+-
+- prev = ring->head ? ring->head - 1 : ring->frame_max;
+- return netlink_lookup_frame(ring, prev, status);
+-}
+-
+-static void netlink_increment_head(struct netlink_ring *ring)
+-{
+- ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
+-}
+-
+-static void netlink_forward_ring(struct netlink_ring *ring)
+-{
+- unsigned int head = ring->head, pos = head;
+- const struct nl_mmap_hdr *hdr;
+-
+- do {
+- hdr = __netlink_lookup_frame(ring, pos);
+- if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
+- break;
+- if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
+- break;
+- netlink_increment_head(ring);
+- } while (ring->head != head);
+-}
+-
+-static bool netlink_dump_space(struct netlink_sock *nlk)
+-{
+- struct netlink_ring *ring = &nlk->rx_ring;
+- struct nl_mmap_hdr *hdr;
+- unsigned int n;
+-
+- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+- if (hdr == NULL)
+- return false;
+-
+- n = ring->head + ring->frame_max / 2;
+- if (n > ring->frame_max)
+- n -= ring->frame_max;
+-
+- hdr = __netlink_lookup_frame(ring, n);
+-
+- return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
+-}
+-
+-static unsigned int netlink_poll(struct file *file, struct socket *sock,
+- poll_table *wait)
+-{
+- struct sock *sk = sock->sk;
+- struct netlink_sock *nlk = nlk_sk(sk);
+- unsigned int mask;
+- int err;
+-
+- if (nlk->rx_ring.pg_vec != NULL) {
+- /* Memory mapped sockets don't call recvmsg(), so flow control
+- * for dumps is performed here. A dump is allowed to continue
+- * if at least half the ring is unused.
+- */
+- while (nlk->cb_running && netlink_dump_space(nlk)) {
+- err = netlink_dump(sk);
+- if (err < 0) {
+- sk->sk_err = -err;
+- sk->sk_error_report(sk);
+- break;
+- }
+- }
+- netlink_rcv_wake(sk);
+- }
+-
+- mask = datagram_poll(file, sock, wait);
+-
+- spin_lock_bh(&sk->sk_receive_queue.lock);
+- if (nlk->rx_ring.pg_vec) {
+- netlink_forward_ring(&nlk->rx_ring);
+- if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED))
+- mask |= POLLIN | POLLRDNORM;
+- }
+- spin_unlock_bh(&sk->sk_receive_queue.lock);
+-
+- spin_lock_bh(&sk->sk_write_queue.lock);
+- if (nlk->tx_ring.pg_vec) {
+- if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
+- mask |= POLLOUT | POLLWRNORM;
+- }
+- spin_unlock_bh(&sk->sk_write_queue.lock);
+-
+- return mask;
+-}
+-
+-static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
+-{
+- return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
+-}
+-
+-static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
+- struct netlink_ring *ring,
+- struct nl_mmap_hdr *hdr)
+-{
+- unsigned int size;
+- void *data;
+-
+- size = ring->frame_size - NL_MMAP_HDRLEN;
+- data = (void *)hdr + NL_MMAP_HDRLEN;
+-
+- skb->head = data;
+- skb->data = data;
+- skb_reset_tail_pointer(skb);
+- skb->end = skb->tail + size;
+- skb->len = 0;
+-
+- skb->destructor = netlink_skb_destructor;
+- NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
+- NETLINK_CB(skb).sk = sk;
+-}
+-
+-static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
+- u32 dst_portid, u32 dst_group,
+- struct sock_iocb *siocb)
+-{
+- struct netlink_sock *nlk = nlk_sk(sk);
+- struct netlink_ring *ring;
+- struct nl_mmap_hdr *hdr;
+- struct sk_buff *skb;
+- unsigned int maxlen;
+- int err = 0, len = 0;
+-
+- mutex_lock(&nlk->pg_vec_lock);
+-
+- ring = &nlk->tx_ring;
+- maxlen = ring->frame_size - NL_MMAP_HDRLEN;
+-
+- do {
+- unsigned int nm_len;
+-
+- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
+- if (hdr == NULL) {
+- if (!(msg->msg_flags & MSG_DONTWAIT) &&
+- atomic_read(&nlk->tx_ring.pending))
+- schedule();
+- continue;
+- }
+-
+- nm_len = ACCESS_ONCE(hdr->nm_len);
+- if (nm_len > maxlen) {
+- err = -EINVAL;
+- goto out;
+- }
+-
+- netlink_frame_flush_dcache(hdr, nm_len);
+-
+- skb = alloc_skb(nm_len, GFP_KERNEL);
+- if (skb == NULL) {
+- err = -ENOBUFS;
+- goto out;
+- }
+- __skb_put(skb, nm_len);
+- memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
+- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+-
+- netlink_increment_head(ring);
+-
+- NETLINK_CB(skb).portid = nlk->portid;
+- NETLINK_CB(skb).dst_group = dst_group;
+- NETLINK_CB(skb).creds = siocb->scm->creds;
+-
+- err = security_netlink_send(sk, skb);
+- if (err) {
+- kfree_skb(skb);
+- goto out;
+- }
+-
+- if (unlikely(dst_group)) {
+- atomic_inc(&skb->users);
+- netlink_broadcast(sk, skb, dst_portid, dst_group,
+- GFP_KERNEL);
+- }
+- err = netlink_unicast(sk, skb, dst_portid,
+- msg->msg_flags & MSG_DONTWAIT);
+- if (err < 0)
+- goto out;
+- len += err;
+-
+- } while (hdr != NULL ||
+- (!(msg->msg_flags & MSG_DONTWAIT) &&
+- atomic_read(&nlk->tx_ring.pending)));
+-
+- if (len > 0)
+- err = len;
+-out:
+- mutex_unlock(&nlk->pg_vec_lock);
+- return err;
+-}
+-
+-static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
+-{
+- struct nl_mmap_hdr *hdr;
+-
+- hdr = netlink_mmap_hdr(skb);
+- hdr->nm_len = skb->len;
+- hdr->nm_group = NETLINK_CB(skb).dst_group;
+- hdr->nm_pid = NETLINK_CB(skb).creds.pid;
+- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
+- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
+- netlink_frame_flush_dcache(hdr, hdr->nm_len);
+- netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+-
+- NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
+- kfree_skb(skb);
+-}
+-
+-static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
+-{
+- struct netlink_sock *nlk = nlk_sk(sk);
+- struct netlink_ring *ring = &nlk->rx_ring;
+- struct nl_mmap_hdr *hdr;
+-
+- spin_lock_bh(&sk->sk_receive_queue.lock);
+- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+- if (hdr == NULL) {
+- spin_unlock_bh(&sk->sk_receive_queue.lock);
+- kfree_skb(skb);
+- netlink_overrun(sk);
+- return;
+- }
+- netlink_increment_head(ring);
+- __skb_queue_tail(&sk->sk_receive_queue, skb);
+- spin_unlock_bh(&sk->sk_receive_queue.lock);
+-
+- hdr->nm_len = skb->len;
+- hdr->nm_group = NETLINK_CB(skb).dst_group;
+- hdr->nm_pid = NETLINK_CB(skb).creds.pid;
+- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
+- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
+- netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
+-}
+-
+-#else /* CONFIG_NETLINK_MMAP */
+-#define netlink_rx_is_mmaped(sk) false
+-#define netlink_tx_is_mmaped(sk) false
+-#define netlink_mmap sock_no_mmap
+-#define netlink_poll datagram_poll
+-#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0
+-#endif /* CONFIG_NETLINK_MMAP */
+-
+ static void netlink_skb_destructor(struct sk_buff *skb)
+ {
+-#ifdef CONFIG_NETLINK_MMAP
+- struct nl_mmap_hdr *hdr;
+- struct netlink_ring *ring;
+- struct sock *sk;
+-
+- /* If a packet from the kernel to userspace was freed because of an
+- * error without being delivered to userspace, the kernel must reset
+- * the status. In the direction userspace to kernel, the status is
+- * always reset here after the packet was processed and freed.
+- */
+- if (netlink_skb_is_mmaped(skb)) {
+- hdr = netlink_mmap_hdr(skb);
+- sk = NETLINK_CB(skb).sk;
+-
+- if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
+- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+- ring = &nlk_sk(sk)->tx_ring;
+- } else {
+- if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
+- hdr->nm_len = 0;
+- netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+- }
+- ring = &nlk_sk(sk)->rx_ring;
+- }
+-
+- WARN_ON(atomic_read(&ring->pending) == 0);
+- atomic_dec(&ring->pending);
+- sock_put(sk);
+-
+- skb->head = NULL;
+- }
+-#endif
+ if (is_vmalloc_addr(skb->head)) {
+ if (!skb->cloned ||
+ !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
+@@ -934,18 +343,6 @@ static void netlink_sock_destruct(struct
+ }
+
+ skb_queue_purge(&sk->sk_receive_queue);
+-#ifdef CONFIG_NETLINK_MMAP
+- if (1) {
+- struct nl_mmap_req req;
+-
+- memset(&req, 0, sizeof(req));
+- if (nlk->rx_ring.pg_vec)
+- __netlink_set_ring(sk, &req, false, NULL, 0);
+- memset(&req, 0, sizeof(req));
+- if (nlk->tx_ring.pg_vec)
+- __netlink_set_ring(sk, &req, true, NULL, 0);
+- }
+-#endif /* CONFIG_NETLINK_MMAP */
+
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
+@@ -1153,9 +550,6 @@ static int __netlink_create(struct net *
+ mutex_init(nlk->cb_mutex);
+ }
+ init_waitqueue_head(&nlk->wait);
+-#ifdef CONFIG_NETLINK_MMAP
+- mutex_init(&nlk->pg_vec_lock);
+-#endif
+
+ sk->sk_destruct = netlink_sock_destruct;
+ sk->sk_protocol = protocol;
+@@ -1653,9 +1047,8 @@ int netlink_attachskb(struct sock *sk, s
+
+ nlk = nlk_sk(sk);
+
+- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+- test_bit(NETLINK_CONGESTED, &nlk->state)) &&
+- !netlink_skb_is_mmaped(skb)) {
++ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
++ test_bit(NETLINK_CONGESTED, &nlk->state)) {
+ DECLARE_WAITQUEUE(wait, current);
+ if (!*timeo) {
+ if (!ssk || netlink_is_kernel(ssk))
+@@ -1693,14 +1086,7 @@ static int __netlink_sendskb(struct sock
+
+ netlink_deliver_tap(skb);
+
+-#ifdef CONFIG_NETLINK_MMAP
+- if (netlink_skb_is_mmaped(skb))
+- netlink_queue_mmaped_skb(sk, skb);
+- else if (netlink_rx_is_mmaped(sk))
+- netlink_ring_set_copied(sk, skb);
+- else
+-#endif /* CONFIG_NETLINK_MMAP */
+- skb_queue_tail(&sk->sk_receive_queue, skb);
++ skb_queue_tail(&sk->sk_receive_queue, skb);
+ sk->sk_data_ready(sk);
+ return len;
+ }
+@@ -1724,9 +1110,6 @@ static struct sk_buff *netlink_trim(stru
+ int delta;
+
+ WARN_ON(skb->sk != NULL);
+- if (netlink_skb_is_mmaped(skb))
+- return skb;
+-
+ delta = skb->end - skb->tail;
+ if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
+ return skb;
+@@ -1805,66 +1188,6 @@ EXPORT_SYMBOL(netlink_unicast);
+ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
+ u32 dst_portid, gfp_t gfp_mask)
+ {
+-#ifdef CONFIG_NETLINK_MMAP
+- struct sock *sk = NULL;
+- struct sk_buff *skb;
+- struct netlink_ring *ring;
+- struct nl_mmap_hdr *hdr;
+- unsigned int maxlen;
+-
+- sk = netlink_getsockbyportid(ssk, dst_portid);
+- if (IS_ERR(sk))
+- goto out;
+-
+- ring = &nlk_sk(sk)->rx_ring;
+- /* fast-path without atomic ops for common case: non-mmaped receiver */
+- if (ring->pg_vec == NULL)
+- goto out_put;
+-
+- if (ring->frame_size - NL_MMAP_HDRLEN < size)
+- goto out_put;
+-
+- skb = alloc_skb_head(gfp_mask);
+- if (skb == NULL)
+- goto err1;
+-
+- spin_lock_bh(&sk->sk_receive_queue.lock);
+- /* check again under lock */
+- if (ring->pg_vec == NULL)
+- goto out_free;
+-
+- /* check again under lock */
+- maxlen = ring->frame_size - NL_MMAP_HDRLEN;
+- if (maxlen < size)
+- goto out_free;
+-
+- netlink_forward_ring(ring);
+- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+- if (hdr == NULL)
+- goto err2;
+- netlink_ring_setup_skb(skb, sk, ring, hdr);
+- netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
+- atomic_inc(&ring->pending);
+- netlink_increment_head(ring);
+-
+- spin_unlock_bh(&sk->sk_receive_queue.lock);
+- return skb;
+-
+-err2:
+- kfree_skb(skb);
+- spin_unlock_bh(&sk->sk_receive_queue.lock);
+- netlink_overrun(sk);
+-err1:
+- sock_put(sk);
+- return NULL;
+-
+-out_free:
+- kfree_skb(skb);
+- spin_unlock_bh(&sk->sk_receive_queue.lock);
+-out_put:
+- sock_put(sk);
+-out:
+-#endif
+ return alloc_skb(size, gfp_mask);
+ }
+ EXPORT_SYMBOL_GPL(netlink_alloc_skb);
+@@ -2126,8 +1449,7 @@ static int netlink_setsockopt(struct soc
+ if (level != SOL_NETLINK)
+ return -ENOPROTOOPT;
+
+- if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
+- optlen >= sizeof(int) &&
++ if (optlen >= sizeof(int) &&
+ get_user(val, (unsigned int __user *)optval))
+ return -EFAULT;
+
+@@ -2180,25 +1502,6 @@ static int netlink_setsockopt(struct soc
+ }
+ err = 0;
+ break;
+-#ifdef CONFIG_NETLINK_MMAP
+- case NETLINK_RX_RING:
+- case NETLINK_TX_RING: {
+- struct nl_mmap_req req;
+-
+- /* Rings might consume more memory than queue limits, require
+- * CAP_NET_ADMIN.
+- */
+- if (!capable(CAP_NET_ADMIN))
+- return -EPERM;
+- if (optlen < sizeof(req))
+- return -EINVAL;
+- if (copy_from_user(&req, optval, sizeof(req)))
+- return -EFAULT;
+- err = netlink_set_ring(sk, &req,
+- optname == NETLINK_TX_RING);
+- break;
+- }
+-#endif /* CONFIG_NETLINK_MMAP */
+ default:
+ err = -ENOPROTOOPT;
+ }
+@@ -2311,13 +1614,6 @@ static int netlink_sendmsg(struct kiocb
+ goto out;
+ }
+
+- if (netlink_tx_is_mmaped(sk) &&
+- msg->msg_iov->iov_base == NULL) {
+- err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
+- siocb);
+- goto out;
+- }
+-
+ err = -EMSGSIZE;
+ if (len > sk->sk_sndbuf - 32)
+ goto out;
+@@ -2643,8 +1939,7 @@ static int netlink_dump(struct sock *sk)
+ cb = &nlk->cb;
+ alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
+
+- if (!netlink_rx_is_mmaped(sk) &&
+- atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
++ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+ goto errout_skb;
+
+ /* NLMSG_GOODSIZE is small to avoid high order allocations being
+@@ -2721,16 +2016,7 @@ int __netlink_dump_start(struct sock *ss
+ struct netlink_sock *nlk;
+ int ret;
+
+- /* Memory mapped dump requests need to be copied to avoid looping
+- * on the pending state in netlink_mmap_sendmsg() while the CB hold
+- * a reference to the skb.
+- */
+- if (netlink_skb_is_mmaped(skb)) {
+- skb = skb_copy(skb, GFP_KERNEL);
+- if (skb == NULL)
+- return -ENOBUFS;
+- } else
+- atomic_inc(&skb->users);
++ atomic_inc(&skb->users);
+
+ sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
+ if (sk == NULL) {
+@@ -3071,7 +2357,7 @@ static const struct proto_ops netlink_op
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = netlink_getname,
+- .poll = netlink_poll,
++ .poll = datagram_poll,
+ .ioctl = sock_no_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+@@ -3079,7 +2365,7 @@ static const struct proto_ops netlink_op
+ .getsockopt = netlink_getsockopt,
+ .sendmsg = netlink_sendmsg,
+ .recvmsg = netlink_recvmsg,
+- .mmap = netlink_mmap,
++ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+ };
+
+--- a/net/netlink/af_netlink.h
++++ b/net/netlink/af_netlink.h
+@@ -42,12 +42,6 @@ struct netlink_sock {
+ int (*netlink_bind)(int group);
+ void (*netlink_unbind)(int group);
+ struct module *module;
+-#ifdef CONFIG_NETLINK_MMAP
+- struct mutex pg_vec_lock;
+- struct netlink_ring rx_ring;
+- struct netlink_ring tx_ring;
+- atomic_t mapped;
+-#endif /* CONFIG_NETLINK_MMAP */
+
+ struct rhash_head node;
+ };
+@@ -57,15 +51,6 @@ static inline struct netlink_sock *nlk_s
+ return container_of(sk, struct netlink_sock, sk);
+ }
+
+-static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb)
+-{
+-#ifdef CONFIG_NETLINK_MMAP
+- return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
+-#else
+- return false;
+-#endif /* CONFIG_NETLINK_MMAP */
+-}
+-
+ struct netlink_table {
+ struct rhashtable hash;
+ struct hlist_head mc_list;
+--- a/net/netlink/diag.c
++++ b/net/netlink/diag.c
+@@ -8,41 +8,6 @@
+
+ #include "af_netlink.h"
+
+-#ifdef CONFIG_NETLINK_MMAP
+-static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type,
+- struct sk_buff *nlskb)
+-{
+- struct netlink_diag_ring ndr;
+-
+- ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
+- ndr.ndr_block_nr = ring->pg_vec_len;
+- ndr.ndr_frame_size = ring->frame_size;
+- ndr.ndr_frame_nr = ring->frame_max + 1;
+-
+- return nla_put(nlskb, nl_type, sizeof(ndr), &ndr);
+-}
+-
+-static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
+-{
+- struct netlink_sock *nlk = nlk_sk(sk);
+- int ret;
+-
+- mutex_lock(&nlk->pg_vec_lock);
+- ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb);
+- if (!ret)
+- ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING,
+- nlskb);
+- mutex_unlock(&nlk->pg_vec_lock);
+-
+- return ret;
+-}
+-#else
+-static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
+-{
+- return 0;
+-}
+-#endif
+-
+ static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
+ {
+ struct netlink_sock *nlk = nlk_sk(sk);
+@@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk,
+ sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
+ goto out_nlmsg_trim;
+
+- if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) &&
+- sk_diag_put_rings_cfg(sk, skb))
+- goto out_nlmsg_trim;
+-
+ return nlmsg_end(skb, nlh);
+
+ out_nlmsg_trim: