--
2.12.2
-From 944690cdb5f48d03842365b7359fe090d6c2b1fa Mon Sep 17 00:00:00 2001
-From: Bart Van Assche <bart.vanassche@sandisk.com>
-Date: Tue, 14 Feb 2017 10:56:30 -0800
-Subject: [PATCH 016/251] IB/srp: Avoid that duplicate responses trigger a
- kernel bug
-Content-Length: 1979
-Lines: 54
-
-commit 6cb72bc1b40bb2c1750ee7a5ebade93bed49a5fb upstream.
-
-After srp_process_rsp() returns there is a short window during which
-the scsi_host_find_tag() call will still return a pointer to the SCSI
-command that is being completed. If a duplicate response is received
-during that window, prevent the following call stack from appearing:
-
-BUG: unable to handle kernel NULL pointer dereference at (null)
-IP: srp_recv_done+0x450/0x6b0 [ib_srp]
-Oops: 0000 [#1] SMP
-CPU: 10 PID: 0 Comm: swapper/10 Not tainted 4.10.0-rc7-dbg+ #1
-Call Trace:
- <IRQ>
- __ib_process_cq+0x4b/0xd0 [ib_core]
- ib_poll_handler+0x1d/0x70 [ib_core]
- irq_poll_softirq+0xba/0x120
- __do_softirq+0xba/0x4c0
- irq_exit+0xbe/0xd0
- smp_apic_timer_interrupt+0x38/0x50
- apic_timer_interrupt+0x90/0xa0
- </IRQ>
-RIP: srp_recv_done+0x450/0x6b0 [ib_srp] RSP: ffff88046f483e20
-
-Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
-Cc: Israel Rukshin <israelr@mellanox.com>
-Cc: Max Gurtovoy <maxg@mellanox.com>
-Cc: Laurence Oberman <loberman@redhat.com>
-Cc: Steve Feeley <Steve.Feeley@sandisk.com>
-Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
-Signed-off-by: Doug Ledford <dledford@redhat.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/ulp/srp/ib_srp.c | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
-index 5f0f4fc58f43..57a8a200e741 100644
---- a/drivers/infiniband/ulp/srp/ib_srp.c
-+++ b/drivers/infiniband/ulp/srp/ib_srp.c
-@@ -1795,9 +1795,11 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
- complete(&ch->tsk_mgmt_done);
- } else {
- scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
-- if (scmnd) {
-+ if (scmnd && scmnd->host_scribble) {
- req = (void *)scmnd->host_scribble;
- scmnd = srp_claim_req(ch, req, NULL, scmnd);
-+ } else {
-+ scmnd = NULL;
- }
- if (!scmnd) {
- shost_printk(KERN_ERR, target->scsi_host,
---
-2.12.2
-
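The fix above works because claiming a request clears the driver-private
pointer, so a second (duplicate) response that looks up the same tag finds
nothing left to complete. Below is a minimal userspace sketch of that
double-completion guard; the names are illustrative only, and the real
driver performs the claim under ch->lock rather than single-threaded:

    #include <stdio.h>
    #include <stddef.h>

    struct request { int tag; };

    struct command {
        struct request *req;        /* plays the role of host_scribble */
    };

    /* Only the first completion wins: claiming clears the back-pointer. */
    static struct request *claim_req(struct command *cmd)
    {
        struct request *req = cmd->req;

        cmd->req = NULL;
        return req;
    }

    static void process_rsp(struct command *cmd, int tag)
    {
        if (cmd && cmd->req)
            printf("completing tag %d\n", claim_req(cmd)->tag);
        else
            printf("ignoring duplicate response for tag %d\n", tag);
    }

    int main(void)
    {
        struct request r = { .tag = 42 };
        struct command c = { .req = &r };

        process_rsp(&c, 42);    /* first response completes the command */
        process_rsp(&c, 42);    /* duplicate is detected and dropped */
        return 0;
    }
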
-From 696255449b89af5487bce53b1a65eddedc72aeff Mon Sep 17 00:00:00 2001
-From: Bart Van Assche <bart.vanassche@sandisk.com>
-Date: Tue, 14 Feb 2017 10:56:31 -0800
-Subject: [PATCH 017/251] IB/srp: Fix race conditions related to task
- management
-Content-Length: 5896
-Lines: 169
-
-commit 0a6fdbdeb1c25e31763c1fb333fa2723a7d2aba6 upstream.
-
-Prevent srp_process_rsp() from overwriting the status information
-in ch when the SRP target response timed out and processing of
-another task management function has already started. Also prevent
-the list corruption triggered by issuing multiple task management
-functions concurrently. This patch keeps the following stack
-trace from appearing in the system log:
-
-WARNING: CPU: 8 PID: 9269 at lib/list_debug.c:52 __list_del_entry_valid+0xbc/0xc0
-list_del corruption. prev->next should be ffffc90004bb7b00, but was ffff8804052ecc68
-CPU: 8 PID: 9269 Comm: sg_reset Tainted: G W 4.10.0-rc7-dbg+ #3
-Call Trace:
- dump_stack+0x68/0x93
- __warn+0xc6/0xe0
- warn_slowpath_fmt+0x4a/0x50
- __list_del_entry_valid+0xbc/0xc0
- wait_for_completion_timeout+0x12e/0x170
- srp_send_tsk_mgmt+0x1ef/0x2d0 [ib_srp]
- srp_reset_device+0x5b/0x110 [ib_srp]
- scsi_ioctl_reset+0x1c7/0x290
- scsi_ioctl+0x12a/0x420
- sd_ioctl+0x9d/0x100
- blkdev_ioctl+0x51e/0x9f0
- block_ioctl+0x38/0x40
- do_vfs_ioctl+0x8f/0x700
- SyS_ioctl+0x3c/0x70
- entry_SYSCALL_64_fastpath+0x18/0xad
-
-Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
-Cc: Israel Rukshin <israelr@mellanox.com>
-Cc: Max Gurtovoy <maxg@mellanox.com>
-Cc: Laurence Oberman <loberman@redhat.com>
-Cc: Steve Feeley <Steve.Feeley@sandisk.com>
-Signed-off-by: Doug Ledford <dledford@redhat.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/ulp/srp/ib_srp.c | 45 ++++++++++++++++++++++++-------------
- drivers/infiniband/ulp/srp/ib_srp.h | 1 +
- 2 files changed, 30 insertions(+), 16 deletions(-)
-
-diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
-index 57a8a200e741..e397f1b0af09 100644
---- a/drivers/infiniband/ulp/srp/ib_srp.c
-+++ b/drivers/infiniband/ulp/srp/ib_srp.c
-@@ -1787,12 +1787,17 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
- if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
- spin_lock_irqsave(&ch->lock, flags);
- ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
-+ if (rsp->tag == ch->tsk_mgmt_tag) {
-+ ch->tsk_mgmt_status = -1;
-+ if (be32_to_cpu(rsp->resp_data_len) >= 4)
-+ ch->tsk_mgmt_status = rsp->data[3];
-+ complete(&ch->tsk_mgmt_done);
-+ } else {
-+ shost_printk(KERN_ERR, target->scsi_host,
-+ "Received tsk mgmt response too late for tag %#llx\n",
-+ rsp->tag);
-+ }
- spin_unlock_irqrestore(&ch->lock, flags);
--
-- ch->tsk_mgmt_status = -1;
-- if (be32_to_cpu(rsp->resp_data_len) >= 4)
-- ch->tsk_mgmt_status = rsp->data[3];
-- complete(&ch->tsk_mgmt_done);
- } else {
- scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
- if (scmnd && scmnd->host_scribble) {
-@@ -2471,19 +2476,18 @@ srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
- }
-
- static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
-- u8 func)
-+ u8 func, u8 *status)
- {
- struct srp_target_port *target = ch->target;
- struct srp_rport *rport = target->rport;
- struct ib_device *dev = target->srp_host->srp_dev->dev;
- struct srp_iu *iu;
- struct srp_tsk_mgmt *tsk_mgmt;
-+ int res;
-
- if (!ch->connected || target->qp_in_error)
- return -1;
-
-- init_completion(&ch->tsk_mgmt_done);
--
- /*
- * Lock the rport mutex to avoid that srp_create_ch_ib() is
- * invoked while a task management function is being sent.
-@@ -2506,10 +2510,16 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
-
- tsk_mgmt->opcode = SRP_TSK_MGMT;
- int_to_scsilun(lun, &tsk_mgmt->lun);
-- tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
- tsk_mgmt->tsk_mgmt_func = func;
- tsk_mgmt->task_tag = req_tag;
-
-+ spin_lock_irq(&ch->lock);
-+ ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
-+ tsk_mgmt->tag = ch->tsk_mgmt_tag;
-+ spin_unlock_irq(&ch->lock);
-+
-+ init_completion(&ch->tsk_mgmt_done);
-+
- ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
- DMA_TO_DEVICE);
- if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
-@@ -2518,13 +2528,15 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
-
- return -1;
- }
-+ res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
-+ msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
-+ if (res > 0 && status)
-+ *status = ch->tsk_mgmt_status;
- mutex_unlock(&rport->mutex);
-
-- if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
-- msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
-- return -1;
-+ WARN_ON_ONCE(res < 0);
-
-- return 0;
-+ return res > 0 ? 0 : -1;
- }
-
- static int srp_abort(struct scsi_cmnd *scmnd)
-@@ -2550,7 +2562,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
- shost_printk(KERN_ERR, target->scsi_host,
- "Sending SRP abort for tag %#x\n", tag);
- if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
-- SRP_TSK_ABORT_TASK) == 0)
-+ SRP_TSK_ABORT_TASK, NULL) == 0)
- ret = SUCCESS;
- else if (target->rport->state == SRP_RPORT_LOST)
- ret = FAST_IO_FAIL;
-@@ -2568,14 +2580,15 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
- struct srp_target_port *target = host_to_target(scmnd->device->host);
- struct srp_rdma_ch *ch;
- int i;
-+ u8 status;
-
- shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
-
- ch = &target->ch[0];
- if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
-- SRP_TSK_LUN_RESET))
-+ SRP_TSK_LUN_RESET, &status))
- return FAILED;
-- if (ch->tsk_mgmt_status)
-+ if (status)
- return FAILED;
-
- for (i = 0; i < target->ch_count; i++) {
-diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
-index f6af531f9f32..109eea94d0f9 100644
---- a/drivers/infiniband/ulp/srp/ib_srp.h
-+++ b/drivers/infiniband/ulp/srp/ib_srp.h
-@@ -168,6 +168,7 @@ struct srp_rdma_ch {
- int max_ti_iu_len;
- int comp_vector;
-
-+ u64 tsk_mgmt_tag;
- struct completion tsk_mgmt_done;
- u8 tsk_mgmt_status;
- bool connected;
---
-2.12.2
-
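The core of the fix is that the task management tag is now generated and
checked under the same lock, so a response that arrives after a timeout,
belonging to an older task management function, can be recognized and
dropped. A standalone sketch of that scheme follows; names are
illustrative (the driver uses ch->lock and SRP_TAG_TSK_MGMT), and this is
a model of the idea, not the driver code:

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TAG_TSK_MGMT (1ULL << 63)   /* stand-in for SRP_TAG_TSK_MGMT */

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static uint64_t tsk_mgmt_tag;

    /* Allocate a fresh tag for each task management function. */
    static uint64_t issue_tsk_mgmt(void)
    {
        uint64_t tag;

        pthread_mutex_lock(&lock);
        tag = tsk_mgmt_tag = (tsk_mgmt_tag + 1) | TAG_TSK_MGMT;
        pthread_mutex_unlock(&lock);
        return tag;
    }

    /* Accept a response only if it matches the most recently issued tag. */
    static void process_rsp(uint64_t rsp_tag)
    {
        pthread_mutex_lock(&lock);
        if (rsp_tag == tsk_mgmt_tag)
            printf("tag %#llx: completing waiter\n",
                   (unsigned long long)rsp_tag);
        else
            printf("tag %#llx: response too late, ignored\n",
                   (unsigned long long)rsp_tag);
        pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        uint64_t old = issue_tsk_mgmt();    /* this TMF times out... */
        uint64_t cur = issue_tsk_mgmt();    /* ...and a new one starts */

        process_rsp(old);   /* late response for the old TMF: dropped */
        process_rsp(cur);   /* response for the current TMF: accepted */
        return 0;
    }
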
-From ca739e3fd7dc803d526ea5bb9b80c0d07fbca55f Mon Sep 17 00:00:00 2001
-From: Nicholas Bellinger <nab@linux-iscsi.org>
-Date: Wed, 22 Feb 2017 22:06:32 -0800
-Subject: [PATCH 020/251] target: Fix NULL dereference during LUN lookup +
- active I/O shutdown
-Content-Length: 6768
-Lines: 191
-
-commit bd4e2d2907fa23a11d46217064ecf80470ddae10 upstream.
-
-When transport_clear_lun_ref() is shutting down a se_lun via
-configfs with new I/O in-flight, it's possible to trigger a
-NULL pointer dereference in transport_lookup_cmd_lun() because
-percpu_ref_get() doesn't do any __PERCPU_REF_DEAD checking
-before incrementing lun->lun_ref.count after lun->lun_ref has
-switched to atomic_t mode.
-
-This results in a NULL pointer dereference as LUN shutdown
-code in core_tpg_remove_lun() continues running after the
-existing ->release() -> core_tpg_lun_ref_release() callback
-completes, and clears the RCU protected se_lun->lun_se_dev
-pointer.
-
-During the Oops, the state of lun->lun_ref in the process
-which triggered the NULL pointer dereference looks like
-the following on v4.1.y stable code:
-
-struct se_lun {
- lun_link_magic = 4294932337,
- lun_status = TRANSPORT_LUN_STATUS_FREE,
-
- .....
-
- lun_se_dev = 0x0,
- lun_sep = 0x0,
-
- .....
-
- lun_ref = {
- count = {
- counter = 1
- },
- percpu_count_ptr = 3,
- release = 0xffffffffa02fa1e0 <core_tpg_lun_ref_release>,
- confirm_switch = 0x0,
- force_atomic = false,
- rcu = {
- next = 0xffff88154fa1a5d0,
- func = 0xffffffff8137c4c0 <percpu_ref_switch_to_atomic_rcu>
- }
- }
-}
-
-To address this bug, use percpu_ref_tryget_live() to ensure that
-once __PERCPU_REF_DEAD is visible on all CPUs and ->lun_ref
-has switched to atomic_t, all new I/Os will fail to obtain
-a new lun->lun_ref reference.
-
-Also use an explicit percpu_ref_kill_and_confirm() callback
-to block on ->lun_ref_comp to allow the first stage and
-associated RCU grace period to complete, and then block on
-->lun_shutdown_comp waiting for the final percpu_ref_put()
-to drop the last reference via transport_lun_remove_cmd()
-before continuing with core_tpg_remove_lun() shutdown.
-
-Reported-by: Rob Millner <rlm@daterainc.com>
-Tested-by: Rob Millner <rlm@daterainc.com>
-Cc: Rob Millner <rlm@daterainc.com>
-Tested-by: Vaibhav Tandon <vst@datera.io>
-Cc: Vaibhav Tandon <vst@datera.io>
-Tested-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
-Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/target/target_core_device.c | 10 ++++++++--
- drivers/target/target_core_tpg.c | 3 ++-
- drivers/target/target_core_transport.c | 31 ++++++++++++++++++++++++++++++-
- include/target/target_core_base.h | 1 +
- 4 files changed, 41 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
-index 356c80fbb304..bb6a6c35324a 100644
---- a/drivers/target/target_core_device.c
-+++ b/drivers/target/target_core_device.c
-@@ -77,12 +77,16 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun)
- &deve->read_bytes);
-
- se_lun = rcu_dereference(deve->se_lun);
-+
-+ if (!percpu_ref_tryget_live(&se_lun->lun_ref)) {
-+ se_lun = NULL;
-+ goto out_unlock;
-+ }
-+
- se_cmd->se_lun = rcu_dereference(deve->se_lun);
- se_cmd->pr_res_key = deve->pr_res_key;
- se_cmd->orig_fe_lun = unpacked_lun;
- se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD;
--
-- percpu_ref_get(&se_lun->lun_ref);
- se_cmd->lun_ref_active = true;
-
- if ((se_cmd->data_direction == DMA_TO_DEVICE) &&
-@@ -96,6 +100,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun)
- goto ref_dev;
- }
- }
-+out_unlock:
- rcu_read_unlock();
-
- if (!se_lun) {
-@@ -826,6 +831,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
- xcopy_lun = &dev->xcopy_lun;
- rcu_assign_pointer(xcopy_lun->lun_se_dev, dev);
- init_completion(&xcopy_lun->lun_ref_comp);
-+ init_completion(&xcopy_lun->lun_shutdown_comp);
- INIT_LIST_HEAD(&xcopy_lun->lun_deve_list);
- INIT_LIST_HEAD(&xcopy_lun->lun_dev_link);
- mutex_init(&xcopy_lun->lun_tg_pt_md_mutex);
-diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
-index 028854cda97b..2794c6ec5c3c 100644
---- a/drivers/target/target_core_tpg.c
-+++ b/drivers/target/target_core_tpg.c
-@@ -539,7 +539,7 @@ static void core_tpg_lun_ref_release(struct percpu_ref *ref)
- {
- struct se_lun *lun = container_of(ref, struct se_lun, lun_ref);
-
-- complete(&lun->lun_ref_comp);
-+ complete(&lun->lun_shutdown_comp);
- }
-
- int core_tpg_register(
-@@ -666,6 +666,7 @@ struct se_lun *core_tpg_alloc_lun(
- lun->lun_link_magic = SE_LUN_LINK_MAGIC;
- atomic_set(&lun->lun_acl_count, 0);
- init_completion(&lun->lun_ref_comp);
-+ init_completion(&lun->lun_shutdown_comp);
- INIT_LIST_HEAD(&lun->lun_deve_list);
- INIT_LIST_HEAD(&lun->lun_dev_link);
- atomic_set(&lun->lun_tg_pt_secondary_offline, 0);
-diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
-index befe22744802..df2059984e14 100644
---- a/drivers/target/target_core_transport.c
-+++ b/drivers/target/target_core_transport.c
-@@ -2680,10 +2680,39 @@ void target_wait_for_sess_cmds(struct se_session *se_sess)
- }
- EXPORT_SYMBOL(target_wait_for_sess_cmds);
-
-+static void target_lun_confirm(struct percpu_ref *ref)
-+{
-+ struct se_lun *lun = container_of(ref, struct se_lun, lun_ref);
-+
-+ complete(&lun->lun_ref_comp);
-+}
-+
- void transport_clear_lun_ref(struct se_lun *lun)
- {
-- percpu_ref_kill(&lun->lun_ref);
-+ /*
-+ * Mark the percpu-ref as DEAD, switch to atomic_t mode, drop
-+ * the initial reference and schedule confirm kill to be
-+ * executed after one full RCU grace period has completed.
-+ */
-+ percpu_ref_kill_and_confirm(&lun->lun_ref, target_lun_confirm);
-+ /*
-+ * The first completion waits for percpu_ref_switch_to_atomic_rcu()
-+ * to call target_lun_confirm after lun->lun_ref has been marked
-+ * as __PERCPU_REF_DEAD on all CPUs, and switches to atomic_t
-+ * mode so that percpu_ref_tryget_live() lookup of lun->lun_ref
-+ * fails for all new incoming I/O.
-+ */
- wait_for_completion(&lun->lun_ref_comp);
-+ /*
-+ * The second completion waits for percpu_ref_put_many() to
-+ * invoke ->release() after lun->lun_ref has switched to
-+ * atomic_t mode, and lun->lun_ref.count has reached zero.
-+ *
-+ * At this point all target-core lun->lun_ref references have
-+ * been dropped via transport_lun_remove_cmd(), and it's safe
-+ * to proceed with the remaining LUN shutdown.
-+ */
-+ wait_for_completion(&lun->lun_shutdown_comp);
- }
-
- static bool
-diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
-index 800fe16cc36f..ed66414b91f0 100644
---- a/include/target/target_core_base.h
-+++ b/include/target/target_core_base.h
-@@ -740,6 +740,7 @@ struct se_lun {
- struct config_group lun_group;
- struct se_port_stat_grps port_stat_grps;
- struct completion lun_ref_comp;
-+ struct completion lun_shutdown_comp;
- struct percpu_ref lun_ref;
- struct list_head lun_dev_link;
- struct hlist_node link;
---
-2.12.2
-
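The essence of the lookup-side change is the difference between
percpu_ref_get(), which increments unconditionally, and
percpu_ref_tryget_live(), which refuses once shutdown has begun. Below is
a simplified, single-threaded model of those semantics; it is an
illustration only, not the kernel's percpu_ref implementation (which keeps
per-CPU counters and real memory ordering):

    #include <stdbool.h>
    #include <stdio.h>

    struct ref {
        long count;
        bool dead;              /* stands in for __PERCPU_REF_DEAD */
    };

    /* Old lookup path: increments with no liveness check. */
    static void ref_get(struct ref *r)
    {
        r->count++;
    }

    /* New lookup path: refuses new references once shutdown has begun. */
    static bool ref_tryget_live(struct ref *r)
    {
        if (r->dead)
            return false;
        r->count++;
        return true;
    }

    int main(void)
    {
        struct ref lun_ref = { .count = 1, .dead = false };

        lun_ref.dead = true;    /* transport_clear_lun_ref() starts */

        ref_get(&lun_ref);      /* old code: succeeds and races shutdown */
        printf("percpu_ref_get after kill: count=%ld (the bug)\n",
               lun_ref.count);

        if (!ref_tryget_live(&lun_ref))
            printf("percpu_ref_tryget_live after kill: refused (the fix)\n");
        return 0;
    }
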
-From 0d80ac62b609bce00b78a656b7cdde2d8f587345 Mon Sep 17 00:00:00 2001
-From: Alex Deucher <alexander.deucher@amd.com>
-Date: Fri, 10 Feb 2017 00:00:52 -0500
-Subject: [PATCH 025/251] drm/amdgpu: add more cases to DCE11 possible crtc
- mask setup
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-Content-Length: 1129
-Lines: 38
-
-commit 4ce3bd45b351633f2a0512c587f7fcba2ce044e8 upstream.
-
-Add cases for ASICs with 3 and 5 CRTCs, removing an artificial
-limitation on those ASICs.
-
-Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=99744
-
-Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
-Reviewed-by: Christian König <christian.koenig@amd.com>
-Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
-index c161eeda417b..267749a94c5a 100644
---- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
-+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
-@@ -3704,9 +3704,15 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev,
- default:
- encoder->possible_crtcs = 0x3;
- break;
-+ case 3:
-+ encoder->possible_crtcs = 0x7;
-+ break;
- case 4:
- encoder->possible_crtcs = 0xf;
- break;
-+ case 5:
-+ encoder->possible_crtcs = 0x1f;
-+ break;
- case 6:
- encoder->possible_crtcs = 0x3f;
- break;
---
-2.12.2
-
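Each case in the switch above is just the mask with the n low bits set:
0x3, 0x7, 0xf, 0x1f, 0x3f for 2 through 6 CRTCs. The driver keeps the
explicit switch, but the mask could equally be computed, as this
illustrative snippet shows:

    #include <stdio.h>

    static unsigned int crtc_mask(unsigned int num_crtc)
    {
        return (1u << num_crtc) - 1;    /* the n low bits set */
    }

    int main(void)
    {
        for (unsigned int n = 2; n <= 6; n++)
            printf("%u crtcs -> possible_crtcs = %#x\n", n, crtc_mask(n));
        return 0;
    }
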
-From 8b787652386e26c7974092f11bd477126b0d53ce Mon Sep 17 00:00:00 2001
-From: "Y.C. Chen" <yc_chen@aspeedtech.com>
-Date: Wed, 22 Feb 2017 15:10:50 +1100
-Subject: [PATCH 026/251] drm/ast: Fix test for VGA enabled
-Content-Length: 1240
-Lines: 38
-
-commit 905f21a49d388de3e99438235f3301cabf0c0ef4 upstream.
-
-The test to see if VGA was already enabled performs an unnecessary
-second read from a register that may or may not have been initialized
-to a valid value. Remove the second read.
-
-Signed-off-by: Y.C. Chen <yc_chen@aspeedtech.com>
-Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-Acked-by: Joel Stanley <joel@jms.id.au>
-Tested-by: Y.C. Chen <yc_chen@aspeedtech.com>
-Signed-off-by: Dave Airlie <airlied@redhat.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/ast/ast_post.c | 8 ++------
- 1 file changed, 2 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c
-index 810c51d92b99..4e8aaab5dd52 100644
---- a/drivers/gpu/drm/ast/ast_post.c
-+++ b/drivers/gpu/drm/ast/ast_post.c
-@@ -58,13 +58,9 @@ bool ast_is_vga_enabled(struct drm_device *dev)
- /* TODO 1180 */
- } else {
- ch = ast_io_read8(ast, AST_IO_VGA_ENABLE_PORT);
-- if (ch) {
-- ast_open_key(ast);
-- ch = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0xff);
-- return ch & 0x04;
-- }
-+ return !!(ch & 0x01);
- }
-- return 0;
-+ return false;
- }
-
- static const u8 extreginfo[] = { 0x0f, 0x04, 0x1c, 0xff };
---
-2.12.2
-
-From 93eab4f5259485e9cad0339a298b6da1dd2e6e40 Mon Sep 17 00:00:00 2001
-From: "Y.C. Chen" <yc_chen@aspeedtech.com>
-Date: Wed, 22 Feb 2017 15:14:19 +1100
-Subject: [PATCH 027/251] drm/ast: Call open_key before enable_mmio in POST
- code
-Content-Length: 1014
-Lines: 30
-
-commit 9bb92f51558f2ef5f56c257bdcea0588f31d857e upstream.
-
-open_key enables access to the registers used by enable_mmio, so it
-must be called first.
-
-Signed-off-by: Y.C. Chen <yc_chen@aspeedtech.com>
-Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-Acked-by: Joel Stanley <joel@jms.id.au>
-Tested-by: Y.C. Chen <yc_chen@aspeedtech.com>
-Signed-off-by: Dave Airlie <airlied@redhat.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/ast/ast_post.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c
-index 4e8aaab5dd52..50836e549867 100644
---- a/drivers/gpu/drm/ast/ast_post.c
-+++ b/drivers/gpu/drm/ast/ast_post.c
-@@ -371,8 +371,8 @@ void ast_post_gpu(struct drm_device *dev)
- pci_write_config_dword(ast->dev->pdev, 0x04, reg);
-
- ast_enable_vga(dev);
-- ast_enable_mmio(dev);
- ast_open_key(ast);
-+ ast_enable_mmio(dev);
- ast_set_def_ext_reg(dev);
-
- if (ast->chip == AST2300 || ast->chip == AST2400)
---
-2.12.2
-
-From b9cfd5517b309513e50d80b89eaae98a82a2c3b1 Mon Sep 17 00:00:00 2001
-From: "Y.C. Chen" <yc_chen@aspeedtech.com>
-Date: Thu, 23 Feb 2017 15:52:33 +0800
-Subject: [PATCH 028/251] drm/ast: Fix AST2400 POST failure without BMC FW or
- VBIOS
-Content-Length: 2034
-Lines: 70
-
-commit 3856081eede297b617560b85e948cfb00bb395ec upstream.
-
-The current POST code for the AST2300/2400 family doesn't work properly
-if the chip hasn't previously been initialized by either the BMC's own
-firmware or the VBIOS. This fixes it.
-
-Signed-off-by: Y.C. Chen <yc_chen@aspeedtech.com>
-Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-Tested-by: Y.C. Chen <yc_chen@aspeedtech.com>
-Acked-by: Joel Stanley <joel@jms.id.au>
-Signed-off-by: Dave Airlie <airlied@redhat.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/ast/ast_post.c | 38 +++++++++++++++++++++++++++++++++++---
- 1 file changed, 35 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c
-index 50836e549867..30672a3df8a9 100644
---- a/drivers/gpu/drm/ast/ast_post.c
-+++ b/drivers/gpu/drm/ast/ast_post.c
-@@ -1626,12 +1626,44 @@ static void ast_init_dram_2300(struct drm_device *dev)
- temp |= 0x73;
- ast_write32(ast, 0x12008, temp);
-
-+ param.dram_freq = 396;
- param.dram_type = AST_DDR3;
-+ temp = ast_mindwm(ast, 0x1e6e2070);
- if (temp & 0x01000000)
- param.dram_type = AST_DDR2;
-- param.dram_chipid = ast->dram_type;
-- param.dram_freq = ast->mclk;
-- param.vram_size = ast->vram_size;
-+ switch (temp & 0x18000000) {
-+ case 0:
-+ param.dram_chipid = AST_DRAM_512Mx16;
-+ break;
-+ default:
-+ case 0x08000000:
-+ param.dram_chipid = AST_DRAM_1Gx16;
-+ break;
-+ case 0x10000000:
-+ param.dram_chipid = AST_DRAM_2Gx16;
-+ break;
-+ case 0x18000000:
-+ param.dram_chipid = AST_DRAM_4Gx16;
-+ break;
-+ }
-+ switch (temp & 0x0c) {
-+ default:
-+ case 0x00:
-+ param.vram_size = AST_VIDMEM_SIZE_8M;
-+ break;
-+
-+ case 0x04:
-+ param.vram_size = AST_VIDMEM_SIZE_16M;
-+ break;
-+
-+ case 0x08:
-+ param.vram_size = AST_VIDMEM_SIZE_32M;
-+ break;
-+
-+ case 0x0c:
-+ param.vram_size = AST_VIDMEM_SIZE_64M;
-+ break;
-+ }
-
- if (param.dram_type == AST_DDR3) {
- get_ddr3_info(ast, ¶m);
---
-2.12.2
-
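The switch statements above decode the DRAM parameters from bitfields of
the hardware strap register at 0x1e6e2070 instead of trusting values
cached at probe time, which are bogus when neither the BMC firmware nor
the VBIOS has run. A sketch of the same decode using shifts and masks;
the field positions follow the switch cases in the patch, and the names
and example value are illustrative:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t strap = 0x09000008;    /* example readout of 0x1e6e2070 */

        const char *type = (strap & 0x01000000) ? "DDR2" : "DDR3";
        static const char *const chip[] = {
            "512Mx16", "1Gx16", "2Gx16", "4Gx16"    /* bits 28:27 */
        };
        static const unsigned int vram_mb[] = { 8, 16, 32, 64 };  /* bits 3:2 */

        printf("dram_type   = %s\n", type);
        printf("dram_chipid = AST_DRAM_%s\n", chip[(strap >> 27) & 3]);
        printf("vram_size   = %u MiB\n", vram_mb[(strap >> 2) & 3]);
        return 0;
    }
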
-From 36fd36b900b9382af54a1e49a81cd99663b83eda Mon Sep 17 00:00:00 2001
-From: Tomeu Vizoso <tomeu.vizoso@collabora.com>
-Date: Mon, 20 Feb 2017 16:25:45 +0100
-Subject: [PATCH 029/251] drm/edid: Add EDID_QUIRK_FORCE_8BPC quirk for Rotel
- RSX-1058
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-Content-Length: 1596
-Lines: 44
-
-commit 36fc579761b50784b63dafd0f2e796b659e0f5ee upstream.
-
-The Rotel RSX-1058 is a receiver with 4 HDMI inputs and an HDMI output,
-all HDMI 1.1.
-
-When a sink that supports deep color is connected to the output, the
-receiver will send EDIDs that advertise this capability, even if it
-isn't possible with HDMI versions earlier than 1.3.
-
-Currently the kernel is assuming that deep color is possible and the
-sink displays an error.
-
-This quirk will make sure that deep color isn't used with this
-particular receiver.
-
-Fixes: 7a0baa623446 ("Revert "drm/i915: Disable 12bpc hdmi for now"")
-Signed-off-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
-Link: http://patchwork.freedesktop.org/patch/msgid/20170220152545.13153-1-tomeu.vizoso@collabora.com
-Cc: Matt Horan <matt@matthoran.com>
-Tested-by: Matt Horan <matt@matthoran.com>
-Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99869
-Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
-Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/drm_edid.c | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
-index 8c9ac021608f..cc1e16fd7e76 100644
---- a/drivers/gpu/drm/drm_edid.c
-+++ b/drivers/gpu/drm/drm_edid.c
-@@ -144,6 +144,9 @@ static struct edid_quirk {
-
- /* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */
- { "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC },
-+
-+ /* Rotel RSX-1058 forwards sink's EDID but only does HDMI 1.1*/
-+ { "ETR", 13896, EDID_QUIRK_FORCE_8BPC },
- };
-
- /*
---
-2.12.2
-
-From 59fc34fc69066bfabf8bed21f4ce5bf312e68bb3 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
-Date: Wed, 25 Jan 2017 17:21:31 +0900
-Subject: [PATCH 030/251] drm/ttm: Make sure BOs being swapped out are
- cacheable
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-Content-Length: 1580
-Lines: 44
-
-commit 239ac65fa5ffab71adf66e642750f940e7241d99 upstream.
-
-The current caching state may not be tt_cached, even though the
-placement contains TTM_PL_FLAG_CACHED, because placement can contain
-multiple caching flags. Trying to swap out such a BO would trip up the
-
- BUG_ON(ttm->caching_state != tt_cached);
-
-in ttm_tt_swapout.
-
-Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
-Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
-Reviewed-by: Christian König <christian.koenig@amd.com>
-Reviewed-by: Sinclair Yeh <syeh@vmware.com>
-Signed-off-by: Christian König <christian.koenig@amd.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/ttm/ttm_bo.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
-index 4ae8b56b1847..037c38bb5333 100644
---- a/drivers/gpu/drm/ttm/ttm_bo.c
-+++ b/drivers/gpu/drm/ttm/ttm_bo.c
-@@ -1621,7 +1621,6 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
- struct ttm_buffer_object *bo;
- int ret = -EBUSY;
- int put_count;
-- uint32_t swap_placement = (TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM);
-
- spin_lock(&glob->lru_lock);
- list_for_each_entry(bo, &glob->swap_lru, swap) {
-@@ -1657,7 +1656,8 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
- if (unlikely(ret != 0))
- goto out;
-
-- if ((bo->mem.placement & swap_placement) != swap_placement) {
-+ if (bo->mem.mem_type != TTM_PL_SYSTEM ||
-+ bo->ttm->caching_state != tt_cached) {
- struct ttm_mem_reg evict_mem;
-
- evict_mem = bo->mem;
---
-2.12.2
-
-From bb5b96344ed378a1d5b8cf3bd149bb86919f3b9f Mon Sep 17 00:00:00 2001
-From: Dan Carpenter <dan.carpenter@oracle.com>
-Date: Wed, 8 Feb 2017 02:46:01 +0300
-Subject: [PATCH 031/251] drm/atomic: fix an error code in mode_fixup()
-Content-Length: 1297
-Lines: 34
-
-commit f9ad86e42d0303eeb8e0d41bb208153022ebd9d2 upstream.
-
-Having "ret" be a bool type works for everything except
-ret = funcs->atomic_check(). The other functions all return zero on
-error but ->atomic_check() returns negative error codes. We want to
-propagate the error code but instead we return 1.
-
-I found this bug with static analysis and I don't know if it affects
-run time.
-
-Fixes: 4cd4df8080a3 ("drm/atomic: Add ->atomic_check() to encoder helpers")
-Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
-Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-Link: http://patchwork.freedesktop.org/patch/msgid/20170207234601.GA23981@mwanda
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/drm_atomic_helper.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
-index 1ac29d703c12..ea443fafb934 100644
---- a/drivers/gpu/drm/drm_atomic_helper.c
-+++ b/drivers/gpu/drm/drm_atomic_helper.c
-@@ -265,7 +265,7 @@ mode_fixup(struct drm_atomic_state *state)
- struct drm_connector *connector;
- struct drm_connector_state *conn_state;
- int i;
-- bool ret;
-+ int ret;
-
- for_each_crtc_in_state(state, crtc, crtc_state, i) {
- if (!crtc_state->mode_changed &&
---
-2.12.2
-
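This bug class is worth a standalone demonstration: assigning a negative
errno to a bool collapses it to true, so returning it through an
int-returning function yields 1 instead of the error code. The snippet
below uses stub names, not the DRM code; it compiles and prints both
behaviors:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    static int atomic_check_stub(void)
    {
        return -EINVAL;         /* ->atomic_check() style error */
    }

    static int mode_fixup_buggy(void)
    {
        bool ret = atomic_check_stub(); /* -EINVAL truncated to true */

        return ret;             /* returns 1, not -EINVAL */
    }

    static int mode_fixup_fixed(void)
    {
        int ret = atomic_check_stub();

        return ret;             /* propagates -EINVAL */
    }

    int main(void)
    {
        printf("buggy: %d, fixed: %d\n",
               mode_fixup_buggy(), mode_fixup_fixed());
        return 0;
    }
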
-From 7952b6490bbce45e078c8c0e669df7a0a8f8948a Mon Sep 17 00:00:00 2001
-From: Hans de Goede <hdegoede@redhat.com>
-Date: Fri, 2 Dec 2016 15:29:04 +0100
-Subject: [PATCH 033/251] drm/i915/dsi: Do not clear DPOUNIT_CLOCK_GATE_DISABLE
- from vlv_init_display_clock_gating
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-Content-Length: 2606
-Lines: 62
-
-commit bb98e72adaf9d19719aba35f802d4836f5d5176c upstream.
-
-On my Cherrytrail CUBE iwork8 Air tablet PIPE-A would get stuck on loading
-i915 at boot on 1 out of every 3 boots, resulting in a non-functional LCD.
-Once the i915 driver has successfully loaded, the panel can be disabled /
-enabled without hitting this issue.
-
-The hang is caused by vlv_init_display_clock_gating() clearing
-the DPOUNIT_CLOCK_GATE_DISABLE bit in DSPCLK_GATE_D when called from
-chv_pipe_power_well_ops.enable() on driver load, while a pipe enabled
-by the BIOS is still driving the DSI LCD.
-
-Clearing this bit while DSI is in use is a known issue and
-intel_dsi_pre_enable() / intel_dsi_post_disable() already set / clear it
-as appropriate.
-
-This commit modifies vlv_init_display_clock_gating() to leave the
-DPOUNIT_CLOCK_GATE_DISABLE bit alone fixing the pipe getting stuck.
-
-Changes in v2:
--Replace PIPE-A with "a pipe" or "the pipe" in the commit msg and
-comment
-
-Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97330
-Signed-off-by: Hans de Goede <hdegoede@redhat.com>
-Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
-Link: http://patchwork.freedesktop.org/patch/msgid/20161202142904.25613-1-hdegoede@redhat.com
-Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
-(cherry picked from commit 721d484563e1a51ada760089c490cbc47e909756)
-Signed-off-by: Jani Nikula <jani.nikula@intel.com>
-Signed-off-by: River Zhou <riverzhou2000@163.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/i915/intel_pm.c | 13 ++++++++++++-
- 1 file changed, 12 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
-index 3f802163f7d4..e7c18519274a 100644
---- a/drivers/gpu/drm/i915/intel_pm.c
-+++ b/drivers/gpu/drm/i915/intel_pm.c
-@@ -6803,7 +6803,18 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
-
- static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv)
- {
-- I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
-+ u32 val;
-+
-+ /*
-+ * On driver load, a pipe may be active and driving a DSI display.
-+ * Preserve DPOUNIT_CLOCK_GATE_DISABLE to avoid the pipe getting stuck
-+ * (and never recovering) in this case. intel_dsi_post_disable() will
-+ * clear it when we turn off the display.
-+ */
-+ val = I915_READ(DSPCLK_GATE_D);
-+ val &= DPOUNIT_CLOCK_GATE_DISABLE;
-+ val |= VRHUNIT_CLOCK_GATE_DISABLE;
-+ I915_WRITE(DSPCLK_GATE_D, val);
-
- /*
- * Disable trickle feed and enable pnd deadline calculation
---
-2.12.2
-
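The fix above is a read-modify-write that preserves a single bit across
the register update instead of blindly overwriting it. A minimal
illustration with a plain variable standing in for the register; the bit
positions below are illustrative, not the real i915 mask values:

    #include <stdint.h>
    #include <stdio.h>

    #define VRHUNIT_CLOCK_GATE_DISABLE (1u << 28)   /* illustrative values, */
    #define DPOUNIT_CLOCK_GATE_DISABLE (1u << 4)    /* not the i915 masks */

    static uint32_t dspclk_gate_d = DPOUNIT_CLOCK_GATE_DISABLE; /* set by BIOS */

    int main(void)
    {
        uint32_t val;

        /* Old code, a blind write, would clear the DPOUNIT bit:
         *     dspclk_gate_d = VRHUNIT_CLOCK_GATE_DISABLE;
         */

        /* New code: keep only the bit that must survive, then set ours. */
        val = dspclk_gate_d;
        val &= DPOUNIT_CLOCK_GATE_DISABLE;
        val |= VRHUNIT_CLOCK_GATE_DISABLE;
        dspclk_gate_d = val;

        printf("DSPCLK_GATE_D = %#x (DPOUNIT bit preserved)\n", dspclk_gate_d);
        return 0;
    }
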
From 804a935963a91acd1764ba914f825dd2a29c5871 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 15 Mar 2017 09:57:56 +0800
Subject: [PATCH 037/251] Linux 4.4.54
+Status: RO
Content-Length: 301
Lines: 18
--
2.12.2
-From 5e45d834f762312e3031a8b6bba3bc2b1f9481ec Mon Sep 17 00:00:00 2001
-From: Arnd Bergmann <arnd@arndb.de>
-Date: Mon, 16 Jan 2017 14:20:54 +0100
-Subject: [PATCH 049/251] cpmac: remove hopeless #warning
-Content-Length: 1108
-Lines: 32
+From 2e4aff2405af6a4573299dee361a44903c9bb717 Mon Sep 17 00:00:00 2001
+From: Ralf Baechle <ralf@linux-mips.org>
+Date: Tue, 20 Sep 2016 14:33:01 +0200
+Subject: [PATCH 051/251] MIPS: DEC: Avoid la pseudo-instruction in delay slots
+Content-Length: 2448
+Lines: 81
+
+commit 3021773c7c3e75e20b693931a19362681e744ea9 upstream.
-commit d43e6fb4ac4abfe4ef7c102833ed02330ad701e0 upstream.
+When expanding the la or dla pseudo-instruction in a delay slot the GNU
+assembler will complain should the pseudo-instruction expand to multiple
+actual instructions, since only the first of them will be in the delay
+slot leading to the pseudo-instruction being only partially executed if
+the branch is taken. Use of PTR_LA in the dec int-handler.S leads to
+such warnings:
-The #warning was present 10 years ago when the driver first got merged.
-As the platform is rather obsolete by now, it seems very unlikely that
-the warning will cause anyone to fix the code properly.
+ arch/mips/dec/int-handler.S: Assembler messages:
+ arch/mips/dec/int-handler.S:149: Warning: macro instruction expanded into multiple instructions in a branch delay slot
+ arch/mips/dec/int-handler.S:198: Warning: macro instruction expanded into multiple instructions in a branch delay slot
-kernelci.org reports the warning for every build in the meantime, so
-I think it's better to just turn it into a code comment to reduce
-noise.
+Avoid this by open coding the PTR_LA macros.
-Signed-off-by: Arnd Bergmann <arnd@arndb.de>
-Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
- drivers/net/ethernet/ti/cpmac.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
-index d52ea3008946..7e8bce46e6b4 100644
---- a/drivers/net/ethernet/ti/cpmac.c
-+++ b/drivers/net/ethernet/ti/cpmac.c
-@@ -1237,7 +1237,7 @@ int cpmac_init(void)
- goto fail_alloc;
- }
-
--#warning FIXME: unhardcode gpio&reset bits
-+ /* FIXME: unhardcode gpio&reset bits */
- ar7_gpio_disable(26);
- ar7_gpio_disable(27);
- ar7_device_reset(AR7_RESET_BIT_CPMAC_LO);
---
-2.12.2
-
-From 5fad17434465a9e9ddddfb38a162e9e2e53e33a1 Mon Sep 17 00:00:00 2001
-From: Arnd Bergmann <arnd@arndb.de>
-Date: Thu, 25 Aug 2016 15:17:08 -0700
-Subject: [PATCH 050/251] mm: memcontrol: avoid unused function warning
-Content-Length: 2551
-Lines: 79
-
-commit 358c07fcc3b60ab08d77f1684de8bd81bcf49a1a upstream.
-
-A bugfix in v4.8-rc2 introduced a harmless warning when
-CONFIG_MEMCG_SWAP is disabled but CONFIG_MEMCG is enabled:
-
- mm/memcontrol.c:4085:27: error: 'mem_cgroup_id_get_online' defined but not used [-Werror=unused-function]
- static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
-
-This moves the function inside the #ifdef block that hides the
-calling function, to avoid the warning.
-
-Fixes: 1f47b61fb407 ("mm: memcontrol: fix swap counter leak on swapout from offline cgroup")
-Link: http://lkml.kernel.org/r/20160824113733.2776701-1-arnd@arndb.de
-Signed-off-by: Arnd Bergmann <arnd@arndb.de>
-Acked-by: Michal Hocko <mhocko@suse.com>
-Acked-by: Vladimir Davydov <vdavydov@virtuozzo.com>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- mm/memcontrol.c | 36 ++++++++++++++++++------------------
- 1 file changed, 18 insertions(+), 18 deletions(-)
-
-diff --git a/mm/memcontrol.c b/mm/memcontrol.c
-index 43eefe9d834c..e25b93a4267d 100644
---- a/mm/memcontrol.c
-+++ b/mm/memcontrol.c
-@@ -4150,24 +4150,6 @@ static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
- atomic_add(n, &memcg->id.ref);
- }
-
--static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
--{
-- while (!atomic_inc_not_zero(&memcg->id.ref)) {
-- /*
-- * The root cgroup cannot be destroyed, so it's refcount must
-- * always be >= 1.
-- */
-- if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
-- VM_BUG_ON(1);
-- break;
-- }
-- memcg = parent_mem_cgroup(memcg);
-- if (!memcg)
-- memcg = root_mem_cgroup;
-- }
-- return memcg;
--}
--
- static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
- {
- if (atomic_sub_and_test(n, &memcg->id.ref)) {
-@@ -5751,6 +5733,24 @@ static int __init mem_cgroup_init(void)
- subsys_initcall(mem_cgroup_init);
-
- #ifdef CONFIG_MEMCG_SWAP
-+static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
-+{
-+ while (!atomic_inc_not_zero(&memcg->id.ref)) {
-+ /*
-+ * The root cgroup cannot be destroyed, so it's refcount must
-+ * always be >= 1.
-+ */
-+ if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
-+ VM_BUG_ON(1);
-+ break;
-+ }
-+ memcg = parent_mem_cgroup(memcg);
-+ if (!memcg)
-+ memcg = root_mem_cgroup;
-+ }
-+ return memcg;
-+}
-+
- /**
- * mem_cgroup_swapout - transfer a memsw charge to swap
- * @page: page whose memsw charge to transfer
---
-2.12.2
-
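The warning arises because a static function's only caller is compiled
out, leaving the definition unused; moving the definition under the same
#ifdef as the caller removes it. A self-contained illustration of the
placement rule (build with -Wunused-function, with and without
-DUSE_HELPER):

    #include <stdio.h>

    /* If helper() were defined outside this #ifdef while caller() is its
     * only user, building without -DUSE_HELPER would emit
     * -Wunused-function. Keeping the definition and the caller under the
     * same guard, as the patch does, avoids that. */
    #ifdef USE_HELPER
    static int helper(void)
    {
        return 42;
    }

    static void caller(void)
    {
        printf("%d\n", helper());
    }
    #endif

    int main(void)
    {
    #ifdef USE_HELPER
        caller();
    #endif
        return 0;
    }
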
-From 2e4aff2405af6a4573299dee361a44903c9bb717 Mon Sep 17 00:00:00 2001
-From: Ralf Baechle <ralf@linux-mips.org>
-Date: Tue, 20 Sep 2016 14:33:01 +0200
-Subject: [PATCH 051/251] MIPS: DEC: Avoid la pseudo-instruction in delay slots
-Content-Length: 2448
-Lines: 81
-
-commit 3021773c7c3e75e20b693931a19362681e744ea9 upstream.
-
-When expanding the la or dla pseudo-instruction in a delay slot the GNU
-assembler will complain should the pseudo-instruction expand to multiple
-actual instructions, since only the first of them will be in the delay
-slot leading to the pseudo-instruction being only partially executed if
-the branch is taken. Use of PTR_LA in the dec int-handler.S leads to
-such warnings:
-
- arch/mips/dec/int-handler.S: Assembler messages:
- arch/mips/dec/int-handler.S:149: Warning: macro instruction expanded into multiple instructions in a branch delay slot
- arch/mips/dec/int-handler.S:198: Warning: macro instruction expanded into multiple instructions in a branch delay slot
-
-Avoid this by open coding the PTR_LA macros.
-
-Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/mips/dec/int-handler.S | 40 ++++++++++++++++++++++++++++++++++++++--
- 1 file changed, 38 insertions(+), 2 deletions(-)
+ arch/mips/dec/int-handler.S | 40 ++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 38 insertions(+), 2 deletions(-)
diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S
index 8c6f508e59de..554d1da97743 100644
--
2.12.2
-From 074893495b72c043a108797ffd6297db3e4af1dc Mon Sep 17 00:00:00 2001
-From: Rik van Riel <riel@redhat.com>
-Date: Wed, 28 Sep 2016 22:55:54 -0400
-Subject: [PATCH 053/251] tracing: Add #undef to fix compile error
-Content-Length: 1319
-Lines: 35
-
-commit bf7165cfa23695c51998231c4efa080fe1d3548d upstream.
-
-There are several trace include files that define TRACE_INCLUDE_FILE.
-
-Include several of them in the same .c file (as I currently have in
-some code I am working on), and the compile will blow up with a
-"warning: "TRACE_INCLUDE_FILE" redefined #define TRACE_INCLUDE_FILE syscalls"
-
-Every other include file in include/trace/events/ avoids that issue
-by having a #undef TRACE_INCLUDE_FILE before the #define; syscalls.h
-should have one, too.
-
-Link: http://lkml.kernel.org/r/20160928225554.13bd7ac6@annuminas.surriel.com
-
-Fixes: b8007ef74222 ("tracing: Separate raw syscall from syscall tracer")
-Signed-off-by: Rik van Riel <riel@redhat.com>
-Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- include/trace/events/syscalls.h | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h
-index 14e49c798135..b35533b94277 100644
---- a/include/trace/events/syscalls.h
-+++ b/include/trace/events/syscalls.h
-@@ -1,5 +1,6 @@
- #undef TRACE_SYSTEM
- #define TRACE_SYSTEM raw_syscalls
-+#undef TRACE_INCLUDE_FILE
- #define TRACE_INCLUDE_FILE syscalls
-
- #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ)
---
-2.12.2
-
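The one-line guard added above is the standard idiom for a macro that
several headers define to different values: #undef it before redefining.
A plain-preprocessor illustration, treating TRACE_INCLUDE_FILE as an
ordinary macro rather than the tracepoint machinery:

    #include <stdio.h>

    #define TRACE_INCLUDE_FILE irq  /* set by a previously included header */

    /* Without this #undef, the redefinition below draws
     * "warning: 'TRACE_INCLUDE_FILE' redefined". */
    #undef TRACE_INCLUDE_FILE
    #define TRACE_INCLUDE_FILE syscalls

    #define STR_(x) #x
    #define STR(x) STR_(x)

    int main(void)
    {
        printf("TRACE_INCLUDE_FILE = %s\n", STR(TRACE_INCLUDE_FILE));
        return 0;
    }
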
From 2ca39d1300152e70977797c3e39c105adfcc0e0b Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Date: Tue, 14 Feb 2017 14:46:42 +0530
Subject: [PATCH 054/251] powerpc: Emulation support for load/store
instructions on LE
+Status: RO
Content-Length: 3197
Lines: 106
Date: Thu, 2 Mar 2017 15:23:42 +0100
Subject: [PATCH 068/251] KVM: s390: Fix guest migration for huge guests
resulting in panic
+Status: RO
Content-Length: 1904
Lines: 58
--
2.12.2
-From a084aeef5633db4f649b699785f79676cb71ba6c Mon Sep 17 00:00:00 2001
-From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
-Date: Tue, 7 Feb 2017 18:09:14 +0100
-Subject: [PATCH 069/251] s390/kdump: Use "LINUX" ELF note name instead of
- "CORE"
-Content-Length: 3784
-Lines: 108
-
-commit a4a81d8eebdc1d209d034f62a082a5131e4242b5 upstream.
-
-binutils/libbfd (bfd/elf.c) enforces that all s390 specific ELF
-notes, e.g. NT_S390_PREFIX or NT_S390_CTRS, have "LINUX" specified
-as the note name. Otherwise the notes are ignored.
-
-For /proc/vmcore we currently use "CORE" for these notes.
-
-Up to now this has not been a real problem because the dump analysis tool
-"crash" does not check the note name. But it will break all programs that
-use libbfd for processing ELF notes.
-
-So fix this and use "LINUX" for all s390 specific notes to comply with
-libbfd.
-
-Reported-by: Philipp Rudo <prudo@linux.vnet.ibm.com>
-Reviewed-by: Philipp Rudo <prudo@linux.vnet.ibm.com>
-Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
-Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/s390/kernel/crash_dump.c | 18 ++++++++++--------
- 1 file changed, 10 insertions(+), 8 deletions(-)
-
-diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
-index 171e09bb8ea2..f7c3a61040bd 100644
---- a/arch/s390/kernel/crash_dump.c
-+++ b/arch/s390/kernel/crash_dump.c
-@@ -23,6 +23,8 @@
- #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
- #define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y))))
-
-+#define LINUX_NOTE_NAME "LINUX"
-+
- static struct memblock_region oldmem_region;
-
- static struct memblock_type oldmem_type = {
-@@ -312,7 +314,7 @@ static void *nt_fpregset(void *ptr, struct save_area *sa)
- static void *nt_s390_timer(void *ptr, struct save_area *sa)
- {
- return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer),
-- KEXEC_CORE_NOTE_NAME);
-+ LINUX_NOTE_NAME);
- }
-
- /*
-@@ -321,7 +323,7 @@ static void *nt_s390_timer(void *ptr, struct save_area *sa)
- static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa)
- {
- return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp,
-- sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME);
-+ sizeof(sa->clk_cmp), LINUX_NOTE_NAME);
- }
-
- /*
-@@ -330,7 +332,7 @@ static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa)
- static void *nt_s390_tod_preg(void *ptr, struct save_area *sa)
- {
- return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg,
-- sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME);
-+ sizeof(sa->tod_reg), LINUX_NOTE_NAME);
- }
-
- /*
-@@ -339,7 +341,7 @@ static void *nt_s390_tod_preg(void *ptr, struct save_area *sa)
- static void *nt_s390_ctrs(void *ptr, struct save_area *sa)
- {
- return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs,
-- sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME);
-+ sizeof(sa->ctrl_regs), LINUX_NOTE_NAME);
- }
-
- /*
-@@ -348,7 +350,7 @@ static void *nt_s390_ctrs(void *ptr, struct save_area *sa)
- static void *nt_s390_prefix(void *ptr, struct save_area *sa)
- {
- return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg,
-- sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME);
-+ sizeof(sa->pref_reg), LINUX_NOTE_NAME);
- }
-
- /*
-@@ -357,7 +359,7 @@ static void *nt_s390_prefix(void *ptr, struct save_area *sa)
- static void *nt_s390_vx_high(void *ptr, __vector128 *vx_regs)
- {
- return nt_init(ptr, NT_S390_VXRS_HIGH, &vx_regs[16],
-- 16 * sizeof(__vector128), KEXEC_CORE_NOTE_NAME);
-+ 16 * sizeof(__vector128), LINUX_NOTE_NAME);
- }
-
- /*
-@@ -370,12 +372,12 @@ static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs)
- int i;
-
- note = (Elf64_Nhdr *)ptr;
-- note->n_namesz = strlen(KEXEC_CORE_NOTE_NAME) + 1;
-+ note->n_namesz = strlen(LINUX_NOTE_NAME) + 1;
- note->n_descsz = 16 * 8;
- note->n_type = NT_S390_VXRS_LOW;
- len = sizeof(Elf64_Nhdr);
-
-- memcpy(ptr + len, KEXEC_CORE_NOTE_NAME, note->n_namesz);
-+ memcpy(ptr + len, LINUX_NOTE_NAME, note->n_namesz);
- len = roundup(len + note->n_namesz, 4);
-
- ptr += len;
---
-2.12.2
-
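For reference, an ELF note is an Elf64_Nhdr followed by a 4-byte-aligned
name and descriptor; the patch only changes which name string is emitted.
A userspace sketch of building one such note, simplified from the
kernel's nt_init() and using a fake payload (NT_S390_PREFIX is hardcoded
as 0x305 here rather than taken from a header):

    #include <elf.h>
    #include <stdio.h>
    #include <string.h>

    #define LINUX_NOTE_NAME "LINUX"
    #define ROUNDUP4(x) (((x) + 3) & ~3u)

    /* Emit one note into ptr: header, padded name, padded descriptor. */
    static size_t emit_note(void *ptr, int type, const void *desc, size_t dlen)
    {
        Elf64_Nhdr *note = ptr;
        char *p = ptr;
        size_t len = sizeof(*note);

        note->n_namesz = strlen(LINUX_NOTE_NAME) + 1;
        note->n_descsz = dlen;
        note->n_type = type;

        memcpy(p + len, LINUX_NOTE_NAME, note->n_namesz);
        len = ROUNDUP4(len + note->n_namesz);
        memcpy(p + len, desc, dlen);
        return ROUNDUP4(len + dlen);
    }

    int main(void)
    {
        unsigned char buf[64];
        unsigned long long prefix = 0x1000;     /* fake note payload */
        size_t n = emit_note(buf, 0x305 /* NT_S390_PREFIX */, &prefix,
                             sizeof(prefix));

        printf("emitted %zu-byte \"%s\" note\n", n, LINUX_NOTE_NAME);
        return 0;
    }
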
From 28ec98bc2e4a175b60f45d505e715a33b93dd077 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 18 Mar 2017 19:10:23 +0800
--
2.12.2
-From 0c0be310ba29e4a053e8aac934aebe590c5da909 Mon Sep 17 00:00:00 2001
-From: Florian Westphal <fw@strlen.de>
-Date: Thu, 18 Feb 2016 15:03:24 +0100
-Subject: [PATCH 074/251] netlink: remove mmapped netlink support
-Content-Length: 42335
-Lines: 1432
-
-commit d1b4c689d4130bcfd3532680b64db562300716b6 upstream.
-
-mmapped netlink has a number of unresolved issues:
-
-- TX zerocopy support had to be disabled more than a year ago via
- commit 4682a0358639b29cf ("netlink: Always copy on mmap TX.")
- because the content of the mmapped area can change after netlink
- attribute validation but before message processing.
-
-- RX support was implemented mainly to speed up nfqueue dumping packet
- payload to userspace. However, since commit ae08ce0021087a5d812d2
- ("netfilter: nfnetlink_queue: zero copy support") we avoid one copy
- with the socket-based interface too (via the skb_zerocopy helper).
-
-The other problem is that skbs attached to a mmaped netlink socket
-behave differently from normal skbs:
-
-- they don't have a shinfo area, so all functions that use skb_shinfo()
-(e.g. skb_clone) cannot be used.
-
-- reserving headroom prevents userspace from seeing the content as
-it expects message to start at skb->head.
-See for instance
-commit aa3a022094fa ("netlink: not trim skb for mmaped socket when dump").
-
-- skbs handed e.g. to netlink_ack must have non-NULL skb->sk, else we
-crash because it needs the sk to check if a tx ring is attached.
-
-This is also not obvious, and leads to non-intuitive bug fixes such as 7c7bdf359
-("netfilter: nfnetlink: use original skbuff when acking batches").
-
-mmaped netlink also didn't play nicely with the skb_zerocopy helper
-used by nfqueue and openvswitch. Daniel Borkmann fixed this via
-commit 6bb0fef489f6 ("netlink, mmap: fix edge-case leakages in nf queue
-zero-copy")' but at the cost of also needing to provide remaining
-length to the allocation function.
-
-nfqueue also has problems when used with mmaped rx netlink:
-- mmaped netlink doesn't allow use of nfqueue batch verdict messages.
- Problem is that in the mmap case, the allocation time also determines
- the ordering in which the frame will be seen by userspace (A
- allocating before B means that A is located in an earlier ring slot,
- but this also means that B might get a lower sequence number than A,
- since the seqno is decided later). To fix this we would need to extend the
- spinlocked region to also cover the allocation and message setup which
- isn't desirable.
-- nfqueue can now be configured to queue large (GSO) skbs to userspace.
- Queueing GSO packets is faster than having to force a software segmentation
- in the kernel, so this is a desirable option. However, with a mmap based
- ring one has to use 64kb per ring slot element, else mmap has to fall back
- to the socket path (NL_MMAP_STATUS_COPY) for all large packets.
-
-To use the mmap interface, userspace not only has to probe for mmap netlink
-support, it also has to implement a recv/socket receive path in order to
-handle messages that exceed the size of an rx ring element.
-
-Cc: Daniel Borkmann <daniel@iogearbox.net>
-Cc: Ken-ichirou MATSUZAWA <chamaken@gmail.com>
-Cc: Pablo Neira Ayuso <pablo@netfilter.org>
-Cc: Patrick McHardy <kaber@trash.net>
-Cc: Thomas Graf <tgraf@suug.ch>
-Signed-off-by: Florian Westphal <fw@strlen.de>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Cc: Shi Yuejie <shiyuejie@outlook.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- Documentation/networking/netlink_mmap.txt | 332 -------------
- include/uapi/linux/netlink.h | 4 +
- include/uapi/linux/netlink_diag.h | 2 +
- net/netlink/Kconfig | 9 -
- net/netlink/af_netlink.c | 751 +-----------------------------
- net/netlink/af_netlink.h | 15 -
- net/netlink/diag.c | 39 --
- 7 files changed, 14 insertions(+), 1138 deletions(-)
- delete mode 100644 Documentation/networking/netlink_mmap.txt
-
-diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt
-deleted file mode 100644
-index 54f10478e8e3..000000000000
---- a/Documentation/networking/netlink_mmap.txt
-+++ /dev/null
-@@ -1,332 +0,0 @@
--This file documents how to use memory mapped I/O with netlink.
--
--Author: Patrick McHardy <kaber@trash.net>
--
--Overview
----------
--
--Memory mapped netlink I/O can be used to increase throughput and decrease
--overhead of unicast receive and transmit operations. Some netlink subsystems
--require high throughput, these are mainly the netfilter subsystems
--nfnetlink_queue and nfnetlink_log, but it can also help speed up large
--dump operations of f.i. the routing database.
--
--Memory mapped netlink I/O uses two circular ring buffers for RX and TX which
--are mapped into the processes address space.
--
--The RX ring is used by the kernel to directly construct netlink messages into
--user-space memory without copying them as done with regular socket I/O,
--additionally as long as the ring contains messages no recvmsg() or poll()
--syscalls have to be issued by user-space to get more messages.
--
--The TX ring is used to process messages directly from user-space memory, the
--kernel processes all messages contained in the ring using a single sendmsg()
--call.
--
--Usage overview
----------------
--
--In order to use memory mapped netlink I/O, user-space needs three main changes:
--
--- ring setup
--- conversion of the RX path to get messages from the ring instead of recvmsg()
--- conversion of the TX path to construct messages into the ring
--
--Ring setup is done using setsockopt() to provide the ring parameters to the
--kernel, then a call to mmap() to map the ring into the processes address space:
--
--- setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, ¶ms, sizeof(params));
--- setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, ¶ms, sizeof(params));
--- ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)
--
--Usage of either ring is optional, but even if only the RX ring is used the
--mapping still needs to be writable in order to update the frame status after
--processing.
--
--Conversion of the reception path involves calling poll() on the file
--descriptor, once the socket is readable the frames from the ring are
--processed in order until no more messages are available, as indicated by
--a status word in the frame header.
--
--On kernel side, in order to make use of memory mapped I/O on receive, the
--originating netlink subsystem needs to support memory mapped I/O, otherwise
--it will use an allocated socket buffer as usual and the contents will be
--copied to the ring on transmission, nullifying most of the performance gains.
--Dumps of kernel databases automatically support memory mapped I/O.
--
--Conversion of the transmit path involves changing message construction to
--use memory from the TX ring instead of (usually) a buffer declared on the
--stack and setting up the frame header appropriately. Optionally poll() can
--be used to wait for free frames in the TX ring.
--
--Structures and definitions for using memory mapped I/O are contained in
--<linux/netlink.h>.
--
--RX and TX rings
------------------
--
--Each ring contains a number of continuous memory blocks, containing frames of
--fixed size dependent on the parameters used for ring setup.
--
--Ring: [ block 0 ]
-- [ frame 0 ]
-- [ frame 1 ]
-- [ block 1 ]
-- [ frame 2 ]
-- [ frame 3 ]
-- ...
-- [ block n ]
-- [ frame 2 * n ]
-- [ frame 2 * n + 1 ]
--
--The blocks are only visible to the kernel, from the point of view of user-space
--the ring just contains the frames in a continuous memory zone.
--
--The ring parameters used for setting up the ring are defined as follows:
--
--struct nl_mmap_req {
-- unsigned int nm_block_size;
-- unsigned int nm_block_nr;
-- unsigned int nm_frame_size;
-- unsigned int nm_frame_nr;
--};
--
--Frames are grouped into blocks, where each block is a continuous region of memory
--and holds nm_block_size / nm_frame_size frames. The total number of frames in
--the ring is nm_frame_nr. The following invariants hold:
--
--- frames_per_block = nm_block_size / nm_frame_size
--
--- nm_frame_nr = frames_per_block * nm_block_nr
--
--Some parameters are constrained, specifically:
--
--- nm_block_size must be a multiple of the architecture's memory page size.
-- The getpagesize() function can be used to get the page size.
--
--- nm_frame_size must be equal to or larger than NL_MMAP_HDRLEN, IOW a frame must be
-- able to hold at least the frame header
--
--- nm_frame_size must be smaller than or equal to nm_block_size
--
--- nm_frame_size must be a multiple of NL_MMAP_MSG_ALIGNMENT
--
--- nm_frame_nr must equal the actual number of frames as specified above.
--
--When the kernel can't allocate physically continuous memory for a ring block,
--it will fall back to use physically discontinuous memory. This might affect
--performance negatively, in order to avoid this the nm_frame_size parameter
--should be chosen to be as small as possible for the required frame size and
--the number of blocks should be increased instead.
--
--Ring frames
--------------
--
--Each frame contains a frame header, consisting of a synchronization word and some
--meta-data, and the message itself.
--
--Frame: [ header message ]
--
--The frame header is defined as follows:
--
--struct nl_mmap_hdr {
-- unsigned int nm_status;
-- unsigned int nm_len;
-- __u32 nm_group;
-- /* credentials */
-- __u32 nm_pid;
-- __u32 nm_uid;
-- __u32 nm_gid;
--};
--
--- nm_status is used for synchronizing processing between the kernel and user-
-- space and specifies ownership of the frame as well as the operation to perform
--
--- nm_len contains the length of the message contained in the data area
--
--- nm_group specifies the destination multicast group of the message
--
--- nm_pid, nm_uid and nm_gid contain the netlink pid, UID and GID of the sending
-- process. These values correspond to the data available using SOCK_PASSCRED in
-- the SCM_CREDENTIALS cmsg.
--
--The possible values in the status word are:
--
--- NL_MMAP_STATUS_UNUSED:
-- RX ring: frame belongs to the kernel and contains no message
-- for user-space. Appropriate action is to invoke poll()
-- to wait for new messages.
--
-- TX ring: frame belongs to user-space and can be used for
-- message construction.
--
--- NL_MMAP_STATUS_RESERVED:
-- RX ring only: frame is currently used by the kernel for message
-- construction and contains no valid message yet.
-- Appropriate action is to invoke poll() to wait for
-- new messages.
--
--- NL_MMAP_STATUS_VALID:
-- RX ring: frame contains a valid message. Appropriate action is
-- to process the message and release the frame back to
-- the kernel by setting the status to
-- NL_MMAP_STATUS_UNUSED or queue the frame by setting the
-- status to NL_MMAP_STATUS_SKIP.
--
-- TX ring: the frame contains a valid message from user-space to
-- be processed by the kernel. After completing processing
-- the kernel will release the frame back to user-space by
-- setting the status to NL_MMAP_STATUS_UNUSED.
--
--- NL_MMAP_STATUS_COPY:
-- RX ring only: a message is ready to be processed but could not be
-- stored in the ring, either because it exceeded the
-- frame size or because the originating subsystem does
-- not support memory mapped I/O. Appropriate action is
-- to invoke recvmsg() to receive the message and release
-- the frame back to the kernel by setting the status to
-- NL_MMAP_STATUS_UNUSED.
--
--- NL_MMAP_STATUS_SKIP:
-- RX ring only: user-space queued the message for later processing, but
-- processed some messages following it in the ring. The
-- kernel should skip this frame when looking for unused
-- frames.
--
--The data area of a frame begins at an offset of NL_MMAP_HDRLEN relative to the
--frame header.
--
--TX limitations
----------------
--
--As of Jan 2015 the message is always copied from the ring frame to an
--allocated buffer due to unresolved security concerns.
--See commit 4682a0358639b29cf ("netlink: Always copy on mmap TX.").
--
--Example
---------
--
--Ring setup:
--
-- unsigned int block_size = 16 * getpagesize();
-- struct nl_mmap_req req = {
-- .nm_block_size = block_size,
-- .nm_block_nr = 64,
-- .nm_frame_size = 16384,
-- .nm_frame_nr = 64 * block_size / 16384,
-- };
-- unsigned int ring_size;
-- void *rx_ring, *tx_ring;
--
-- /* Configure ring parameters */
-- if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
-- exit(1);
-- if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
-- exit(1)
--
-- /* Calculate size of each individual ring */
-- ring_size = req.nm_block_nr * req.nm_block_size;
--
-- /* Map RX/TX rings. The TX ring is located after the RX ring */
-- rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE,
-- MAP_SHARED, fd, 0);
-- if ((long)rx_ring == -1L)
-- exit(1);
--	tx_ring = rx_ring + ring_size;
--
--Message reception:
--
--This example assumes that parameters from the ring setup, such as frame_size
--and ring_size, are available.
--
-- unsigned int frame_offset = 0;
-- struct nl_mmap_hdr *hdr;
-- struct nlmsghdr *nlh;
-- unsigned char buf[16384];
-- ssize_t len;
--
-- while (1) {
-- struct pollfd pfds[1];
--
-- pfds[0].fd = fd;
-- pfds[0].events = POLLIN | POLLERR;
-- pfds[0].revents = 0;
--
--		if (poll(pfds, 1, -1) < 0 && errno != EINTR)
-- exit(1);
--
-- /* Check for errors. Error handling omitted */
-- if (pfds[0].revents & POLLERR)
-- <handle error>
--
-- /* If no new messages, poll again */
-- if (!(pfds[0].revents & POLLIN))
-- continue;
--
-- /* Process all frames */
-- while (1) {
-- /* Get next frame header */
-- hdr = rx_ring + frame_offset;
--
-- if (hdr->nm_status == NL_MMAP_STATUS_VALID) {
-- /* Regular memory mapped frame */
-- nlh = (void *)hdr + NL_MMAP_HDRLEN;
-- len = hdr->nm_len;
--
-- /* Release empty message immediately. May happen
-- * on error during message construction.
-- */
-- if (len == 0)
-- goto release;
-- } else if (hdr->nm_status == NL_MMAP_STATUS_COPY) {
-- /* Frame queued to socket receive queue */
-- len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
-- if (len <= 0)
-- break;
--				nlh = (struct nlmsghdr *)buf;
-- } else
-- /* No more messages to process, continue polling */
-- break;
--
-- process_msg(nlh);
--release:
-- /* Release frame back to the kernel */
-- hdr->nm_status = NL_MMAP_STATUS_UNUSED;
--
-- /* Advance frame offset to next frame */
-- frame_offset = (frame_offset + frame_size) % ring_size;
-- }
-- }
--
--Message transmission:
--
--This example assumes that parameters from the ring setup, such as frame_size
--and ring_size, are available. A single message is constructed and transmitted;
--to send multiple messages at once, they would be constructed in consecutive
--frames before a final call to sendto(). A batching sketch follows the example.
--
-- unsigned int frame_offset = 0;
-- struct nl_mmap_hdr *hdr;
-- struct nlmsghdr *nlh;
-- struct sockaddr_nl addr = {
-- .nl_family = AF_NETLINK,
-- };
--
-- hdr = tx_ring + frame_offset;
-- if (hdr->nm_status != NL_MMAP_STATUS_UNUSED)
-- /* No frame available. Use poll() to avoid. */
-- exit(1);
--
-- nlh = (void *)hdr + NL_MMAP_HDRLEN;
--
-- /* Build message */
-- build_message(nlh);
--
-- /* Fill frame header: length and status need to be set */
-- hdr->nm_len = nlh->nlmsg_len;
-- hdr->nm_status = NL_MMAP_STATUS_VALID;
--
-- if (sendto(fd, NULL, 0, 0, &addr, sizeof(addr)) < 0)
-- exit(1);
--
-- /* Advance frame offset to next frame */
-- frame_offset = (frame_offset + frame_size) % ring_size;
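--
--As a hedged illustration (not part of the original document), a batched
--variant could fill several consecutive frames before the single sendto()
--call; build_message(), frame_size and ring_size are assumed from the
--previous examples:
--
--	unsigned int i, n = 8;	/* number of messages to batch (example value) */
--
--	for (i = 0; i < n; i++) {
--		hdr = tx_ring + frame_offset;
--		if (hdr->nm_status != NL_MMAP_STATUS_UNUSED)
--			break;	/* ring full, send what has been built so far */
--		nlh = (void *)hdr + NL_MMAP_HDRLEN;
--		build_message(nlh);
--		hdr->nm_len = nlh->nlmsg_len;
--		hdr->nm_status = NL_MMAP_STATUS_VALID;
--		frame_offset = (frame_offset + frame_size) % ring_size;
--	}
--
--	/* A single call hands all VALID frames to the kernel */
--	if (sendto(fd, NULL, 0, 0, &addr, sizeof(addr)) < 0)
--		exit(1);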
-diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
-index f095155d8749..0dba4e4ed2be 100644
---- a/include/uapi/linux/netlink.h
-+++ b/include/uapi/linux/netlink.h
-@@ -107,8 +107,10 @@ struct nlmsgerr {
- #define NETLINK_PKTINFO 3
- #define NETLINK_BROADCAST_ERROR 4
- #define NETLINK_NO_ENOBUFS 5
-+#ifndef __KERNEL__
- #define NETLINK_RX_RING 6
- #define NETLINK_TX_RING 7
-+#endif
- #define NETLINK_LISTEN_ALL_NSID 8
- #define NETLINK_LIST_MEMBERSHIPS 9
- #define NETLINK_CAP_ACK 10
-@@ -134,6 +136,7 @@ struct nl_mmap_hdr {
- __u32 nm_gid;
- };
-
-+#ifndef __KERNEL__
- enum nl_mmap_status {
- NL_MMAP_STATUS_UNUSED,
- NL_MMAP_STATUS_RESERVED,
-@@ -145,6 +148,7 @@ enum nl_mmap_status {
- #define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO
- #define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT)
- #define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr))
-+#endif
-
- #define NET_MAJOR 36 /* Major 36 is reserved for networking */
-
-diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h
-index f2159d30d1f5..d79399394b46 100644
---- a/include/uapi/linux/netlink_diag.h
-+++ b/include/uapi/linux/netlink_diag.h
-@@ -48,6 +48,8 @@ enum {
-
- #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */
- #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */
-+#ifndef __KERNEL__
- #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */
-+#endif
-
- #endif
-diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig
-index 2c5e95e9bfbd..5d6e8c05b3d4 100644
---- a/net/netlink/Kconfig
-+++ b/net/netlink/Kconfig
-@@ -2,15 +2,6 @@
- # Netlink Sockets
- #
-
--config NETLINK_MMAP
-- bool "NETLINK: mmaped IO"
-- ---help---
-- This option enables support for memory mapped netlink IO. This
-- reduces overhead by avoiding copying data between kernel- and
-- userspace.
--
-- If unsure, say N.
--
- config NETLINK_DIAG
- tristate "NETLINK: socket monitoring interface"
- default n
-diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
-index 360700a2f46c..8e33019d8e7b 100644
---- a/net/netlink/af_netlink.c
-+++ b/net/netlink/af_netlink.c
-@@ -225,7 +225,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
-
- dev_hold(dev);
-
-- if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head))
-+ if (is_vmalloc_addr(skb->head))
- nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
- else
- nskb = skb_clone(skb, GFP_ATOMIC);
-@@ -300,610 +300,8 @@ static void netlink_rcv_wake(struct sock *sk)
- wake_up_interruptible(&nlk->wait);
- }
-
--#ifdef CONFIG_NETLINK_MMAP
--static bool netlink_rx_is_mmaped(struct sock *sk)
--{
-- return nlk_sk(sk)->rx_ring.pg_vec != NULL;
--}
--
--static bool netlink_tx_is_mmaped(struct sock *sk)
--{
-- return nlk_sk(sk)->tx_ring.pg_vec != NULL;
--}
--
--static __pure struct page *pgvec_to_page(const void *addr)
--{
-- if (is_vmalloc_addr(addr))
-- return vmalloc_to_page(addr);
-- else
-- return virt_to_page(addr);
--}
--
--static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
--{
-- unsigned int i;
--
-- for (i = 0; i < len; i++) {
-- if (pg_vec[i] != NULL) {
-- if (is_vmalloc_addr(pg_vec[i]))
-- vfree(pg_vec[i]);
-- else
-- free_pages((unsigned long)pg_vec[i], order);
-- }
-- }
-- kfree(pg_vec);
--}
--
--static void *alloc_one_pg_vec_page(unsigned long order)
--{
-- void *buffer;
-- gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
-- __GFP_NOWARN | __GFP_NORETRY;
--
-- buffer = (void *)__get_free_pages(gfp_flags, order);
-- if (buffer != NULL)
-- return buffer;
--
-- buffer = vzalloc((1 << order) * PAGE_SIZE);
-- if (buffer != NULL)
-- return buffer;
--
-- gfp_flags &= ~__GFP_NORETRY;
-- return (void *)__get_free_pages(gfp_flags, order);
--}
--
--static void **alloc_pg_vec(struct netlink_sock *nlk,
-- struct nl_mmap_req *req, unsigned int order)
--{
-- unsigned int block_nr = req->nm_block_nr;
-- unsigned int i;
-- void **pg_vec;
--
-- pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
-- if (pg_vec == NULL)
-- return NULL;
--
-- for (i = 0; i < block_nr; i++) {
-- pg_vec[i] = alloc_one_pg_vec_page(order);
-- if (pg_vec[i] == NULL)
-- goto err1;
-- }
--
-- return pg_vec;
--err1:
-- free_pg_vec(pg_vec, order, block_nr);
-- return NULL;
--}
--
--
--static void
--__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
-- unsigned int order)
--{
-- struct netlink_sock *nlk = nlk_sk(sk);
-- struct sk_buff_head *queue;
-- struct netlink_ring *ring;
--
-- queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
-- ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
--
-- spin_lock_bh(&queue->lock);
--
-- ring->frame_max = req->nm_frame_nr - 1;
-- ring->head = 0;
-- ring->frame_size = req->nm_frame_size;
-- ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
--
-- swap(ring->pg_vec_len, req->nm_block_nr);
-- swap(ring->pg_vec_order, order);
-- swap(ring->pg_vec, pg_vec);
--
-- __skb_queue_purge(queue);
-- spin_unlock_bh(&queue->lock);
--
-- WARN_ON(atomic_read(&nlk->mapped));
--
-- if (pg_vec)
-- free_pg_vec(pg_vec, order, req->nm_block_nr);
--}
--
--static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
-- bool tx_ring)
--{
-- struct netlink_sock *nlk = nlk_sk(sk);
-- struct netlink_ring *ring;
-- void **pg_vec = NULL;
-- unsigned int order = 0;
--
-- ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
--
-- if (atomic_read(&nlk->mapped))
-- return -EBUSY;
-- if (atomic_read(&ring->pending))
-- return -EBUSY;
--
-- if (req->nm_block_nr) {
-- if (ring->pg_vec != NULL)
-- return -EBUSY;
--
-- if ((int)req->nm_block_size <= 0)
-- return -EINVAL;
-- if (!PAGE_ALIGNED(req->nm_block_size))
-- return -EINVAL;
-- if (req->nm_frame_size < NL_MMAP_HDRLEN)
-- return -EINVAL;
-- if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
-- return -EINVAL;
--
-- ring->frames_per_block = req->nm_block_size /
-- req->nm_frame_size;
-- if (ring->frames_per_block == 0)
-- return -EINVAL;
-- if (ring->frames_per_block * req->nm_block_nr !=
-- req->nm_frame_nr)
-- return -EINVAL;
--
-- order = get_order(req->nm_block_size);
-- pg_vec = alloc_pg_vec(nlk, req, order);
-- if (pg_vec == NULL)
-- return -ENOMEM;
-- } else {
-- if (req->nm_frame_nr)
-- return -EINVAL;
-- }
--
-- mutex_lock(&nlk->pg_vec_lock);
-- if (atomic_read(&nlk->mapped) == 0) {
-- __netlink_set_ring(sk, req, tx_ring, pg_vec, order);
-- mutex_unlock(&nlk->pg_vec_lock);
-- return 0;
-- }
--
-- mutex_unlock(&nlk->pg_vec_lock);
--
-- if (pg_vec)
-- free_pg_vec(pg_vec, order, req->nm_block_nr);
--
-- return -EBUSY;
--}
--
--static void netlink_mm_open(struct vm_area_struct *vma)
--{
-- struct file *file = vma->vm_file;
-- struct socket *sock = file->private_data;
-- struct sock *sk = sock->sk;
--
-- if (sk)
-- atomic_inc(&nlk_sk(sk)->mapped);
--}
--
--static void netlink_mm_close(struct vm_area_struct *vma)
--{
-- struct file *file = vma->vm_file;
-- struct socket *sock = file->private_data;
-- struct sock *sk = sock->sk;
--
-- if (sk)
-- atomic_dec(&nlk_sk(sk)->mapped);
--}
--
--static const struct vm_operations_struct netlink_mmap_ops = {
-- .open = netlink_mm_open,
-- .close = netlink_mm_close,
--};
--
--static int netlink_mmap(struct file *file, struct socket *sock,
-- struct vm_area_struct *vma)
--{
-- struct sock *sk = sock->sk;
-- struct netlink_sock *nlk = nlk_sk(sk);
-- struct netlink_ring *ring;
-- unsigned long start, size, expected;
-- unsigned int i;
-- int err = -EINVAL;
--
-- if (vma->vm_pgoff)
-- return -EINVAL;
--
-- mutex_lock(&nlk->pg_vec_lock);
--
-- expected = 0;
-- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
-- if (ring->pg_vec == NULL)
-- continue;
-- expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
-- }
--
-- if (expected == 0)
-- goto out;
--
-- size = vma->vm_end - vma->vm_start;
-- if (size != expected)
-- goto out;
--
-- start = vma->vm_start;
-- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
-- if (ring->pg_vec == NULL)
-- continue;
--
-- for (i = 0; i < ring->pg_vec_len; i++) {
-- struct page *page;
-- void *kaddr = ring->pg_vec[i];
-- unsigned int pg_num;
--
-- for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
-- page = pgvec_to_page(kaddr);
-- err = vm_insert_page(vma, start, page);
-- if (err < 0)
-- goto out;
-- start += PAGE_SIZE;
-- kaddr += PAGE_SIZE;
-- }
-- }
-- }
--
-- atomic_inc(&nlk->mapped);
-- vma->vm_ops = &netlink_mmap_ops;
-- err = 0;
--out:
-- mutex_unlock(&nlk->pg_vec_lock);
-- return err;
--}
--
--static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
--{
--#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
-- struct page *p_start, *p_end;
--
-- /* First page is flushed through netlink_{get,set}_status */
-- p_start = pgvec_to_page(hdr + PAGE_SIZE);
-- p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
-- while (p_start <= p_end) {
-- flush_dcache_page(p_start);
-- p_start++;
-- }
--#endif
--}
--
--static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
--{
-- smp_rmb();
-- flush_dcache_page(pgvec_to_page(hdr));
-- return hdr->nm_status;
--}
--
--static void netlink_set_status(struct nl_mmap_hdr *hdr,
-- enum nl_mmap_status status)
--{
-- smp_mb();
-- hdr->nm_status = status;
-- flush_dcache_page(pgvec_to_page(hdr));
--}
--
--static struct nl_mmap_hdr *
--__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
--{
-- unsigned int pg_vec_pos, frame_off;
--
-- pg_vec_pos = pos / ring->frames_per_block;
-- frame_off = pos % ring->frames_per_block;
--
-- return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
--}
--
--static struct nl_mmap_hdr *
--netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
-- enum nl_mmap_status status)
--{
-- struct nl_mmap_hdr *hdr;
--
-- hdr = __netlink_lookup_frame(ring, pos);
-- if (netlink_get_status(hdr) != status)
-- return NULL;
--
-- return hdr;
--}
--
--static struct nl_mmap_hdr *
--netlink_current_frame(const struct netlink_ring *ring,
-- enum nl_mmap_status status)
--{
-- return netlink_lookup_frame(ring, ring->head, status);
--}
--
--static void netlink_increment_head(struct netlink_ring *ring)
--{
-- ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
--}
--
--static void netlink_forward_ring(struct netlink_ring *ring)
--{
-- unsigned int head = ring->head;
-- const struct nl_mmap_hdr *hdr;
--
-- do {
-- hdr = __netlink_lookup_frame(ring, ring->head);
-- if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
-- break;
-- if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
-- break;
-- netlink_increment_head(ring);
-- } while (ring->head != head);
--}
--
--static bool netlink_has_valid_frame(struct netlink_ring *ring)
--{
-- unsigned int head = ring->head, pos = head;
-- const struct nl_mmap_hdr *hdr;
--
-- do {
-- hdr = __netlink_lookup_frame(ring, pos);
-- if (hdr->nm_status == NL_MMAP_STATUS_VALID)
-- return true;
-- pos = pos != 0 ? pos - 1 : ring->frame_max;
-- } while (pos != head);
--
-- return false;
--}
--
--static bool netlink_dump_space(struct netlink_sock *nlk)
--{
-- struct netlink_ring *ring = &nlk->rx_ring;
-- struct nl_mmap_hdr *hdr;
-- unsigned int n;
--
-- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
-- if (hdr == NULL)
-- return false;
--
-- n = ring->head + ring->frame_max / 2;
-- if (n > ring->frame_max)
-- n -= ring->frame_max;
--
-- hdr = __netlink_lookup_frame(ring, n);
--
-- return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
--}
--
--static unsigned int netlink_poll(struct file *file, struct socket *sock,
-- poll_table *wait)
--{
-- struct sock *sk = sock->sk;
-- struct netlink_sock *nlk = nlk_sk(sk);
-- unsigned int mask;
-- int err;
--
-- if (nlk->rx_ring.pg_vec != NULL) {
-- /* Memory mapped sockets don't call recvmsg(), so flow control
-- * for dumps is performed here. A dump is allowed to continue
-- * if at least half the ring is unused.
-- */
-- while (nlk->cb_running && netlink_dump_space(nlk)) {
-- err = netlink_dump(sk);
-- if (err < 0) {
-- sk->sk_err = -err;
-- sk->sk_error_report(sk);
-- break;
-- }
-- }
-- netlink_rcv_wake(sk);
-- }
--
-- mask = datagram_poll(file, sock, wait);
--
-- /* We could already have received frames in the normal receive
-- * queue, that will show up as NL_MMAP_STATUS_COPY in the ring,
-- * so if mask contains pollin/etc already, there's no point
-- * walking the ring.
-- */
-- if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) {
-- spin_lock_bh(&sk->sk_receive_queue.lock);
-- if (nlk->rx_ring.pg_vec) {
-- if (netlink_has_valid_frame(&nlk->rx_ring))
-- mask |= POLLIN | POLLRDNORM;
-- }
-- spin_unlock_bh(&sk->sk_receive_queue.lock);
-- }
--
-- spin_lock_bh(&sk->sk_write_queue.lock);
-- if (nlk->tx_ring.pg_vec) {
-- if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
-- mask |= POLLOUT | POLLWRNORM;
-- }
-- spin_unlock_bh(&sk->sk_write_queue.lock);
--
-- return mask;
--}
--
--static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
--{
-- return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
--}
--
--static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
-- struct netlink_ring *ring,
-- struct nl_mmap_hdr *hdr)
--{
-- unsigned int size;
-- void *data;
--
-- size = ring->frame_size - NL_MMAP_HDRLEN;
-- data = (void *)hdr + NL_MMAP_HDRLEN;
--
-- skb->head = data;
-- skb->data = data;
-- skb_reset_tail_pointer(skb);
-- skb->end = skb->tail + size;
-- skb->len = 0;
--
-- skb->destructor = netlink_skb_destructor;
-- NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
-- NETLINK_CB(skb).sk = sk;
--}
--
--static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
-- u32 dst_portid, u32 dst_group,
-- struct scm_cookie *scm)
--{
-- struct netlink_sock *nlk = nlk_sk(sk);
-- struct netlink_ring *ring;
-- struct nl_mmap_hdr *hdr;
-- struct sk_buff *skb;
-- unsigned int maxlen;
-- int err = 0, len = 0;
--
-- mutex_lock(&nlk->pg_vec_lock);
--
-- ring = &nlk->tx_ring;
-- maxlen = ring->frame_size - NL_MMAP_HDRLEN;
--
-- do {
-- unsigned int nm_len;
--
-- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
-- if (hdr == NULL) {
-- if (!(msg->msg_flags & MSG_DONTWAIT) &&
-- atomic_read(&nlk->tx_ring.pending))
-- schedule();
-- continue;
-- }
--
-- nm_len = ACCESS_ONCE(hdr->nm_len);
-- if (nm_len > maxlen) {
-- err = -EINVAL;
-- goto out;
-- }
--
-- netlink_frame_flush_dcache(hdr, nm_len);
--
-- skb = alloc_skb(nm_len, GFP_KERNEL);
-- if (skb == NULL) {
-- err = -ENOBUFS;
-- goto out;
-- }
-- __skb_put(skb, nm_len);
-- memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
-- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
--
-- netlink_increment_head(ring);
--
-- NETLINK_CB(skb).portid = nlk->portid;
-- NETLINK_CB(skb).dst_group = dst_group;
-- NETLINK_CB(skb).creds = scm->creds;
--
-- err = security_netlink_send(sk, skb);
-- if (err) {
-- kfree_skb(skb);
-- goto out;
-- }
--
-- if (unlikely(dst_group)) {
-- atomic_inc(&skb->users);
-- netlink_broadcast(sk, skb, dst_portid, dst_group,
-- GFP_KERNEL);
-- }
-- err = netlink_unicast(sk, skb, dst_portid,
-- msg->msg_flags & MSG_DONTWAIT);
-- if (err < 0)
-- goto out;
-- len += err;
--
-- } while (hdr != NULL ||
-- (!(msg->msg_flags & MSG_DONTWAIT) &&
-- atomic_read(&nlk->tx_ring.pending)));
--
-- if (len > 0)
-- err = len;
--out:
-- mutex_unlock(&nlk->pg_vec_lock);
-- return err;
--}
--
--static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
--{
-- struct nl_mmap_hdr *hdr;
--
-- hdr = netlink_mmap_hdr(skb);
-- hdr->nm_len = skb->len;
-- hdr->nm_group = NETLINK_CB(skb).dst_group;
-- hdr->nm_pid = NETLINK_CB(skb).creds.pid;
-- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
-- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
-- netlink_frame_flush_dcache(hdr, hdr->nm_len);
-- netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
--
-- NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
-- kfree_skb(skb);
--}
--
--static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
--{
-- struct netlink_sock *nlk = nlk_sk(sk);
-- struct netlink_ring *ring = &nlk->rx_ring;
-- struct nl_mmap_hdr *hdr;
--
-- spin_lock_bh(&sk->sk_receive_queue.lock);
-- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
-- if (hdr == NULL) {
-- spin_unlock_bh(&sk->sk_receive_queue.lock);
-- kfree_skb(skb);
-- netlink_overrun(sk);
-- return;
-- }
-- netlink_increment_head(ring);
-- __skb_queue_tail(&sk->sk_receive_queue, skb);
-- spin_unlock_bh(&sk->sk_receive_queue.lock);
--
-- hdr->nm_len = skb->len;
-- hdr->nm_group = NETLINK_CB(skb).dst_group;
-- hdr->nm_pid = NETLINK_CB(skb).creds.pid;
-- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
-- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
-- netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
--}
--
--#else /* CONFIG_NETLINK_MMAP */
--#define netlink_rx_is_mmaped(sk) false
--#define netlink_tx_is_mmaped(sk) false
--#define netlink_mmap sock_no_mmap
--#define netlink_poll datagram_poll
--#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0
--#endif /* CONFIG_NETLINK_MMAP */
--
- static void netlink_skb_destructor(struct sk_buff *skb)
- {
--#ifdef CONFIG_NETLINK_MMAP
-- struct nl_mmap_hdr *hdr;
-- struct netlink_ring *ring;
-- struct sock *sk;
--
-- /* If a packet from the kernel to userspace was freed because of an
-- * error without being delivered to userspace, the kernel must reset
-- * the status. In the direction userspace to kernel, the status is
-- * always reset here after the packet was processed and freed.
-- */
-- if (netlink_skb_is_mmaped(skb)) {
-- hdr = netlink_mmap_hdr(skb);
-- sk = NETLINK_CB(skb).sk;
--
-- if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
-- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
-- ring = &nlk_sk(sk)->tx_ring;
-- } else {
-- if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
-- hdr->nm_len = 0;
-- netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
-- }
-- ring = &nlk_sk(sk)->rx_ring;
-- }
--
-- WARN_ON(atomic_read(&ring->pending) == 0);
-- atomic_dec(&ring->pending);
-- sock_put(sk);
--
-- skb->head = NULL;
-- }
--#endif
- if (is_vmalloc_addr(skb->head)) {
- if (!skb->cloned ||
- !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
-@@ -936,18 +334,6 @@ static void netlink_sock_destruct(struct sock *sk)
- }
-
- skb_queue_purge(&sk->sk_receive_queue);
--#ifdef CONFIG_NETLINK_MMAP
-- if (1) {
-- struct nl_mmap_req req;
--
-- memset(&req, 0, sizeof(req));
-- if (nlk->rx_ring.pg_vec)
-- __netlink_set_ring(sk, &req, false, NULL, 0);
-- memset(&req, 0, sizeof(req));
-- if (nlk->tx_ring.pg_vec)
-- __netlink_set_ring(sk, &req, true, NULL, 0);
-- }
--#endif /* CONFIG_NETLINK_MMAP */
-
- if (!sock_flag(sk, SOCK_DEAD)) {
- printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
-@@ -1201,9 +587,6 @@ static int __netlink_create(struct net *net, struct socket *sock,
- mutex_init(nlk->cb_mutex);
- }
- init_waitqueue_head(&nlk->wait);
--#ifdef CONFIG_NETLINK_MMAP
-- mutex_init(&nlk->pg_vec_lock);
--#endif
-
- sk->sk_destruct = netlink_sock_destruct;
- sk->sk_protocol = protocol;
-@@ -1745,8 +1128,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
- nlk = nlk_sk(sk);
-
- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-- test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
-- !netlink_skb_is_mmaped(skb)) {
-+ test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
- DECLARE_WAITQUEUE(wait, current);
- if (!*timeo) {
- if (!ssk || netlink_is_kernel(ssk))
-@@ -1784,14 +1166,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
-
- netlink_deliver_tap(skb);
-
--#ifdef CONFIG_NETLINK_MMAP
-- if (netlink_skb_is_mmaped(skb))
-- netlink_queue_mmaped_skb(sk, skb);
-- else if (netlink_rx_is_mmaped(sk))
-- netlink_ring_set_copied(sk, skb);
-- else
--#endif /* CONFIG_NETLINK_MMAP */
-- skb_queue_tail(&sk->sk_receive_queue, skb);
-+ skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk);
- return len;
- }
-@@ -1815,9 +1190,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
- int delta;
-
- WARN_ON(skb->sk != NULL);
-- if (netlink_skb_is_mmaped(skb))
-- return skb;
--
- delta = skb->end - skb->tail;
- if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
- return skb;
-@@ -1897,71 +1269,6 @@ struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
- unsigned int ldiff, u32 dst_portid,
- gfp_t gfp_mask)
- {
--#ifdef CONFIG_NETLINK_MMAP
-- unsigned int maxlen, linear_size;
-- struct sock *sk = NULL;
-- struct sk_buff *skb;
-- struct netlink_ring *ring;
-- struct nl_mmap_hdr *hdr;
--
-- sk = netlink_getsockbyportid(ssk, dst_portid);
-- if (IS_ERR(sk))
-- goto out;
--
-- ring = &nlk_sk(sk)->rx_ring;
-- /* fast-path without atomic ops for common case: non-mmaped receiver */
-- if (ring->pg_vec == NULL)
-- goto out_put;
--
-- /* We need to account the full linear size needed as a ring
-- * slot cannot have non-linear parts.
-- */
-- linear_size = size + ldiff;
-- if (ring->frame_size - NL_MMAP_HDRLEN < linear_size)
-- goto out_put;
--
-- skb = alloc_skb_head(gfp_mask);
-- if (skb == NULL)
-- goto err1;
--
-- spin_lock_bh(&sk->sk_receive_queue.lock);
-- /* check again under lock */
-- if (ring->pg_vec == NULL)
-- goto out_free;
--
-- /* check again under lock */
-- maxlen = ring->frame_size - NL_MMAP_HDRLEN;
-- if (maxlen < linear_size)
-- goto out_free;
--
-- netlink_forward_ring(ring);
-- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
-- if (hdr == NULL)
-- goto err2;
--
-- netlink_ring_setup_skb(skb, sk, ring, hdr);
-- netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
-- atomic_inc(&ring->pending);
-- netlink_increment_head(ring);
--
-- spin_unlock_bh(&sk->sk_receive_queue.lock);
-- return skb;
--
--err2:
-- kfree_skb(skb);
-- spin_unlock_bh(&sk->sk_receive_queue.lock);
-- netlink_overrun(sk);
--err1:
-- sock_put(sk);
-- return NULL;
--
--out_free:
-- kfree_skb(skb);
-- spin_unlock_bh(&sk->sk_receive_queue.lock);
--out_put:
-- sock_put(sk);
--out:
--#endif
- return alloc_skb(size, gfp_mask);
- }
- EXPORT_SYMBOL_GPL(__netlink_alloc_skb);
-@@ -2242,8 +1549,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
- if (level != SOL_NETLINK)
- return -ENOPROTOOPT;
-
-- if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
-- optlen >= sizeof(int) &&
-+ if (optlen >= sizeof(int) &&
- get_user(val, (unsigned int __user *)optval))
- return -EFAULT;
-
-@@ -2296,25 +1602,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
- }
- err = 0;
- break;
--#ifdef CONFIG_NETLINK_MMAP
-- case NETLINK_RX_RING:
-- case NETLINK_TX_RING: {
-- struct nl_mmap_req req;
--
-- /* Rings might consume more memory than queue limits, require
-- * CAP_NET_ADMIN.
-- */
-- if (!capable(CAP_NET_ADMIN))
-- return -EPERM;
-- if (optlen < sizeof(req))
-- return -EINVAL;
-- if (copy_from_user(&req, optval, sizeof(req)))
-- return -EFAULT;
-- err = netlink_set_ring(sk, &req,
-- optname == NETLINK_TX_RING);
-- break;
-- }
--#endif /* CONFIG_NETLINK_MMAP */
- case NETLINK_LISTEN_ALL_NSID:
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
- return -EPERM;
-@@ -2484,18 +1771,6 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
- smp_rmb();
- }
-
-- /* It's a really convoluted way for userland to ask for mmaped
-- * sendmsg(), but that's what we've got...
-- */
-- if (netlink_tx_is_mmaped(sk) &&
-- iter_is_iovec(&msg->msg_iter) &&
-- msg->msg_iter.nr_segs == 1 &&
-- msg->msg_iter.iov->iov_base == NULL) {
-- err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
-- &scm);
-- goto out;
-- }
--
- err = -EMSGSIZE;
- if (len > sk->sk_sndbuf - 32)
- goto out;
-@@ -2812,8 +2087,7 @@ static int netlink_dump(struct sock *sk)
- goto errout_skb;
- }
-
-- if (!netlink_rx_is_mmaped(sk) &&
-- atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
-+ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
- goto errout_skb;
-
- /* NLMSG_GOODSIZE is small to avoid high order allocations being
-@@ -2902,16 +2176,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
- struct netlink_sock *nlk;
- int ret;
-
-- /* Memory mapped dump requests need to be copied to avoid looping
-- * on the pending state in netlink_mmap_sendmsg() while the CB hold
-- * a reference to the skb.
-- */
-- if (netlink_skb_is_mmaped(skb)) {
-- skb = skb_copy(skb, GFP_KERNEL);
-- if (skb == NULL)
-- return -ENOBUFS;
-- } else
-- atomic_inc(&skb->users);
-+ atomic_inc(&skb->users);
-
- sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
- if (sk == NULL) {
-@@ -3255,7 +2520,7 @@ static const struct proto_ops netlink_ops = {
- .socketpair = sock_no_socketpair,
- .accept = sock_no_accept,
- .getname = netlink_getname,
-- .poll = netlink_poll,
-+ .poll = datagram_poll,
- .ioctl = sock_no_ioctl,
- .listen = sock_no_listen,
- .shutdown = sock_no_shutdown,
-@@ -3263,7 +2528,7 @@ static const struct proto_ops netlink_ops = {
- .getsockopt = netlink_getsockopt,
- .sendmsg = netlink_sendmsg,
- .recvmsg = netlink_recvmsg,
-- .mmap = netlink_mmap,
-+ .mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage,
- };
-
-diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
-index df32cb92d9fc..ea4600aea6b0 100644
---- a/net/netlink/af_netlink.h
-+++ b/net/netlink/af_netlink.h
-@@ -45,12 +45,6 @@ struct netlink_sock {
- int (*netlink_bind)(struct net *net, int group);
- void (*netlink_unbind)(struct net *net, int group);
- struct module *module;
--#ifdef CONFIG_NETLINK_MMAP
-- struct mutex pg_vec_lock;
-- struct netlink_ring rx_ring;
-- struct netlink_ring tx_ring;
-- atomic_t mapped;
--#endif /* CONFIG_NETLINK_MMAP */
-
- struct rhash_head node;
- struct rcu_head rcu;
-@@ -62,15 +56,6 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk)
- return container_of(sk, struct netlink_sock, sk);
- }
-
--static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb)
--{
--#ifdef CONFIG_NETLINK_MMAP
-- return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
--#else
-- return false;
--#endif /* CONFIG_NETLINK_MMAP */
--}
--
- struct netlink_table {
- struct rhashtable hash;
- struct hlist_head mc_list;
-diff --git a/net/netlink/diag.c b/net/netlink/diag.c
-index 3ee63a3cff30..8dd836a8dd60 100644
---- a/net/netlink/diag.c
-+++ b/net/netlink/diag.c
-@@ -8,41 +8,6 @@
-
- #include "af_netlink.h"
-
--#ifdef CONFIG_NETLINK_MMAP
--static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type,
-- struct sk_buff *nlskb)
--{
-- struct netlink_diag_ring ndr;
--
-- ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
-- ndr.ndr_block_nr = ring->pg_vec_len;
-- ndr.ndr_frame_size = ring->frame_size;
-- ndr.ndr_frame_nr = ring->frame_max + 1;
--
-- return nla_put(nlskb, nl_type, sizeof(ndr), &ndr);
--}
--
--static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
--{
-- struct netlink_sock *nlk = nlk_sk(sk);
-- int ret;
--
-- mutex_lock(&nlk->pg_vec_lock);
-- ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb);
-- if (!ret)
-- ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING,
-- nlskb);
-- mutex_unlock(&nlk->pg_vec_lock);
--
-- return ret;
--}
--#else
--static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
--{
-- return 0;
--}
--#endif
--
- static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
- {
- struct netlink_sock *nlk = nlk_sk(sk);
-@@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
- sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
- goto out_nlmsg_trim;
-
-- if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) &&
-- sk_diag_put_rings_cfg(sk, skb))
-- goto out_nlmsg_trim;
--
- nlmsg_end(skb, nlh);
- return 0;
-
---
-2.12.2
-
-From 51a219a1371ed26ce45acc8209d6064257d00f70 Mon Sep 17 00:00:00 2001
-From: Matthias Schiffer <mschiffer@universe-factory.net>
-Date: Thu, 23 Feb 2017 17:19:41 +0100
-Subject: [PATCH 075/251] vxlan: correctly validate VXLAN ID against
- VXLAN_N_VID
-Content-Length: 915
-Lines: 29
-
-[ Upstream commit 4e37d6911f36545b286d15073f6f2222f840e81c ]
-
-The incorrect check caused an off-by-one error: the maximum VID 0xffffff
-was unusable.
-
-Fixes: d342894c5d2f ("vxlan: virtual extensible lan")
-Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
-Acked-by: Jiri Benc <jbenc@redhat.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/net/vxlan.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
-index 6fa8e165878e..590750ab6564 100644
---- a/drivers/net/vxlan.c
-+++ b/drivers/net/vxlan.c
-@@ -2600,7 +2600,7 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
-
- if (data[IFLA_VXLAN_ID]) {
- __u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
-- if (id >= VXLAN_VID_MASK)
-+ if (id >= VXLAN_N_VID)
- return -ERANGE;
- }
-
---
-2.12.2
-
-From f1b3aae1f1bfdbec1956670aa3aa28d25f88d4b3 Mon Sep 17 00:00:00 2001
-From: David Forster <dforster@brocade.com>
-Date: Fri, 24 Feb 2017 14:20:32 +0000
-Subject: [PATCH 076/251] vti6: return GRE_KEY for vti6
-Content-Length: 884
-Lines: 29
-
-[ Upstream commit 7dcdf941cdc96692ab99fd790c8cc68945514851 ]
-
-Align vti6 with vti by returning GRE_KEY flag. This enables iproute2
-to display tunnel keys on "ip -6 tunnel show"
-
-Signed-off-by: David Forster <dforster@brocade.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/ipv6/ip6_vti.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
-index 0a8610b33d79..bdcc4d9cedd3 100644
---- a/net/ipv6/ip6_vti.c
-+++ b/net/ipv6/ip6_vti.c
-@@ -680,6 +680,10 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
- u->link = p->link;
- u->i_key = p->i_key;
- u->o_key = p->o_key;
-+ if (u->i_key)
-+ u->i_flags |= GRE_KEY;
-+ if (u->o_key)
-+ u->o_flags |= GRE_KEY;
- u->proto = p->proto;
-
- memcpy(u->name, p->name, sizeof(u->name));
---
-2.12.2
-
-From 354f79125f12bcd7352704e770c0b10c4a4b424e Mon Sep 17 00:00:00 2001
-From: Julian Anastasov <ja@ssi.bg>
-Date: Sun, 26 Feb 2017 17:14:35 +0200
-Subject: [PATCH 077/251] ipv4: mask tos for input route
-Content-Length: 916
-Lines: 31
-
-[ Upstream commit 6e28099d38c0e50d62c1afc054e37e573adf3d21 ]
-
-Restore the lost masking of TOS in input route code to
-allow ip rules to match it properly.
-
-Problem [1] noticed by Shmulik Ladkani <shmulik.ladkani@gmail.com>
-
-[1] http://marc.info/?t=137331755300040&r=1&w=2
-
-Fixes: 89aef8921bfb ("ipv4: Delete routing cache.")
-Signed-off-by: Julian Anastasov <ja@ssi.bg>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/ipv4/route.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/net/ipv4/route.c b/net/ipv4/route.c
-index ef2f527a119b..da4d68d78590 100644
---- a/net/ipv4/route.c
-+++ b/net/ipv4/route.c
-@@ -1958,6 +1958,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- {
- int res;
-
-+ tos &= IPTOS_RT_MASK;
- rcu_read_lock();
-
- /* Multicast recognition logic is moved from route cache to here.
---
-2.12.2
-
-From 2cd0afc64e333f2ef62444300418883cff0e79da Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Paul=20H=C3=BCber?= <phueber@kernsp.in>
-Date: Sun, 26 Feb 2017 17:58:19 +0100
-Subject: [PATCH 078/251] l2tp: avoid use-after-free caused by
- l2tp_ip_backlog_recv
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-Content-Length: 923
-Lines: 28
-
-[ Upstream commit 51fb60eb162ab84c5edf2ae9c63cf0b878e5547e ]
-
-l2tp_ip_backlog_recv may not return -1 if the packet gets dropped.
-The return value is passed up to ip_local_deliver_finish, which treats
-negative values as an IP protocol number for resubmission.
-
-Signed-off-by: Paul Hüber <phueber@kernsp.in>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/l2tp/l2tp_ip.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
-index 445b7cd0826a..48ab93842322 100644
---- a/net/l2tp/l2tp_ip.c
-+++ b/net/l2tp/l2tp_ip.c
-@@ -383,7 +383,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
- drop:
- IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS);
- kfree_skb(skb);
-- return -1;
-+ return 0;
- }
-
- /* Userspace will call sendmsg() on the tunnel socket to send L2TP
---
-2.12.2
-
-From f331d6445a3e4013428b06169acf3ae33614e69b Mon Sep 17 00:00:00 2001
-From: Alexander Potapenko <glider@google.com>
-Date: Wed, 1 Mar 2017 12:57:20 +0100
-Subject: [PATCH 079/251] net: don't call strlen() on the user buffer in
- packet_bind_spkt()
-Content-Length: 3957
-Lines: 104
-
-[ Upstream commit 540e2894f7905538740aaf122bd8e0548e1c34a4 ]
-
-KMSAN (KernelMemorySanitizer, a new error detection tool) reports use of
-uninitialized memory in packet_bind_spkt():
-Acked-by: Eric Dumazet <edumazet@google.com>
-
-==================================================================
-BUG: KMSAN: use of unitialized memory
-CPU: 0 PID: 1074 Comm: packet Not tainted 4.8.0-rc6+ #1891
-Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
-01/01/2011
- 0000000000000000 ffff88006b6dfc08 ffffffff82559ae8 ffff88006b6dfb48
- ffffffff818a7c91 ffffffff85b9c870 0000000000000092 ffffffff85b9c550
- 0000000000000000 0000000000000092 00000000ec400911 0000000000000002
-Call Trace:
- [< inline >] __dump_stack lib/dump_stack.c:15
- [<ffffffff82559ae8>] dump_stack+0x238/0x290 lib/dump_stack.c:51
- [<ffffffff818a6626>] kmsan_report+0x276/0x2e0 mm/kmsan/kmsan.c:1003
- [<ffffffff818a783b>] __msan_warning+0x5b/0xb0
-mm/kmsan/kmsan_instr.c:424
- [< inline >] strlen lib/string.c:484
- [<ffffffff8259b58d>] strlcpy+0x9d/0x200 lib/string.c:144
- [<ffffffff84b2eca4>] packet_bind_spkt+0x144/0x230
-net/packet/af_packet.c:3132
- [<ffffffff84242e4d>] SYSC_bind+0x40d/0x5f0 net/socket.c:1370
- [<ffffffff84242a22>] SyS_bind+0x82/0xa0 net/socket.c:1356
- [<ffffffff8515991b>] entry_SYSCALL_64_fastpath+0x13/0x8f
-arch/x86/entry/entry_64.o:?
-chained origin: 00000000eba00911
- [<ffffffff810bb787>] save_stack_trace+0x27/0x50
-arch/x86/kernel/stacktrace.c:67
- [< inline >] kmsan_save_stack_with_flags mm/kmsan/kmsan.c:322
- [< inline >] kmsan_save_stack mm/kmsan/kmsan.c:334
- [<ffffffff818a59f8>] kmsan_internal_chain_origin+0x118/0x1e0
-mm/kmsan/kmsan.c:527
- [<ffffffff818a7773>] __msan_set_alloca_origin4+0xc3/0x130
-mm/kmsan/kmsan_instr.c:380
- [<ffffffff84242b69>] SYSC_bind+0x129/0x5f0 net/socket.c:1356
- [<ffffffff84242a22>] SyS_bind+0x82/0xa0 net/socket.c:1356
- [<ffffffff8515991b>] entry_SYSCALL_64_fastpath+0x13/0x8f
-arch/x86/entry/entry_64.o:?
-origin description: ----address@SYSC_bind (origin=00000000eb400911)
-==================================================================
-(the line numbers are relative to 4.8-rc6, but the bug persists
-upstream)
-
-This report appears when I run the following program as root:
-
-=====================================
- #include <string.h>
- #include <sys/socket.h>
- #include <netpacket/packet.h>
- #include <net/ethernet.h>
-
- int main() {
- struct sockaddr addr;
- memset(&addr, 0xff, sizeof(addr));
- addr.sa_family = AF_PACKET;
- int fd = socket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ALL));
- bind(fd, &addr, sizeof(addr));
- return 0;
- }
-=====================================
-
-This happens because addr.sa_data copied from the userspace is not
-zero-terminated, and copying it with strlcpy() in packet_bind_spkt()
-results in calling strlen() on the kernel copy of that non-terminated
-buffer.
-
-Signed-off-by: Alexander Potapenko <glider@google.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/packet/af_packet.c | 8 ++++++--
- 1 file changed, 6 insertions(+), 2 deletions(-)
-
-diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
-index d805cd577a60..3975ac809934 100644
---- a/net/packet/af_packet.c
-+++ b/net/packet/af_packet.c
-@@ -3021,7 +3021,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
- int addr_len)
- {
- struct sock *sk = sock->sk;
-- char name[15];
-+ char name[sizeof(uaddr->sa_data) + 1];
-
- /*
- * Check legality
-@@ -3029,7 +3029,11 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
-
- if (addr_len != sizeof(struct sockaddr))
- return -EINVAL;
-- strlcpy(name, uaddr->sa_data, sizeof(name));
-+ /* uaddr->sa_data comes from the userspace, it's not guaranteed to be
-+ * zero-terminated.
-+ */
-+ memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
-+ name[sizeof(uaddr->sa_data)] = 0;
-
- return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
- }
---
-2.12.2
-
-From a70c328597045be2962098916c88ddd172caa054 Mon Sep 17 00:00:00 2001
-From: Eric Dumazet <edumazet@google.com>
-Date: Wed, 1 Mar 2017 14:28:39 -0800
-Subject: [PATCH 080/251] net: net_enable_timestamp() can be called from irq
- contexts
-Content-Length: 2734
-Lines: 92
-
-[ Upstream commit 13baa00ad01bb3a9f893e3a08cbc2d072fc0c15d ]
-
-It is now very clear that silly TCP listeners might play with
-enabling/disabling timestamping while new children are added
-to their accept queue.
-
-This means net_enable_timestamp() can be called from BH context
-while the current state of the static key is not enabled.
-
-Let's play safe and allow all contexts.
-
-The work queue is scheduled only under the problematic cases,
-which are the static key enable/disable transition, to not slow down
-critical paths.
-
-This extends and improves what we did in commit 5fa8bbda38c6 ("net: use
-a work queue to defer net_disable_timestamp() work")
-
-Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context")
-Signed-off-by: Eric Dumazet <edumazet@google.com>
-Reported-by: Dmitry Vyukov <dvyukov@google.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/core/dev.c | 35 +++++++++++++++++++++++++++++++----
- 1 file changed, 31 insertions(+), 4 deletions(-)
-
-diff --git a/net/core/dev.c b/net/core/dev.c
-index 08215a85c742..48399d8ce614 100644
---- a/net/core/dev.c
-+++ b/net/core/dev.c
-@@ -1677,27 +1677,54 @@ EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
- static struct static_key netstamp_needed __read_mostly;
- #ifdef HAVE_JUMP_LABEL
- static atomic_t netstamp_needed_deferred;
-+static atomic_t netstamp_wanted;
- static void netstamp_clear(struct work_struct *work)
- {
- int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
-+ int wanted;
-
-- while (deferred--)
-- static_key_slow_dec(&netstamp_needed);
-+ wanted = atomic_add_return(deferred, &netstamp_wanted);
-+ if (wanted > 0)
-+ static_key_enable(&netstamp_needed);
-+ else
-+ static_key_disable(&netstamp_needed);
- }
- static DECLARE_WORK(netstamp_work, netstamp_clear);
- #endif
-
- void net_enable_timestamp(void)
- {
-+#ifdef HAVE_JUMP_LABEL
-+ int wanted;
-+
-+ while (1) {
-+ wanted = atomic_read(&netstamp_wanted);
-+ if (wanted <= 0)
-+ break;
-+ if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted)
-+ return;
-+ }
-+ atomic_inc(&netstamp_needed_deferred);
-+ schedule_work(&netstamp_work);
-+#else
- static_key_slow_inc(&netstamp_needed);
-+#endif
- }
- EXPORT_SYMBOL(net_enable_timestamp);
-
- void net_disable_timestamp(void)
- {
- #ifdef HAVE_JUMP_LABEL
-- /* net_disable_timestamp() can be called from non process context */
-- atomic_inc(&netstamp_needed_deferred);
-+ int wanted;
-+
-+ while (1) {
-+ wanted = atomic_read(&netstamp_wanted);
-+ if (wanted <= 1)
-+ break;
-+ if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted)
-+ return;
-+ }
-+ atomic_dec(&netstamp_needed_deferred);
- schedule_work(&netstamp_work);
- #else
- static_key_slow_dec(&netstamp_needed);
---
-2.12.2
-
-From 9216632bf4a0bafdc998d1c68b37b70446775900 Mon Sep 17 00:00:00 2001
-From: Arnaldo Carvalho de Melo <acme@redhat.com>
-Date: Wed, 1 Mar 2017 16:35:07 -0300
-Subject: [PATCH 081/251] dccp: Unlock sock before calling sk_free()
-Content-Length: 3158
-Lines: 77
-
-[ Upstream commit d5afb6f9b6bb2c57bd0c05e76e12489dc0d037d9 ]
-
-The code where sk_clone() came from created a new socket and locked it,
-but then, on the error path didn't unlock it.
-
-This problem stayed there for a long while, till b0691c8ee7c2 ("net:
-Unlock sock before calling sk_free()") fixed it, but unfortunately the
-callers of sk_clone() (now sk_clone_locked()) were not audited and the
-one in dccp_create_openreq_child() remained.
-
-Now in the age of the syzkaller fuzzer, this was finally uncovered, as
-reported by Dmitry:
-
- ---- 8< ----
-
-I've got the following report while running syzkaller fuzzer on
-86292b33d4b7 ("Merge branch 'akpm' (patches from Andrew)")
-
- [ BUG: held lock freed! ]
- 4.10.0+ #234 Not tainted
- -------------------------
- syz-executor6/6898 is freeing memory
- ffff88006286cac0-ffff88006286d3b7, with a lock still held there!
- (slock-AF_INET6){+.-...}, at: [<ffffffff8362c2c9>] spin_lock
- include/linux/spinlock.h:299 [inline]
- (slock-AF_INET6){+.-...}, at: [<ffffffff8362c2c9>]
- sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504
- 5 locks held by syz-executor6/6898:
- #0: (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff839a34b4>] lock_sock
- include/net/sock.h:1460 [inline]
- #0: (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff839a34b4>]
- inet_stream_connect+0x44/0xa0 net/ipv4/af_inet.c:681
- #1: (rcu_read_lock){......}, at: [<ffffffff83bc1c2a>]
- inet6_csk_xmit+0x12a/0x5d0 net/ipv6/inet6_connection_sock.c:126
- #2: (rcu_read_lock){......}, at: [<ffffffff8369b424>] __skb_unlink
- include/linux/skbuff.h:1767 [inline]
- #2: (rcu_read_lock){......}, at: [<ffffffff8369b424>] __skb_dequeue
- include/linux/skbuff.h:1783 [inline]
- #2: (rcu_read_lock){......}, at: [<ffffffff8369b424>]
- process_backlog+0x264/0x730 net/core/dev.c:4835
- #3: (rcu_read_lock){......}, at: [<ffffffff83aeb5c0>]
- ip6_input_finish+0x0/0x1700 net/ipv6/ip6_input.c:59
- #4: (slock-AF_INET6){+.-...}, at: [<ffffffff8362c2c9>] spin_lock
- include/linux/spinlock.h:299 [inline]
- #4: (slock-AF_INET6){+.-...}, at: [<ffffffff8362c2c9>]
- sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504
-
-Fix it just like was done by b0691c8ee7c2 ("net: Unlock sock before calling
-sk_free()").
-
-Reported-by: Dmitry Vyukov <dvyukov@google.com>
-Cc: Cong Wang <xiyou.wangcong@gmail.com>
-Cc: Eric Dumazet <edumazet@google.com>
-Cc: Gerrit Renker <gerrit@erg.abdn.ac.uk>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/20170301153510.GE15145@kernel.org
-Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/dccp/minisocks.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
-index 1994f8af646b..e314caa39176 100644
---- a/net/dccp/minisocks.c
-+++ b/net/dccp/minisocks.c
-@@ -122,6 +122,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk,
- /* It is still raw copy of parent, so invalidate
- * destructor and make plain sk_free() */
- newsk->sk_destruct = NULL;
-+ bh_unlock_sock(newsk);
- sk_free(newsk);
- return NULL;
- }
---
-2.12.2
-
From 2681a7853ad73bfebc3a683765a496bb283c6648 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 3 Mar 2017 14:08:21 -0800
--- /dev/null
+From 0c0be310ba29e4a053e8aac934aebe590c5da909 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Thu, 18 Feb 2016 15:03:24 +0100
+Subject: netlink: remove mmapped netlink support
+
+From: Florian Westphal <fw@strlen.de>
+
+commit d1b4c689d4130bcfd3532680b64db562300716b6 upstream.
+
+mmapped netlink has a number of unresolved issues:
+
+- TX zerocopy support had to be disabled more than a year ago via
+ commit 4682a0358639b29cf ("netlink: Always copy on mmap TX.")
+ because the content of the mmapped area can change after netlink
+ attribute validation but before message processing.
+
+- RX support was implemented mainly to speed up nfqueue dumping packet
+ payload to userspace. However, since commit ae08ce0021087a5d812d2
+ ("netfilter: nfnetlink_queue: zero copy support") we avoid one copy
+ with the socket-based interface too (via the skb_zerocopy helper).
+
+The other problem is that skbs attached to a mmaped netlink socket
+behave differently from normal skbs:
+
+- they don't have a shinfo area, so all functions that use skb_shinfo()
+(e.g. skb_clone) cannot be used.
+
+- reserving headroom prevents userspace from seeing the content as
+it expects message to start at skb->head.
+See for instance
+commit aa3a022094fa ("netlink: not trim skb for mmaped socket when dump").
+
+- skbs handed e.g. to netlink_ack must have non-NULL skb->sk, else we
+crash because it needs the sk to check if a tx ring is attached.
+
+This is not obvious and leads to non-intuitive bug fixes such as 7c7bdf359
+("netfilter: nfnetlink: use original skbuff when acking batches").
+
+mmaped netlink also didn't play nicely with the skb_zerocopy helper
+used by nfqueue and openvswitch. Daniel Borkmann fixed this via
+commit 6bb0fef489f6 ("netlink, mmap: fix edge-case leakages in nf queue
+zero-copy")' but at the cost of also needing to provide remaining
+length to the allocation function.
+
+nfqueue also has problems when used with mmaped rx netlink:
+- mmaped netlink doesn't allow use of nfqueue batch verdict messages.
+  The problem is that in the mmap case, the allocation time also
+  determines the ordering in which the frame will be seen by userspace
+  (A allocating before B means that A is located in an earlier ring
+  slot, but B might still get a lower sequence number than A, since the
+  seqno is decided later). To fix this we would need to extend the
+  spinlocked region to also cover the allocation and message setup,
+  which isn't desirable.
+- nfqueue can now be configured to queue large (GSO) skbs to userspace.
+ Queing GSO packets is faster than having to force a software segmentation
+ in the kernel, so this is a desirable option. However, with a mmap based
+ ring one has to use 64kb per ring slot element, else mmap has to fall back
+ to the socket path (NL_MMAP_STATUS_COPY) for all large packets.
+
+To use the mmap interface, userspace not only has to probe for mmap netlink
+support, it also has to implement a recv/socket receive path in order to
+handle messages that exceed the size of an rx ring element.
+
+Cc: Daniel Borkmann <daniel@iogearbox.net>
+Cc: Ken-ichirou MATSUZAWA <chamaken@gmail.com>
+Cc: Pablo Neira Ayuso <pablo@netfilter.org>
+Cc: Patrick McHardy <kaber@trash.net>
+Cc: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Cc: Shi Yuejie <shiyuejie@outlook.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/networking/netlink_mmap.txt | 339 -------------
+ include/uapi/linux/netlink.h | 4
+ include/uapi/linux/netlink_diag.h | 2
+ net/netlink/Kconfig | 9
+ net/netlink/af_netlink.c | 732 ------------------------------
+ net/netlink/af_netlink.h | 15
+ net/netlink/diag.c | 39 -
+ 7 files changed, 15 insertions(+), 1125 deletions(-)
+
+--- a/Documentation/networking/netlink_mmap.txt
++++ /dev/null
+@@ -1,339 +0,0 @@
+-This file documents how to use memory mapped I/O with netlink.
+-
+-Author: Patrick McHardy <kaber@trash.net>
+-
+-Overview
+---------
+-
+-Memory mapped netlink I/O can be used to increase throughput and decrease
+-overhead of unicast receive and transmit operations. Some netlink subsystems
+-require high throughput; these are mainly the netfilter subsystems
+-nfnetlink_queue and nfnetlink_log, but it can also help speed up large
+-dump operations of e.g. the routing database.
+-
+-Memory mapped netlink I/O uses two circular ring buffers for RX and TX, which
+-are mapped into the process's address space.
+-
+-The RX ring is used by the kernel to directly construct netlink messages into
+-user-space memory without copying them as done with regular socket I/O.
+-Additionally, as long as the ring contains messages, no recvmsg() or poll()
+-syscalls have to be issued by user-space to get more messages.
+-
+-The TX ring is used to process messages directly from user-space memory; the
+-kernel processes all messages contained in the ring using a single sendmsg()
+-call.
+-
+-Usage overview
+---------------
+-
+-In order to use memory mapped netlink I/O, user-space needs three main changes:
+-
+-- ring setup
+-- conversion of the RX path to get messages from the ring instead of recvmsg()
+-- conversion of the TX path to construct messages into the ring
+-
+-Ring setup is done using setsockopt() to provide the ring parameters to the
+-kernel, followed by a call to mmap() to map the ring into the process's
+-address space:
+-
+-- setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &params, sizeof(params));
+-- setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &params, sizeof(params));
+-- ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)
+-
+-Usage of either ring is optional, but even if only the RX ring is used the
+-mapping still needs to be writable in order to update the frame status after
+-processing.
+-
+-Conversion of the reception path involves calling poll() on the file
+-descriptor; once the socket is readable, the frames from the ring are
+-processed in order until no more messages are available, as indicated by
+-a status word in the frame header.
+-
+-On the kernel side, in order to make use of memory mapped I/O on receive, the
+-originating netlink subsystem needs to support memory mapped I/O; otherwise
+-it will use an allocated socket buffer as usual and the contents will be
+-copied to the ring on transmission, nullifying most of the performance gains.
+-Dumps of kernel databases automatically support memory mapped I/O.
+-
+-Conversion of the transmit path involves changing message construction to
+-use memory from the TX ring instead of (usually) a buffer declared on the
+-stack and setting up the frame header appropriately. Optionally, poll() can
+-be used to wait for free frames in the TX ring, as sketched below.
+-
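+-As a hedged illustration (not part of the original document), waiting for a
+-free TX frame could look like this; fd is the netlink socket from the ring
+-setup:
+-
+-	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
+-
+-	/* Block until the kernel reports at least one free TX ring frame */
+-	if (poll(&pfd, 1, -1) < 0)
+-		exit(1);
+-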
+-Structures and definitions for using memory mapped I/O are contained in
+-<linux/netlink.h>.
+-
+-RX and TX rings
+-----------------
+-
+-Each ring contains a number of contiguous memory blocks, containing frames of
+-a fixed size that depends on the parameters used for ring setup.
+-
+-Ring: [ block 0 ]
+- [ frame 0 ]
+- [ frame 1 ]
+- [ block 1 ]
+- [ frame 2 ]
+- [ frame 3 ]
+- ...
+- [ block n ]
+- [ frame 2 * n ]
+- [ frame 2 * n + 1 ]
+-
+-The blocks are only visible to the kernel; from the point of view of user-space
+-the ring just contains the frames in a contiguous memory zone.
+-
+-The ring parameters used for setting up the ring are defined as follows:
+-
+-struct nl_mmap_req {
+- unsigned int nm_block_size;
+- unsigned int nm_block_nr;
+- unsigned int nm_frame_size;
+- unsigned int nm_frame_nr;
+-};
+-
+-Frames are grouped into blocks, where each block is a contiguous region of memory
+-and holds nm_block_size / nm_frame_size frames. The total number of frames in
+-the ring is nm_frame_nr. The following invariants hold:
+-
+-- frames_per_block = nm_block_size / nm_frame_size
+-
+-- nm_frame_nr = frames_per_block * nm_block_nr
+-
+-Some parameters are constrained, specifically:
+-
+-- nm_block_size must be a multiple of the architecture's memory page size.
+- The getpagesize() function can be used to get the page size.
+-
+-- nm_frame_size must be equal to or larger than NL_MMAP_HDRLEN; in other
+-  words, a frame must be able to hold at least the frame header
+-
+-- nm_frame_size must be smaller than or equal to nm_block_size
+-
+-- nm_frame_size must be a multiple of NL_MMAP_MSG_ALIGNMENT
+-
+-- nm_frame_nr must equal the actual number of frames as specified above.
+-
+-When the kernel can't allocate physically contiguous memory for a ring block,
+-it will fall back to using physically non-contiguous memory. This might affect
+-performance negatively; to avoid this, the nm_frame_size parameter should be
+-chosen to be as small as possible for the required frame size, and the number
+-of blocks should be increased instead. A sketch of a consistent parameter set
+-follows.
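+-
+-As a hedged illustration (not part of the original document), the invariants
+-above can be encoded directly when filling in struct nl_mmap_req; the frame
+-and block sizes are arbitrary example values:
+-
+-	unsigned int frame_size = 16384;	/* multiple of NL_MMAP_MSG_ALIGNMENT */
+-	unsigned int block_size = 16 * getpagesize(); /* multiple of the page size */
+-	unsigned int block_nr = 64;
+-	struct nl_mmap_req req = {
+-		.nm_block_size	= block_size,
+-		.nm_block_nr	= block_nr,
+-		.nm_frame_size	= frame_size,
+-		/* nm_frame_nr = frames_per_block * nm_block_nr */
+-		.nm_frame_nr	= (block_size / frame_size) * block_nr,
+-	};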
+-
+-Ring frames
+-------------
+-
+-Each frame contains a frame header, consisting of a synchronization word and
+-some meta-data, and the message itself.
+-
+-Frame: [ header message ]
+-
+-The frame header is defined as follows:
+-
+-struct nl_mmap_hdr {
+- unsigned int nm_status;
+- unsigned int nm_len;
+- __u32 nm_group;
+- /* credentials */
+- __u32 nm_pid;
+- __u32 nm_uid;
+- __u32 nm_gid;
+-};
+-
+-- nm_status is used for synchronizing processing between the kernel and user-
+- space and specifies ownership of the frame as well as the operation to perform
+-
+-- nm_len contains the length of the message contained in the data area
+-
+-- nm_group specifies the destination multicast group of the message
+-
+-- nm_pid, nm_uid and nm_gid contain the netlink pid, UID and GID of the sending
+- process. These values correspond to the data available using SOCK_PASSCRED in
+- the SCM_CREDENTIALS cmsg.
+-
+-The possible values in the status word are:
+-
+-- NL_MMAP_STATUS_UNUSED:
+- RX ring: frame belongs to the kernel and contains no message
+-		for user-space. Appropriate action is to invoke poll()
+- to wait for new messages.
+-
+- TX ring: frame belongs to user-space and can be used for
+- message construction.
+-
+-- NL_MMAP_STATUS_RESERVED:
+- RX ring only: frame is currently used by the kernel for message
+- construction and contains no valid message yet.
+- Appropriate action is to invoke poll() to wait for
+- new messages.
+-
+-- NL_MMAP_STATUS_VALID:
+-	RX ring:	frame contains a valid message. Appropriate action is
+- to process the message and release the frame back to
+- the kernel by setting the status to
+- NL_MMAP_STATUS_UNUSED or queue the frame by setting the
+- status to NL_MMAP_STATUS_SKIP.
+-
+- TX ring: the frame contains a valid message from user-space to
+- be processed by the kernel. After completing processing
+- the kernel will release the frame back to user-space by
+- setting the status to NL_MMAP_STATUS_UNUSED.
+-
+-- NL_MMAP_STATUS_COPY:
+- RX ring only: a message is ready to be processed but could not be
+- stored in the ring, either because it exceeded the
+- frame size or because the originating subsystem does
+- not support memory mapped I/O. Appropriate action is
+- to invoke recvmsg() to receive the message and release
+- the frame back to the kernel by setting the status to
+- NL_MMAP_STATUS_UNUSED.
+-
+-- NL_MMAP_STATUS_SKIP:
+- RX ring only: user-space queued the message for later processing, but
+- processed some messages following it in the ring. The
+- kernel should skip this frame when looking for unused
+- frames.
+-
+-The data area of a frame begins at an offset of NL_MMAP_HDRLEN relative to the
+-frame header.
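+-
+-Since the header always sits at the start of a frame and the message payload
+-begins NL_MMAP_HDRLEN bytes into it, both can be located with small accessors
+-such as the following sketch (the helper names are illustrative only):
+-
+-        /* Illustrative accessors for the frame layout described above */
+-        static struct nl_mmap_hdr *frame_hdr(void *ring, unsigned int frame_offset)
+-        {
+-                return (struct nl_mmap_hdr *)((char *)ring + frame_offset);
+-        }
+-
+-        static struct nlmsghdr *frame_msg(struct nl_mmap_hdr *hdr)
+-        {
+-                return (struct nlmsghdr *)((char *)hdr + NL_MMAP_HDRLEN);
+-        }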
+-
+-TX limitations
+---------------
+-
+-Kernel processing usually involves validation of the message received from
+-user-space, then processing its contents. The kernel must ensure that
+-user-space is not able to modify the message contents after they have been
+-validated. In order to do so, the message is copied from the ring frame
+-to an allocated buffer if either of these conditions is false:
+-
+-- only a single mapping of the ring exists
+-- the file descriptor is not shared between processes
+-
+-This means that for threaded programs, the kernel will fall back to copying.
+-
+-Example
+--------
+-
+-Ring setup:
+-
+- unsigned int block_size = 16 * getpagesize();
+- struct nl_mmap_req req = {
+- .nm_block_size = block_size,
+- .nm_block_nr = 64,
+- .nm_frame_size = 16384,
+- .nm_frame_nr = 64 * block_size / 16384,
+- };
+- unsigned int ring_size;
+- void *rx_ring, *tx_ring;
+-
+- /* Configure ring parameters */
+- if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
+- exit(1);
+- if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
+-        exit(1);
+-
+- /* Calculate size of each individual ring */
+- ring_size = req.nm_block_nr * req.nm_block_size;
+-
+- /* Map RX/TX rings. The TX ring is located after the RX ring */
+- rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE,
+- MAP_SHARED, fd, 0);
+- if ((long)rx_ring == -1L)
+- exit(1);
+-    tx_ring = rx_ring + ring_size;
+-
+-Message reception:
+-
+-This example assumes that the ring parameters from the ring setup above
+-(frame_size and ring_size) are available.
+-
+- unsigned int frame_offset = 0;
+- struct nl_mmap_hdr *hdr;
+- struct nlmsghdr *nlh;
+- unsigned char buf[16384];
+- ssize_t len;
+-
+- while (1) {
+- struct pollfd pfds[1];
+-
+- pfds[0].fd = fd;
+- pfds[0].events = POLLIN | POLLERR;
+- pfds[0].revents = 0;
+-
+-        if (poll(pfds, 1, -1) < 0 && errno != EINTR)
+- exit(1);
+-
+- /* Check for errors. Error handling omitted */
+- if (pfds[0].revents & POLLERR)
+- <handle error>
+-
+- /* If no new messages, poll again */
+- if (!(pfds[0].revents & POLLIN))
+- continue;
+-
+- /* Process all frames */
+- while (1) {
+- /* Get next frame header */
+- hdr = rx_ring + frame_offset;
+-
+- if (hdr->nm_status == NL_MMAP_STATUS_VALID) {
+- /* Regular memory mapped frame */
+- nlh = (void *)hdr + NL_MMAP_HDRLEN;
+- len = hdr->nm_len;
+-
+- /* Release empty message immediately. May happen
+- * on error during message construction.
+- */
+- if (len == 0)
+- goto release;
+- } else if (hdr->nm_status == NL_MMAP_STATUS_COPY) {
+- /* Frame queued to socket receive queue */
+- len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
+- if (len <= 0)
+- break;
+-                nlh = (struct nlmsghdr *)buf;
+- } else
+- /* No more messages to process, continue polling */
+- break;
+-
+- process_msg(nlh);
+-release:
+- /* Release frame back to the kernel */
+- hdr->nm_status = NL_MMAP_STATUS_UNUSED;
+-
+- /* Advance frame offset to next frame */
+- frame_offset = (frame_offset + frame_size) % ring_size;
+- }
+- }
+-
+-Message transmission:
+-
+-This example assumes that the ring parameters from the ring setup above are
+-available. A single message is constructed and transmitted; to send multiple
+-messages at once, they would be constructed in consecutive frames before a
+-final call to sendto(), as shown in the sketch after this example.
+-
+- unsigned int frame_offset = 0;
+- struct nl_mmap_hdr *hdr;
+- struct nlmsghdr *nlh;
+- struct sockaddr_nl addr = {
+- .nl_family = AF_NETLINK,
+- };
+-
+- hdr = tx_ring + frame_offset;
+- if (hdr->nm_status != NL_MMAP_STATUS_UNUSED)
+-        /* No frame available. Use poll() to wait for one. */
+- exit(1);
+-
+- nlh = (void *)hdr + NL_MMAP_HDRLEN;
+-
+- /* Build message */
+- build_message(nlh);
+-
+- /* Fill frame header: length and status need to be set */
+- hdr->nm_len = nlh->nlmsg_len;
+- hdr->nm_status = NL_MMAP_STATUS_VALID;
+-
+-    if (sendto(fd, NULL, 0, 0, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+- exit(1);
+-
+- /* Advance frame offset to next frame */
+- frame_offset = (frame_offset + frame_size) % ring_size;
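+-
+-The batching variant mentioned above could look like the following sketch. It
+-reuses frame_offset, hdr, nlh, addr, fd, frame_size, ring_size, tx_ring and
+-build_message() from the example above; batch_count is a hypothetical,
+-application-chosen limit.
+-
+-        unsigned int i;
+-
+-        /* Fill consecutive TX frames, then notify the kernel once */
+-        for (i = 0; i < batch_count; i++) {
+-                hdr = tx_ring + frame_offset;
+-                if (hdr->nm_status != NL_MMAP_STATUS_UNUSED)
+-                        break;  /* ring full, send what was queued so far */
+-
+-                nlh = (void *)hdr + NL_MMAP_HDRLEN;
+-                build_message(nlh);
+-
+-                hdr->nm_len = nlh->nlmsg_len;
+-                hdr->nm_status = NL_MMAP_STATUS_VALID;
+-                frame_offset = (frame_offset + frame_size) % ring_size;
+-        }
+-
+-        /* A single sendto() submits all frames marked VALID */
+-        if (sendto(fd, NULL, 0, 0, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+-                exit(1);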
+--- a/include/uapi/linux/netlink.h
++++ b/include/uapi/linux/netlink.h
+@@ -106,8 +106,10 @@ struct nlmsgerr {
+ #define NETLINK_PKTINFO 3
+ #define NETLINK_BROADCAST_ERROR 4
+ #define NETLINK_NO_ENOBUFS 5
++#ifndef __KERNEL__
+ #define NETLINK_RX_RING 6
+ #define NETLINK_TX_RING 7
++#endif
+
+ struct nl_pktinfo {
+ __u32 group;
+@@ -130,6 +132,7 @@ struct nl_mmap_hdr {
+ __u32 nm_gid;
+ };
+
++#ifndef __KERNEL__
+ enum nl_mmap_status {
+ NL_MMAP_STATUS_UNUSED,
+ NL_MMAP_STATUS_RESERVED,
+@@ -141,6 +144,7 @@ enum nl_mmap_status {
+ #define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO
+ #define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT)
+ #define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr))
++#endif
+
+ #define NET_MAJOR 36 /* Major 36 is reserved for networking */
+
+--- a/include/uapi/linux/netlink_diag.h
++++ b/include/uapi/linux/netlink_diag.h
+@@ -48,6 +48,8 @@ enum {
+
+ #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */
+ #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */
++#ifndef __KERNEL__
+ #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */
++#endif
+
+ #endif
+--- a/net/netlink/Kconfig
++++ b/net/netlink/Kconfig
+@@ -2,15 +2,6 @@
+ # Netlink Sockets
+ #
+
+-config NETLINK_MMAP
+- bool "NETLINK: mmaped IO"
+- ---help---
+- This option enables support for memory mapped netlink IO. This
+- reduces overhead by avoiding copying data between kernel- and
+- userspace.
+-
+- If unsure, say N.
+-
+ config NETLINK_DIAG
+ tristate "NETLINK: socket monitoring interface"
+ default n
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -234,7 +234,7 @@ static int __netlink_deliver_tap_skb(str
+
+ dev_hold(dev);
+
+- if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head))
++ if (is_vmalloc_addr(skb->head))
+ nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
+ else
+ nskb = skb_clone(skb, GFP_ATOMIC);
+@@ -308,599 +308,8 @@ static void netlink_rcv_wake(struct sock
+ wake_up_interruptible(&nlk->wait);
+ }
+
+-#ifdef CONFIG_NETLINK_MMAP
+-static bool netlink_rx_is_mmaped(struct sock *sk)
+-{
+- return nlk_sk(sk)->rx_ring.pg_vec != NULL;
+-}
+-
+-static bool netlink_tx_is_mmaped(struct sock *sk)
+-{
+- return nlk_sk(sk)->tx_ring.pg_vec != NULL;
+-}
+-
+-static __pure struct page *pgvec_to_page(const void *addr)
+-{
+- if (is_vmalloc_addr(addr))
+- return vmalloc_to_page(addr);
+- else
+- return virt_to_page(addr);
+-}
+-
+-static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
+-{
+- unsigned int i;
+-
+- for (i = 0; i < len; i++) {
+- if (pg_vec[i] != NULL) {
+- if (is_vmalloc_addr(pg_vec[i]))
+- vfree(pg_vec[i]);
+- else
+- free_pages((unsigned long)pg_vec[i], order);
+- }
+- }
+- kfree(pg_vec);
+-}
+-
+-static void *alloc_one_pg_vec_page(unsigned long order)
+-{
+- void *buffer;
+- gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
+- __GFP_NOWARN | __GFP_NORETRY;
+-
+- buffer = (void *)__get_free_pages(gfp_flags, order);
+- if (buffer != NULL)
+- return buffer;
+-
+- buffer = vzalloc((1 << order) * PAGE_SIZE);
+- if (buffer != NULL)
+- return buffer;
+-
+- gfp_flags &= ~__GFP_NORETRY;
+- return (void *)__get_free_pages(gfp_flags, order);
+-}
+-
+-static void **alloc_pg_vec(struct netlink_sock *nlk,
+- struct nl_mmap_req *req, unsigned int order)
+-{
+- unsigned int block_nr = req->nm_block_nr;
+- unsigned int i;
+- void **pg_vec;
+-
+- pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
+- if (pg_vec == NULL)
+- return NULL;
+-
+- for (i = 0; i < block_nr; i++) {
+- pg_vec[i] = alloc_one_pg_vec_page(order);
+- if (pg_vec[i] == NULL)
+- goto err1;
+- }
+-
+- return pg_vec;
+-err1:
+- free_pg_vec(pg_vec, order, block_nr);
+- return NULL;
+-}
+-
+-
+-static void
+-__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
+- unsigned int order)
+-{
+- struct netlink_sock *nlk = nlk_sk(sk);
+- struct sk_buff_head *queue;
+- struct netlink_ring *ring;
+-
+- queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+- ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+-
+- spin_lock_bh(&queue->lock);
+-
+- ring->frame_max = req->nm_frame_nr - 1;
+- ring->head = 0;
+- ring->frame_size = req->nm_frame_size;
+- ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
+-
+- swap(ring->pg_vec_len, req->nm_block_nr);
+- swap(ring->pg_vec_order, order);
+- swap(ring->pg_vec, pg_vec);
+-
+- __skb_queue_purge(queue);
+- spin_unlock_bh(&queue->lock);
+-
+- WARN_ON(atomic_read(&nlk->mapped));
+-
+- if (pg_vec)
+- free_pg_vec(pg_vec, order, req->nm_block_nr);
+-}
+-
+-static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
+- bool tx_ring)
+-{
+- struct netlink_sock *nlk = nlk_sk(sk);
+- struct netlink_ring *ring;
+- void **pg_vec = NULL;
+- unsigned int order = 0;
+-
+- ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+-
+- if (atomic_read(&nlk->mapped))
+- return -EBUSY;
+- if (atomic_read(&ring->pending))
+- return -EBUSY;
+-
+- if (req->nm_block_nr) {
+- if (ring->pg_vec != NULL)
+- return -EBUSY;
+-
+- if ((int)req->nm_block_size <= 0)
+- return -EINVAL;
+- if (!PAGE_ALIGNED(req->nm_block_size))
+- return -EINVAL;
+- if (req->nm_frame_size < NL_MMAP_HDRLEN)
+- return -EINVAL;
+- if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
+- return -EINVAL;
+-
+- ring->frames_per_block = req->nm_block_size /
+- req->nm_frame_size;
+- if (ring->frames_per_block == 0)
+- return -EINVAL;
+- if (ring->frames_per_block * req->nm_block_nr !=
+- req->nm_frame_nr)
+- return -EINVAL;
+-
+- order = get_order(req->nm_block_size);
+- pg_vec = alloc_pg_vec(nlk, req, order);
+- if (pg_vec == NULL)
+- return -ENOMEM;
+- } else {
+- if (req->nm_frame_nr)
+- return -EINVAL;
+- }
+-
+- mutex_lock(&nlk->pg_vec_lock);
+- if (atomic_read(&nlk->mapped) == 0) {
+- __netlink_set_ring(sk, req, tx_ring, pg_vec, order);
+- mutex_unlock(&nlk->pg_vec_lock);
+- return 0;
+- }
+-
+- mutex_unlock(&nlk->pg_vec_lock);
+-
+- if (pg_vec)
+- free_pg_vec(pg_vec, order, req->nm_block_nr);
+-
+- return -EBUSY;
+-}
+-
+-static void netlink_mm_open(struct vm_area_struct *vma)
+-{
+- struct file *file = vma->vm_file;
+- struct socket *sock = file->private_data;
+- struct sock *sk = sock->sk;
+-
+- if (sk)
+- atomic_inc(&nlk_sk(sk)->mapped);
+-}
+-
+-static void netlink_mm_close(struct vm_area_struct *vma)
+-{
+- struct file *file = vma->vm_file;
+- struct socket *sock = file->private_data;
+- struct sock *sk = sock->sk;
+-
+- if (sk)
+- atomic_dec(&nlk_sk(sk)->mapped);
+-}
+-
+-static const struct vm_operations_struct netlink_mmap_ops = {
+- .open = netlink_mm_open,
+- .close = netlink_mm_close,
+-};
+-
+-static int netlink_mmap(struct file *file, struct socket *sock,
+- struct vm_area_struct *vma)
+-{
+- struct sock *sk = sock->sk;
+- struct netlink_sock *nlk = nlk_sk(sk);
+- struct netlink_ring *ring;
+- unsigned long start, size, expected;
+- unsigned int i;
+- int err = -EINVAL;
+-
+- if (vma->vm_pgoff)
+- return -EINVAL;
+-
+- mutex_lock(&nlk->pg_vec_lock);
+-
+- expected = 0;
+- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
+- if (ring->pg_vec == NULL)
+- continue;
+- expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
+- }
+-
+- if (expected == 0)
+- goto out;
+-
+- size = vma->vm_end - vma->vm_start;
+- if (size != expected)
+- goto out;
+-
+- start = vma->vm_start;
+- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
+- if (ring->pg_vec == NULL)
+- continue;
+-
+- for (i = 0; i < ring->pg_vec_len; i++) {
+- struct page *page;
+- void *kaddr = ring->pg_vec[i];
+- unsigned int pg_num;
+-
+- for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
+- page = pgvec_to_page(kaddr);
+- err = vm_insert_page(vma, start, page);
+- if (err < 0)
+- goto out;
+- start += PAGE_SIZE;
+- kaddr += PAGE_SIZE;
+- }
+- }
+- }
+-
+- atomic_inc(&nlk->mapped);
+- vma->vm_ops = &netlink_mmap_ops;
+- err = 0;
+-out:
+- mutex_unlock(&nlk->pg_vec_lock);
+- return err;
+-}
+-
+-static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
+-{
+-#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
+- struct page *p_start, *p_end;
+-
+- /* First page is flushed through netlink_{get,set}_status */
+- p_start = pgvec_to_page(hdr + PAGE_SIZE);
+- p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
+- while (p_start <= p_end) {
+- flush_dcache_page(p_start);
+- p_start++;
+- }
+-#endif
+-}
+-
+-static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
+-{
+- smp_rmb();
+- flush_dcache_page(pgvec_to_page(hdr));
+- return hdr->nm_status;
+-}
+-
+-static void netlink_set_status(struct nl_mmap_hdr *hdr,
+- enum nl_mmap_status status)
+-{
+- smp_mb();
+- hdr->nm_status = status;
+- flush_dcache_page(pgvec_to_page(hdr));
+-}
+-
+-static struct nl_mmap_hdr *
+-__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
+-{
+- unsigned int pg_vec_pos, frame_off;
+-
+- pg_vec_pos = pos / ring->frames_per_block;
+- frame_off = pos % ring->frames_per_block;
+-
+- return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
+-}
+-
+-static struct nl_mmap_hdr *
+-netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
+- enum nl_mmap_status status)
+-{
+- struct nl_mmap_hdr *hdr;
+-
+- hdr = __netlink_lookup_frame(ring, pos);
+- if (netlink_get_status(hdr) != status)
+- return NULL;
+-
+- return hdr;
+-}
+-
+-static struct nl_mmap_hdr *
+-netlink_current_frame(const struct netlink_ring *ring,
+- enum nl_mmap_status status)
+-{
+- return netlink_lookup_frame(ring, ring->head, status);
+-}
+-
+-static struct nl_mmap_hdr *
+-netlink_previous_frame(const struct netlink_ring *ring,
+- enum nl_mmap_status status)
+-{
+- unsigned int prev;
+-
+- prev = ring->head ? ring->head - 1 : ring->frame_max;
+- return netlink_lookup_frame(ring, prev, status);
+-}
+-
+-static void netlink_increment_head(struct netlink_ring *ring)
+-{
+- ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
+-}
+-
+-static void netlink_forward_ring(struct netlink_ring *ring)
+-{
+- unsigned int head = ring->head, pos = head;
+- const struct nl_mmap_hdr *hdr;
+-
+- do {
+- hdr = __netlink_lookup_frame(ring, pos);
+- if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
+- break;
+- if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
+- break;
+- netlink_increment_head(ring);
+- } while (ring->head != head);
+-}
+-
+-static bool netlink_dump_space(struct netlink_sock *nlk)
+-{
+- struct netlink_ring *ring = &nlk->rx_ring;
+- struct nl_mmap_hdr *hdr;
+- unsigned int n;
+-
+- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+- if (hdr == NULL)
+- return false;
+-
+- n = ring->head + ring->frame_max / 2;
+- if (n > ring->frame_max)
+- n -= ring->frame_max;
+-
+- hdr = __netlink_lookup_frame(ring, n);
+-
+- return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
+-}
+-
+-static unsigned int netlink_poll(struct file *file, struct socket *sock,
+- poll_table *wait)
+-{
+- struct sock *sk = sock->sk;
+- struct netlink_sock *nlk = nlk_sk(sk);
+- unsigned int mask;
+- int err;
+-
+- if (nlk->rx_ring.pg_vec != NULL) {
+- /* Memory mapped sockets don't call recvmsg(), so flow control
+- * for dumps is performed here. A dump is allowed to continue
+- * if at least half the ring is unused.
+- */
+- while (nlk->cb_running && netlink_dump_space(nlk)) {
+- err = netlink_dump(sk);
+- if (err < 0) {
+- sk->sk_err = -err;
+- sk->sk_error_report(sk);
+- break;
+- }
+- }
+- netlink_rcv_wake(sk);
+- }
+-
+- mask = datagram_poll(file, sock, wait);
+-
+- spin_lock_bh(&sk->sk_receive_queue.lock);
+- if (nlk->rx_ring.pg_vec) {
+- netlink_forward_ring(&nlk->rx_ring);
+- if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED))
+- mask |= POLLIN | POLLRDNORM;
+- }
+- spin_unlock_bh(&sk->sk_receive_queue.lock);
+-
+- spin_lock_bh(&sk->sk_write_queue.lock);
+- if (nlk->tx_ring.pg_vec) {
+- if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
+- mask |= POLLOUT | POLLWRNORM;
+- }
+- spin_unlock_bh(&sk->sk_write_queue.lock);
+-
+- return mask;
+-}
+-
+-static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
+-{
+- return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
+-}
+-
+-static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
+- struct netlink_ring *ring,
+- struct nl_mmap_hdr *hdr)
+-{
+- unsigned int size;
+- void *data;
+-
+- size = ring->frame_size - NL_MMAP_HDRLEN;
+- data = (void *)hdr + NL_MMAP_HDRLEN;
+-
+- skb->head = data;
+- skb->data = data;
+- skb_reset_tail_pointer(skb);
+- skb->end = skb->tail + size;
+- skb->len = 0;
+-
+- skb->destructor = netlink_skb_destructor;
+- NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
+- NETLINK_CB(skb).sk = sk;
+-}
+-
+-static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
+- u32 dst_portid, u32 dst_group,
+- struct sock_iocb *siocb)
+-{
+- struct netlink_sock *nlk = nlk_sk(sk);
+- struct netlink_ring *ring;
+- struct nl_mmap_hdr *hdr;
+- struct sk_buff *skb;
+- unsigned int maxlen;
+- int err = 0, len = 0;
+-
+- mutex_lock(&nlk->pg_vec_lock);
+-
+- ring = &nlk->tx_ring;
+- maxlen = ring->frame_size - NL_MMAP_HDRLEN;
+-
+- do {
+- unsigned int nm_len;
+-
+- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
+- if (hdr == NULL) {
+- if (!(msg->msg_flags & MSG_DONTWAIT) &&
+- atomic_read(&nlk->tx_ring.pending))
+- schedule();
+- continue;
+- }
+-
+- nm_len = ACCESS_ONCE(hdr->nm_len);
+- if (nm_len > maxlen) {
+- err = -EINVAL;
+- goto out;
+- }
+-
+- netlink_frame_flush_dcache(hdr, nm_len);
+-
+- skb = alloc_skb(nm_len, GFP_KERNEL);
+- if (skb == NULL) {
+- err = -ENOBUFS;
+- goto out;
+- }
+- __skb_put(skb, nm_len);
+- memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
+- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+-
+- netlink_increment_head(ring);
+-
+- NETLINK_CB(skb).portid = nlk->portid;
+- NETLINK_CB(skb).dst_group = dst_group;
+- NETLINK_CB(skb).creds = siocb->scm->creds;
+-
+- err = security_netlink_send(sk, skb);
+- if (err) {
+- kfree_skb(skb);
+- goto out;
+- }
+-
+- if (unlikely(dst_group)) {
+- atomic_inc(&skb->users);
+- netlink_broadcast(sk, skb, dst_portid, dst_group,
+- GFP_KERNEL);
+- }
+- err = netlink_unicast(sk, skb, dst_portid,
+- msg->msg_flags & MSG_DONTWAIT);
+- if (err < 0)
+- goto out;
+- len += err;
+-
+- } while (hdr != NULL ||
+- (!(msg->msg_flags & MSG_DONTWAIT) &&
+- atomic_read(&nlk->tx_ring.pending)));
+-
+- if (len > 0)
+- err = len;
+-out:
+- mutex_unlock(&nlk->pg_vec_lock);
+- return err;
+-}
+-
+-static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
+-{
+- struct nl_mmap_hdr *hdr;
+-
+- hdr = netlink_mmap_hdr(skb);
+- hdr->nm_len = skb->len;
+- hdr->nm_group = NETLINK_CB(skb).dst_group;
+- hdr->nm_pid = NETLINK_CB(skb).creds.pid;
+- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
+- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
+- netlink_frame_flush_dcache(hdr, hdr->nm_len);
+- netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+-
+- NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
+- kfree_skb(skb);
+-}
+-
+-static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
+-{
+- struct netlink_sock *nlk = nlk_sk(sk);
+- struct netlink_ring *ring = &nlk->rx_ring;
+- struct nl_mmap_hdr *hdr;
+-
+- spin_lock_bh(&sk->sk_receive_queue.lock);
+- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+- if (hdr == NULL) {
+- spin_unlock_bh(&sk->sk_receive_queue.lock);
+- kfree_skb(skb);
+- netlink_overrun(sk);
+- return;
+- }
+- netlink_increment_head(ring);
+- __skb_queue_tail(&sk->sk_receive_queue, skb);
+- spin_unlock_bh(&sk->sk_receive_queue.lock);
+-
+- hdr->nm_len = skb->len;
+- hdr->nm_group = NETLINK_CB(skb).dst_group;
+- hdr->nm_pid = NETLINK_CB(skb).creds.pid;
+- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
+- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
+- netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
+-}
+-
+-#else /* CONFIG_NETLINK_MMAP */
+-#define netlink_rx_is_mmaped(sk) false
+-#define netlink_tx_is_mmaped(sk) false
+-#define netlink_mmap sock_no_mmap
+-#define netlink_poll datagram_poll
+-#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0
+-#endif /* CONFIG_NETLINK_MMAP */
+-
+ static void netlink_skb_destructor(struct sk_buff *skb)
+ {
+-#ifdef CONFIG_NETLINK_MMAP
+- struct nl_mmap_hdr *hdr;
+- struct netlink_ring *ring;
+- struct sock *sk;
+-
+- /* If a packet from the kernel to userspace was freed because of an
+- * error without being delivered to userspace, the kernel must reset
+- * the status. In the direction userspace to kernel, the status is
+- * always reset here after the packet was processed and freed.
+- */
+- if (netlink_skb_is_mmaped(skb)) {
+- hdr = netlink_mmap_hdr(skb);
+- sk = NETLINK_CB(skb).sk;
+-
+- if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
+- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+- ring = &nlk_sk(sk)->tx_ring;
+- } else {
+- if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
+- hdr->nm_len = 0;
+- netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+- }
+- ring = &nlk_sk(sk)->rx_ring;
+- }
+-
+- WARN_ON(atomic_read(&ring->pending) == 0);
+- atomic_dec(&ring->pending);
+- sock_put(sk);
+-
+- skb->head = NULL;
+- }
+-#endif
+ if (is_vmalloc_addr(skb->head)) {
+ if (!skb->cloned ||
+ !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
+@@ -934,18 +343,6 @@ static void netlink_sock_destruct(struct
+ }
+
+ skb_queue_purge(&sk->sk_receive_queue);
+-#ifdef CONFIG_NETLINK_MMAP
+- if (1) {
+- struct nl_mmap_req req;
+-
+- memset(&req, 0, sizeof(req));
+- if (nlk->rx_ring.pg_vec)
+- __netlink_set_ring(sk, &req, false, NULL, 0);
+- memset(&req, 0, sizeof(req));
+- if (nlk->tx_ring.pg_vec)
+- __netlink_set_ring(sk, &req, true, NULL, 0);
+- }
+-#endif /* CONFIG_NETLINK_MMAP */
+
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
+@@ -1153,9 +550,6 @@ static int __netlink_create(struct net *
+ mutex_init(nlk->cb_mutex);
+ }
+ init_waitqueue_head(&nlk->wait);
+-#ifdef CONFIG_NETLINK_MMAP
+- mutex_init(&nlk->pg_vec_lock);
+-#endif
+
+ sk->sk_destruct = netlink_sock_destruct;
+ sk->sk_protocol = protocol;
+@@ -1653,9 +1047,8 @@ int netlink_attachskb(struct sock *sk, s
+
+ nlk = nlk_sk(sk);
+
+- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+- test_bit(NETLINK_CONGESTED, &nlk->state)) &&
+- !netlink_skb_is_mmaped(skb)) {
++ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
++ test_bit(NETLINK_CONGESTED, &nlk->state)) {
+ DECLARE_WAITQUEUE(wait, current);
+ if (!*timeo) {
+ if (!ssk || netlink_is_kernel(ssk))
+@@ -1693,14 +1086,7 @@ static int __netlink_sendskb(struct sock
+
+ netlink_deliver_tap(skb);
+
+-#ifdef CONFIG_NETLINK_MMAP
+- if (netlink_skb_is_mmaped(skb))
+- netlink_queue_mmaped_skb(sk, skb);
+- else if (netlink_rx_is_mmaped(sk))
+- netlink_ring_set_copied(sk, skb);
+- else
+-#endif /* CONFIG_NETLINK_MMAP */
+- skb_queue_tail(&sk->sk_receive_queue, skb);
++ skb_queue_tail(&sk->sk_receive_queue, skb);
+ sk->sk_data_ready(sk);
+ return len;
+ }
+@@ -1724,9 +1110,6 @@ static struct sk_buff *netlink_trim(stru
+ int delta;
+
+ WARN_ON(skb->sk != NULL);
+- if (netlink_skb_is_mmaped(skb))
+- return skb;
+-
+ delta = skb->end - skb->tail;
+ if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
+ return skb;
+@@ -1805,66 +1188,6 @@ EXPORT_SYMBOL(netlink_unicast);
+ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
+ u32 dst_portid, gfp_t gfp_mask)
+ {
+-#ifdef CONFIG_NETLINK_MMAP
+- struct sock *sk = NULL;
+- struct sk_buff *skb;
+- struct netlink_ring *ring;
+- struct nl_mmap_hdr *hdr;
+- unsigned int maxlen;
+-
+- sk = netlink_getsockbyportid(ssk, dst_portid);
+- if (IS_ERR(sk))
+- goto out;
+-
+- ring = &nlk_sk(sk)->rx_ring;
+- /* fast-path without atomic ops for common case: non-mmaped receiver */
+- if (ring->pg_vec == NULL)
+- goto out_put;
+-
+- if (ring->frame_size - NL_MMAP_HDRLEN < size)
+- goto out_put;
+-
+- skb = alloc_skb_head(gfp_mask);
+- if (skb == NULL)
+- goto err1;
+-
+- spin_lock_bh(&sk->sk_receive_queue.lock);
+- /* check again under lock */
+- if (ring->pg_vec == NULL)
+- goto out_free;
+-
+- /* check again under lock */
+- maxlen = ring->frame_size - NL_MMAP_HDRLEN;
+- if (maxlen < size)
+- goto out_free;
+-
+- netlink_forward_ring(ring);
+- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+- if (hdr == NULL)
+- goto err2;
+- netlink_ring_setup_skb(skb, sk, ring, hdr);
+- netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
+- atomic_inc(&ring->pending);
+- netlink_increment_head(ring);
+-
+- spin_unlock_bh(&sk->sk_receive_queue.lock);
+- return skb;
+-
+-err2:
+- kfree_skb(skb);
+- spin_unlock_bh(&sk->sk_receive_queue.lock);
+- netlink_overrun(sk);
+-err1:
+- sock_put(sk);
+- return NULL;
+-
+-out_free:
+- kfree_skb(skb);
+- spin_unlock_bh(&sk->sk_receive_queue.lock);
+-out_put:
+- sock_put(sk);
+-out:
+-#endif
+ return alloc_skb(size, gfp_mask);
+ }
+ EXPORT_SYMBOL_GPL(netlink_alloc_skb);
+@@ -2126,8 +1449,7 @@ static int netlink_setsockopt(struct soc
+ if (level != SOL_NETLINK)
+ return -ENOPROTOOPT;
+
+- if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
+- optlen >= sizeof(int) &&
++ if (optlen >= sizeof(int) &&
+ get_user(val, (unsigned int __user *)optval))
+ return -EFAULT;
+
+@@ -2180,25 +1502,6 @@ static int netlink_setsockopt(struct soc
+ }
+ err = 0;
+ break;
+-#ifdef CONFIG_NETLINK_MMAP
+- case NETLINK_RX_RING:
+- case NETLINK_TX_RING: {
+- struct nl_mmap_req req;
+-
+- /* Rings might consume more memory than queue limits, require
+- * CAP_NET_ADMIN.
+- */
+- if (!capable(CAP_NET_ADMIN))
+- return -EPERM;
+- if (optlen < sizeof(req))
+- return -EINVAL;
+- if (copy_from_user(&req, optval, sizeof(req)))
+- return -EFAULT;
+- err = netlink_set_ring(sk, &req,
+- optname == NETLINK_TX_RING);
+- break;
+- }
+-#endif /* CONFIG_NETLINK_MMAP */
+ default:
+ err = -ENOPROTOOPT;
+ }
+@@ -2311,13 +1614,6 @@ static int netlink_sendmsg(struct kiocb
+ goto out;
+ }
+
+- if (netlink_tx_is_mmaped(sk) &&
+- msg->msg_iov->iov_base == NULL) {
+- err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
+- siocb);
+- goto out;
+- }
+-
+ err = -EMSGSIZE;
+ if (len > sk->sk_sndbuf - 32)
+ goto out;
+@@ -2643,8 +1939,7 @@ static int netlink_dump(struct sock *sk)
+ cb = &nlk->cb;
+ alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
+
+- if (!netlink_rx_is_mmaped(sk) &&
+- atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
++ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+ goto errout_skb;
+
+ /* NLMSG_GOODSIZE is small to avoid high order allocations being
+@@ -2721,16 +2016,7 @@ int __netlink_dump_start(struct sock *ss
+ struct netlink_sock *nlk;
+ int ret;
+
+- /* Memory mapped dump requests need to be copied to avoid looping
+- * on the pending state in netlink_mmap_sendmsg() while the CB hold
+- * a reference to the skb.
+- */
+- if (netlink_skb_is_mmaped(skb)) {
+- skb = skb_copy(skb, GFP_KERNEL);
+- if (skb == NULL)
+- return -ENOBUFS;
+- } else
+- atomic_inc(&skb->users);
++ atomic_inc(&skb->users);
+
+ sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
+ if (sk == NULL) {
+@@ -3071,7 +2357,7 @@ static const struct proto_ops netlink_op
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = netlink_getname,
+- .poll = netlink_poll,
++ .poll = datagram_poll,
+ .ioctl = sock_no_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+@@ -3079,7 +2365,7 @@ static const struct proto_ops netlink_op
+ .getsockopt = netlink_getsockopt,
+ .sendmsg = netlink_sendmsg,
+ .recvmsg = netlink_recvmsg,
+- .mmap = netlink_mmap,
++ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+ };
+
+--- a/net/netlink/af_netlink.h
++++ b/net/netlink/af_netlink.h
+@@ -42,12 +42,6 @@ struct netlink_sock {
+ int (*netlink_bind)(int group);
+ void (*netlink_unbind)(int group);
+ struct module *module;
+-#ifdef CONFIG_NETLINK_MMAP
+- struct mutex pg_vec_lock;
+- struct netlink_ring rx_ring;
+- struct netlink_ring tx_ring;
+- atomic_t mapped;
+-#endif /* CONFIG_NETLINK_MMAP */
+
+ struct rhash_head node;
+ };
+@@ -57,15 +51,6 @@ static inline struct netlink_sock *nlk_s
+ return container_of(sk, struct netlink_sock, sk);
+ }
+
+-static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb)
+-{
+-#ifdef CONFIG_NETLINK_MMAP
+- return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
+-#else
+- return false;
+-#endif /* CONFIG_NETLINK_MMAP */
+-}
+-
+ struct netlink_table {
+ struct rhashtable hash;
+ struct hlist_head mc_list;
+--- a/net/netlink/diag.c
++++ b/net/netlink/diag.c
+@@ -8,41 +8,6 @@
+
+ #include "af_netlink.h"
+
+-#ifdef CONFIG_NETLINK_MMAP
+-static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type,
+- struct sk_buff *nlskb)
+-{
+- struct netlink_diag_ring ndr;
+-
+- ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
+- ndr.ndr_block_nr = ring->pg_vec_len;
+- ndr.ndr_frame_size = ring->frame_size;
+- ndr.ndr_frame_nr = ring->frame_max + 1;
+-
+- return nla_put(nlskb, nl_type, sizeof(ndr), &ndr);
+-}
+-
+-static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
+-{
+- struct netlink_sock *nlk = nlk_sk(sk);
+- int ret;
+-
+- mutex_lock(&nlk->pg_vec_lock);
+- ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb);
+- if (!ret)
+- ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING,
+- nlskb);
+- mutex_unlock(&nlk->pg_vec_lock);
+-
+- return ret;
+-}
+-#else
+-static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
+-{
+- return 0;
+-}
+-#endif
+-
+ static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
+ {
+ struct netlink_sock *nlk = nlk_sk(sk);
+@@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk,
+ sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
+ goto out_nlmsg_trim;
+
+- if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) &&
+- sk_diag_put_rings_cfg(sk, skb))
+- goto out_nlmsg_trim;
+-
+ return nlmsg_end(skb, nlh);
+
+ out_nlmsg_trim: