From: Greg Kroah-Hartman Date: Mon, 3 Jul 2017 11:53:04 +0000 (+0200) Subject: 4.9-stable patches X-Git-Tag: v3.18.60~14 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e466e5ab266ac511d80228812d2e0ec85320e6d5;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: aio-fix-lock-dep-warning.patch be2net-don-t-delete-mac-on-close-on-unprivileged-be3-vfs.patch be2net-fix-mac-addr-setting-on-privileged-be3-vfs.patch be2net-fix-status-check-in-be_cmd_pmac_add.patch coredump-ensure-proper-size-of-sparse-core-files.patch drm-amd-powerplay-fix-vce-cg-logic-error-on-cz-st.patch drm-amd-powerplay-refine-vce-dpm-update-code-on-cz.patch ip6_tunnel-ip6_gre-fix-setting-of-dscp-on-encapsulated-packets.patch mac80211-initialize-smps-field-in-ht-capabilities.patch mm-vmalloc.c-huge-vmap-fail-gracefully-on-unexpected-huge-vmap-mappings.patch net-mlx4_core-eliminate-warning-messages-for-srq_limit-under-sriov.patch net-phy-dp83867-allow-rgmii_txid-rgmii_rxid-interface-types.patch perf-core-fix-sys_perf_event_open-vs.-hotplug.patch perf-probe-fix-to-probe-on-gcc-generated-functions-in-modules.patch perf-probe-fix-to-show-correct-locations-for-events-on-modules.patch perf-x86-reject-non-sampling-events-with-precise_ip.patch pmem-return-eio-on-read_pmem-failure.patch ravb-fix-use-after-free-on-ifconfig-eth0-down.patch s390-ctl_reg-make-__ctl_load-a-full-memory-barrier.patch sctp-check-af-before-verify-address-in-sctp_addr_id2transport.patch swiotlb-ensure-that-page-sized-mappings-are-page-aligned.patch tipc-allocate-user-memory-with-gfp_kernel-flag.patch usb-dwc2-gadget-fix-gusbcfg.usbtrdtim-value.patch x86-mpx-use-compatible-types-in-comparison-to-fix-sparse-error.patch x86-tsc-add-the-intel-denverton-processor-to-native_calibrate_tsc.patch xfrm-fix-stack-access-out-of-bounds-with-config_xfrm_sub_policy.patch xfrm-null-dereference-on-allocation-failure.patch xfrm-oops-on-error-in-pfkey_msg2xfrm_state.patch --- diff --git 
a/queue-4.9/aio-fix-lock-dep-warning.patch b/queue-4.9/aio-fix-lock-dep-warning.patch new file mode 100644 index 00000000000..f37075e4260 --- /dev/null +++ b/queue-4.9/aio-fix-lock-dep-warning.patch @@ -0,0 +1,84 @@ +From foo@baz Mon Jul 3 13:33:55 CEST 2017 +From: Shaohua Li +Date: Tue, 13 Dec 2016 12:09:56 -0800 +Subject: aio: fix lock dep warning + +From: Shaohua Li + + +[ Upstream commit a12f1ae61c489076a9aeb90bddca7722bf330df3 ] + +lockdep reports a warnning. file_start_write/file_end_write only +acquire/release the lock for regular files. So checking the files in aio +side too. + +[ 453.532141] ------------[ cut here ]------------ +[ 453.533011] WARNING: CPU: 1 PID: 1298 at ../kernel/locking/lockdep.c:3514 lock_release+0x434/0x670 +[ 453.533011] DEBUG_LOCKS_WARN_ON(depth <= 0) +[ 453.533011] Modules linked in: +[ 453.533011] CPU: 1 PID: 1298 Comm: fio Not tainted 4.9.0+ #964 +[ 453.533011] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.0-1.fc24 04/01/2014 +[ 453.533011] ffff8803a24b7a70 ffffffff8196cffb ffff8803a24b7ae8 0000000000000000 +[ 453.533011] ffff8803a24b7ab8 ffffffff81091ee1 ffff8803a5dba700 00000dba00000008 +[ 453.533011] ffffed0074496f59 ffff8803a5dbaf54 ffff8803ae0f8488 fffffffffffffdef +[ 453.533011] Call Trace: +[ 453.533011] [] dump_stack+0x67/0x9c +[ 453.533011] [] __warn+0x111/0x130 +[ 453.533011] [] warn_slowpath_fmt+0x97/0xb0 +[ 453.533011] [] ? __warn+0x130/0x130 +[ 453.533011] [] ? blk_finish_plug+0x29/0x60 +[ 453.533011] [] lock_release+0x434/0x670 +[ 453.533011] [] ? import_single_range+0xd4/0x110 +[ 453.533011] [] ? rw_verify_area+0x65/0x140 +[ 453.533011] [] ? aio_write+0x1f6/0x280 +[ 453.533011] [] aio_write+0x229/0x280 +[ 453.533011] [] ? aio_complete+0x640/0x640 +[ 453.533011] [] ? debug_check_no_locks_freed+0x1a0/0x1a0 +[ 453.533011] [] ? debug_lockdep_rcu_enabled.part.2+0x1a/0x30 +[ 453.533011] [] ? debug_lockdep_rcu_enabled+0x35/0x40 +[ 453.533011] [] ? 
__might_fault+0x7e/0xf0 +[ 453.533011] [] do_io_submit+0x94c/0xb10 +[ 453.533011] [] ? do_io_submit+0x23e/0xb10 +[ 453.533011] [] ? SyS_io_destroy+0x270/0x270 +[ 453.533011] [] ? mark_held_locks+0x23/0xc0 +[ 453.533011] [] ? trace_hardirqs_on_thunk+0x1a/0x1c +[ 453.533011] [] SyS_io_submit+0x10/0x20 +[ 453.533011] [] entry_SYSCALL_64_fastpath+0x18/0xad +[ 453.533011] [] ? trace_hardirqs_off_caller+0xc0/0x110 +[ 453.533011] ---[ end trace b2fbe664d1cc0082 ]--- + +Cc: Dmitry Monakhov +Cc: Jan Kara +Cc: Christoph Hellwig +Cc: Al Viro +Reviewed-by: Christoph Hellwig +Signed-off-by: Shaohua Li +Signed-off-by: Al Viro +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/aio.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/fs/aio.c ++++ b/fs/aio.c +@@ -1085,7 +1085,8 @@ static void aio_complete(struct kiocb *k + * Tell lockdep we inherited freeze protection from submission + * thread. + */ +- __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE); ++ if (S_ISREG(file_inode(file)->i_mode)) ++ __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE); + file_end_write(file); + } + +@@ -1492,7 +1493,8 @@ static ssize_t aio_write(struct kiocb *r + * by telling it the lock got released so that it doesn't + * complain about held lock when we return to userspace. 
+ */ +- __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); ++ if (S_ISREG(file_inode(file)->i_mode)) ++ __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); + } + kfree(iovec); + return ret; diff --git a/queue-4.9/be2net-don-t-delete-mac-on-close-on-unprivileged-be3-vfs.patch b/queue-4.9/be2net-don-t-delete-mac-on-close-on-unprivileged-be3-vfs.patch new file mode 100644 index 00000000000..b73ca506623 --- /dev/null +++ b/queue-4.9/be2net-don-t-delete-mac-on-close-on-unprivileged-be3-vfs.patch @@ -0,0 +1,40 @@ +From foo@baz Mon Jul 3 13:37:32 CEST 2017 +From: Ivan Vecera +Date: Fri, 13 Jan 2017 22:38:28 +0100 +Subject: be2net: don't delete MAC on close on unprivileged BE3 VFs + +From: Ivan Vecera + + +[ Upstream commit 6d928ae590c8d58cfd5cca997d54394de139cbb7 ] + +BE3 VFs without FILTMGMT privilege are not allowed to modify its MAC, +VLAN table and UC/MC lists. So don't try to delete MAC on such VFs. + +Cc: Sathya Perla +Cc: Ajit Khaparde +Cc: Sriharsha Basavapatna +Cc: Somnath Kotur +Signed-off-by: Ivan Vecera +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/emulex/benet/be_main.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/emulex/benet/be_main.c ++++ b/drivers/net/ethernet/emulex/benet/be_main.c +@@ -3630,7 +3630,11 @@ static void be_rx_qs_destroy(struct be_a + + static void be_disable_if_filters(struct be_adapter *adapter) + { +- be_dev_mac_del(adapter, adapter->pmac_id[0]); ++ /* Don't delete MAC on BE3 VFs without FILTMGMT privilege */ ++ if (!BEx_chip(adapter) || !be_virtfn(adapter) || ++ check_privilege(adapter, BE_PRIV_FILTMGMT)) ++ be_dev_mac_del(adapter, adapter->pmac_id[0]); ++ + be_clear_uc_list(adapter); + be_clear_mc_list(adapter); + diff --git a/queue-4.9/be2net-fix-mac-addr-setting-on-privileged-be3-vfs.patch b/queue-4.9/be2net-fix-mac-addr-setting-on-privileged-be3-vfs.patch new file mode 100644 index 00000000000..ef32fd6c1e3 --- /dev/null +++ b/queue-4.9/be2net-fix-mac-addr-setting-on-privileged-be3-vfs.patch @@ -0,0 +1,70 @@ +From foo@baz Mon Jul 3 13:37:32 CEST 2017 +From: Ivan Vecera +Date: Fri, 13 Jan 2017 22:38:29 +0100 +Subject: be2net: fix MAC addr setting on privileged BE3 VFs + +From: Ivan Vecera + + +[ Upstream commit 34393529163af7163ef8459808e3cf2af7db7f16 ] + +During interface opening MAC address stored in netdev->dev_addr is +programmed in the HW with exception of BE3 VFs where the initial +MAC is programmed by parent PF. This is OK when MAC address is not +changed when an interfaces is down. In this case the requested MAC is +stored to netdev->dev_addr and later is stored into HW during opening. +But this is not done for all BE3 VFs so the NIC HW does not know +anything about this change and all traffic is filtered. + +This is the case of bonding if fail_over_mac == 0 where the MACs of +the slaves are changed while they are down. 
+ +The be2net behavior is too restrictive because if a BE3 VF has +the FILTMGMT privilege then it is able to modify its MAC without +any restriction. + +To solve the described problem the driver should take care about these +privileged BE3 VFs so the MAC is programmed during opening. And by +contrast unprivileged BE3 VFs should not be allowed to change its MAC +in any case. + +Cc: Sathya Perla +Cc: Ajit Khaparde +Cc: Sriharsha Basavapatna +Cc: Somnath Kotur +Signed-off-by: Ivan Vecera +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/emulex/benet/be_main.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/emulex/benet/be_main.c ++++ b/drivers/net/ethernet/emulex/benet/be_main.c +@@ -319,6 +319,13 @@ static int be_mac_addr_set(struct net_de + if (ether_addr_equal(addr->sa_data, adapter->dev_mac)) + return 0; + ++ /* BE3 VFs without FILTMGMT privilege are not allowed to set its MAC ++ * address ++ */ ++ if (BEx_chip(adapter) && be_virtfn(adapter) && ++ !check_privilege(adapter, BE_PRIV_FILTMGMT)) ++ return -EPERM; ++ + /* if device is not running, copy MAC to netdev->dev_addr */ + if (!netif_running(netdev)) + goto done; +@@ -3787,8 +3794,9 @@ static int be_enable_if_filters(struct b + if (status) + return status; + +- /* For BE3 VFs, the PF programs the initial MAC address */ +- if (!(BEx_chip(adapter) && be_virtfn(adapter))) { ++ /* Don't add MAC on BE3 VFs without FILTMGMT privilege */ ++ if (!BEx_chip(adapter) || !be_virtfn(adapter) || ++ check_privilege(adapter, BE_PRIV_FILTMGMT)) { + status = be_dev_mac_add(adapter, adapter->netdev->dev_addr); + if (status) + return status; diff --git a/queue-4.9/be2net-fix-status-check-in-be_cmd_pmac_add.patch b/queue-4.9/be2net-fix-status-check-in-be_cmd_pmac_add.patch new file mode 100644 index 00000000000..c9056b489f4 --- /dev/null +++ b/queue-4.9/be2net-fix-status-check-in-be_cmd_pmac_add.patch @@ -0,0 
+1,38 @@ +From foo@baz Mon Jul 3 13:37:32 CEST 2017 +From: Ivan Vecera +Date: Fri, 13 Jan 2017 22:38:27 +0100 +Subject: be2net: fix status check in be_cmd_pmac_add() + +From: Ivan Vecera + + +[ Upstream commit fe68d8bfe59c561664aa87d827aa4b320eb08895 ] + +Return value from be_mcc_notify_wait() contains a base completion status +together with an additional status. The base_status() macro need to be +used to access base status. + +Fixes: e3a7ae2 be2net: Changing MAC Address of a VF was broken +Cc: Sathya Perla +Cc: Ajit Khaparde +Cc: Sriharsha Basavapatna +Cc: Somnath Kotur +Signed-off-by: Ivan Vecera +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/emulex/benet/be_cmds.c ++++ b/drivers/net/ethernet/emulex/benet/be_cmds.c +@@ -1118,7 +1118,7 @@ int be_cmd_pmac_add(struct be_adapter *a + err: + mutex_unlock(&adapter->mcc_lock); + +- if (status == MCC_STATUS_UNAUTHORIZED_REQUEST) ++ if (base_status(status) == MCC_STATUS_UNAUTHORIZED_REQUEST) + status = -EPERM; + + return status; diff --git a/queue-4.9/coredump-ensure-proper-size-of-sparse-core-files.patch b/queue-4.9/coredump-ensure-proper-size-of-sparse-core-files.patch new file mode 100644 index 00000000000..5bfbd9b7b7e --- /dev/null +++ b/queue-4.9/coredump-ensure-proper-size-of-sparse-core-files.patch @@ -0,0 +1,77 @@ +From foo@baz Mon Jul 3 13:33:55 CEST 2017 +From: Dave Kleikamp +Date: Wed, 11 Jan 2017 13:25:00 -0600 +Subject: coredump: Ensure proper size of sparse core files + +From: Dave Kleikamp + + +[ Upstream commit 4d22c75d4c7b5c5f4bd31054f09103ee490878fd ] + +If the last section of a core file ends with an unmapped or zero page, +the size of the file does not correspond with the last dump_skip() call. +gdb complains that the file is truncated and can be confusing to users. 
+ +After all of the vma sections are written, make sure that the file size +is no smaller than the current file position. + +This problem can be demonstrated with gdb's bigcore testcase on the +sparc architecture. + +Signed-off-by: Dave Kleikamp +Cc: Alexander Viro +Cc: linux-fsdevel@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Al Viro +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/binfmt_elf.c | 1 + + fs/coredump.c | 18 ++++++++++++++++++ + include/linux/coredump.h | 1 + + 3 files changed, 20 insertions(+) + +--- a/fs/binfmt_elf.c ++++ b/fs/binfmt_elf.c +@@ -2296,6 +2296,7 @@ static int elf_core_dump(struct coredump + goto end_coredump; + } + } ++ dump_truncate(cprm); + + if (!elf_core_write_extra_data(cprm)) + goto end_coredump; +--- a/fs/coredump.c ++++ b/fs/coredump.c +@@ -833,3 +833,21 @@ int dump_align(struct coredump_params *c + return mod ? dump_skip(cprm, align - mod) : 1; + } + EXPORT_SYMBOL(dump_align); ++ ++/* ++ * Ensures that file size is big enough to contain the current file ++ * postion. This prevents gdb from complaining about a truncated file ++ * if the last "write" to the file was dump_skip. 
++ */ ++void dump_truncate(struct coredump_params *cprm) ++{ ++ struct file *file = cprm->file; ++ loff_t offset; ++ ++ if (file->f_op->llseek && file->f_op->llseek != no_llseek) { ++ offset = file->f_op->llseek(file, 0, SEEK_CUR); ++ if (i_size_read(file->f_mapping->host) < offset) ++ do_truncate(file->f_path.dentry, offset, 0, file); ++ } ++} ++EXPORT_SYMBOL(dump_truncate); +--- a/include/linux/coredump.h ++++ b/include/linux/coredump.h +@@ -14,6 +14,7 @@ struct coredump_params; + extern int dump_skip(struct coredump_params *cprm, size_t nr); + extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); + extern int dump_align(struct coredump_params *cprm, int align); ++extern void dump_truncate(struct coredump_params *cprm); + #ifdef CONFIG_COREDUMP + extern void do_coredump(const siginfo_t *siginfo); + #else diff --git a/queue-4.9/drm-amd-powerplay-fix-vce-cg-logic-error-on-cz-st.patch b/queue-4.9/drm-amd-powerplay-fix-vce-cg-logic-error-on-cz-st.patch new file mode 100644 index 00000000000..a296ef79e8c --- /dev/null +++ b/queue-4.9/drm-amd-powerplay-fix-vce-cg-logic-error-on-cz-st.patch @@ -0,0 +1,46 @@ +From foo@baz Mon Jul 3 13:33:55 CEST 2017 +From: Rex Zhu +Date: Tue, 10 Jan 2017 19:26:49 +0800 +Subject: drm/amd/powerplay: fix vce cg logic error on CZ/St. + +From: Rex Zhu + + +[ Upstream commit 3731d12dce83d47b357753ffc450ce03f1b49688 ] + +can fix Bug 191281: vce ib test failed. + +when vce idle, set vce clock gate, so the clock +in vce domain will be disabled. +when need to encode, disable vce clock gate, +enable the clocks to vce engine. 
+ +Signed-off-by: Rex Zhu +Reviewed-by: Alex Deucher +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c ++++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c +@@ -200,7 +200,7 @@ int cz_dpm_powergate_vce(struct pp_hwmgr + cgs_set_clockgating_state( + hwmgr->device, + AMD_IP_BLOCK_TYPE_VCE, +- AMD_CG_STATE_UNGATE); ++ AMD_CG_STATE_GATE); + cgs_set_powergating_state( + hwmgr->device, + AMD_IP_BLOCK_TYPE_VCE, +@@ -218,7 +218,7 @@ int cz_dpm_powergate_vce(struct pp_hwmgr + cgs_set_clockgating_state( + hwmgr->device, + AMD_IP_BLOCK_TYPE_VCE, +- AMD_PG_STATE_GATE); ++ AMD_PG_STATE_UNGATE); + cz_dpm_update_vce_dpm(hwmgr); + cz_enable_disable_vce_dpm(hwmgr, true); + return 0; diff --git a/queue-4.9/drm-amd-powerplay-refine-vce-dpm-update-code-on-cz.patch b/queue-4.9/drm-amd-powerplay-refine-vce-dpm-update-code-on-cz.patch new file mode 100644 index 00000000000..13982201111 --- /dev/null +++ b/queue-4.9/drm-amd-powerplay-refine-vce-dpm-update-code-on-cz.patch @@ -0,0 +1,56 @@ +From foo@baz Mon Jul 3 13:33:55 CEST 2017 +From: Rex Zhu +Date: Tue, 10 Jan 2017 15:47:50 +0800 +Subject: drm/amd/powerplay: refine vce dpm update code on Cz. + +From: Rex Zhu + + +[ Upstream commit ab8db87b8256e13a62f10af1d32f5fc233c398cc ] + +Program HardMin based on the vce_arbiter.ecclk +if ecclk is 0, disable ECLK DPM 0. 
Otherwise VCE +could hang if switching SCLK from DPM 0 to 6/7 + +Signed-off-by: Rex Zhu +Acked-by: Alex Deucher +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) + +--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c ++++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c +@@ -1402,14 +1402,22 @@ int cz_dpm_update_vce_dpm(struct pp_hwm + cz_hwmgr->vce_dpm.hard_min_clk, + PPSMC_MSG_SetEclkHardMin)); + } else { +- /*EPR# 419220 -HW limitation to to */ +- cz_hwmgr->vce_dpm.hard_min_clk = hwmgr->vce_arbiter.ecclk; +- smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, +- PPSMC_MSG_SetEclkHardMin, +- cz_get_eclk_level(hwmgr, +- cz_hwmgr->vce_dpm.hard_min_clk, +- PPSMC_MSG_SetEclkHardMin)); +- ++ /*Program HardMin based on the vce_arbiter.ecclk */ ++ if (hwmgr->vce_arbiter.ecclk == 0) { ++ smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, ++ PPSMC_MSG_SetEclkHardMin, 0); ++ /* disable ECLK DPM 0. 
Otherwise VCE could hang if ++ * switching SCLK from DPM 0 to 6/7 */ ++ smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, ++ PPSMC_MSG_SetEclkSoftMin, 1); ++ } else { ++ cz_hwmgr->vce_dpm.hard_min_clk = hwmgr->vce_arbiter.ecclk; ++ smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, ++ PPSMC_MSG_SetEclkHardMin, ++ cz_get_eclk_level(hwmgr, ++ cz_hwmgr->vce_dpm.hard_min_clk, ++ PPSMC_MSG_SetEclkHardMin)); ++ } + } + return 0; + } diff --git a/queue-4.9/ip6_tunnel-ip6_gre-fix-setting-of-dscp-on-encapsulated-packets.patch b/queue-4.9/ip6_tunnel-ip6_gre-fix-setting-of-dscp-on-encapsulated-packets.patch new file mode 100644 index 00000000000..3d1a8ee556c --- /dev/null +++ b/queue-4.9/ip6_tunnel-ip6_gre-fix-setting-of-dscp-on-encapsulated-packets.patch @@ -0,0 +1,156 @@ +From foo@baz Mon Jul 3 13:37:32 CEST 2017 +From: Peter Dawson +Date: Fri, 26 May 2017 06:35:18 +1000 +Subject: ip6_tunnel, ip6_gre: fix setting of DSCP on encapsulated packets + +From: Peter Dawson + + +[ Upstream commit 0e9a709560dbcfbace8bf4019dc5298619235891 ] + +This fix addresses two problems in the way the DSCP field is formulated + on the encapsulating header of IPv6 tunnels. +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195661 + +1) The IPv6 tunneling code was manipulating the DSCP field of the + encapsulating packet using the 32b flowlabel. Since the flowlabel is + only the lower 20b it was incorrect to assume that the upper 12b + containing the DSCP and ECN fields would remain intact when formulating + the encapsulating header. This fix handles the 'inherit' and + 'fixed-value' DSCP cases explicitly using the extant dsfield u8 variable. + +2) The use of INET_ECN_encapsulate(0, dsfield) in ip6_tnl_xmit was + incorrect and resulted in the DSCP value always being set to 0. 
+ +Commit 90427ef5d2a4 ("ipv6: fix flow labels when the traffic class + is non-0") caused the regression by masking out the flowlabel + which exposed the incorrect handling of the DSCP portion of the + flowlabel in ip6_tunnel and ip6_gre. + +Fixes: 90427ef5d2a4 ("ipv6: fix flow labels when the traffic class is non-0") +Signed-off-by: Peter Dawson +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 13 +++++++------ + net/ipv6/ip6_tunnel.c | 21 +++++++++++++-------- + 2 files changed, 20 insertions(+), 14 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -542,11 +542,10 @@ static inline int ip6gre_xmit_ipv4(struc + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + +- dsfield = ipv4_get_dsfield(iph); +- + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) +- fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) +- & IPV6_TCLASS_MASK; ++ dsfield = ipv4_get_dsfield(iph); ++ else ++ dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + +@@ -599,9 +598,11 @@ static inline int ip6gre_xmit_ipv6(struc + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + +- dsfield = ipv6_get_dsfield(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) +- fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); ++ dsfield = ipv6_get_dsfield(ipv6h); ++ else ++ dsfield = ip6_tclass(t->parms.flowinfo); ++ + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= ip6_flowlabel(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1196,7 +1196,7 @@ route_lookup: + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + ipv6h = ipv6_hdr(skb); +- ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), ++ ip6_flow_hdr(ipv6h, dsfield, + ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); + ipv6h->hop_limit = hop_limit; + ipv6h->nexthdr = 
proto; +@@ -1231,8 +1231,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, str + if (tproto != IPPROTO_IPIP && tproto != 0) + return -1; + +- dsfield = ipv4_get_dsfield(iph); +- + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; +@@ -1246,6 +1244,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, str + fl6.flowi6_proto = IPPROTO_IPIP; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; ++ dsfield = ip6_tclass(key->label); + } else { + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; +@@ -1254,8 +1253,9 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, str + fl6.flowi6_proto = IPPROTO_IPIP; + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) +- fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) +- & IPV6_TCLASS_MASK; ++ dsfield = ipv4_get_dsfield(iph); ++ else ++ dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + } +@@ -1263,6 +1263,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, str + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) + return -1; + ++ dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); ++ + skb_set_inner_ipproto(skb, IPPROTO_IPIP); + + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, +@@ -1296,8 +1298,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, str + ip6_tnl_addr_conflict(t, ipv6h)) + return -1; + +- dsfield = ipv6_get_dsfield(ipv6h); +- + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; +@@ -1311,6 +1311,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, str + fl6.flowi6_proto = IPPROTO_IPV6; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; ++ dsfield = ip6_tclass(key->label); + } else { + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ +@@ -1333,7 +1334,9 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, str + fl6.flowi6_proto = IPPROTO_IPV6; + + if 
(t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) +- fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK); ++ dsfield = ipv6_get_dsfield(ipv6h); ++ else ++ dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= ip6_flowlabel(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) +@@ -1343,6 +1346,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, str + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) + return -1; + ++ dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); ++ + skb_set_inner_ipproto(skb, IPPROTO_IPV6); + + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, diff --git a/queue-4.9/mac80211-initialize-smps-field-in-ht-capabilities.patch b/queue-4.9/mac80211-initialize-smps-field-in-ht-capabilities.patch new file mode 100644 index 00000000000..75d9d7e1761 --- /dev/null +++ b/queue-4.9/mac80211-initialize-smps-field-in-ht-capabilities.patch @@ -0,0 +1,56 @@ +From foo@baz Mon Jul 3 13:33:55 CEST 2017 +From: Felix Fietkau +Date: Fri, 13 Jan 2017 11:28:25 +0100 +Subject: mac80211: initialize SMPS field in HT capabilities + +From: Felix Fietkau + + +[ Upstream commit 43071d8fb3b7f589d72663c496a6880fb097533c ] + +ibss and mesh modes copy the ht capabilites from the band without +overriding the SMPS state. Unfortunately the default value 0 for the +SMPS field means static SMPS instead of disabled. + +This results in HT ibss and mesh setups using only single-stream rates, +even though SMPS is not supposed to be active. + +Initialize SMPS to disabled for all bands on ieee80211_hw_register to +ensure that the value is sane where it is not overriden with the real +SMPS state. 
+ +Reported-by: Elektra Wagenrad +Signed-off-by: Felix Fietkau +[move VHT TODO comment to a better place] +Signed-off-by: Johannes Berg + +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mac80211/main.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/net/mac80211/main.c ++++ b/net/mac80211/main.c +@@ -908,12 +908,17 @@ int ieee80211_register_hw(struct ieee802 + supp_ht = supp_ht || sband->ht_cap.ht_supported; + supp_vht = supp_vht || sband->vht_cap.vht_supported; + +- if (sband->ht_cap.ht_supported) +- local->rx_chains = +- max(ieee80211_mcs_to_chains(&sband->ht_cap.mcs), +- local->rx_chains); ++ if (!sband->ht_cap.ht_supported) ++ continue; + + /* TODO: consider VHT for RX chains, hopefully it's the same */ ++ local->rx_chains = ++ max(ieee80211_mcs_to_chains(&sband->ht_cap.mcs), ++ local->rx_chains); ++ ++ /* no need to mask, SM_PS_DISABLED has all bits set */ ++ sband->ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED << ++ IEEE80211_HT_CAP_SM_PS_SHIFT; + } + + /* if low-level driver supports AP, we also support VLAN */ diff --git a/queue-4.9/mm-vmalloc.c-huge-vmap-fail-gracefully-on-unexpected-huge-vmap-mappings.patch b/queue-4.9/mm-vmalloc.c-huge-vmap-fail-gracefully-on-unexpected-huge-vmap-mappings.patch new file mode 100644 index 00000000000..04069e1afb8 --- /dev/null +++ b/queue-4.9/mm-vmalloc.c-huge-vmap-fail-gracefully-on-unexpected-huge-vmap-mappings.patch @@ -0,0 +1,74 @@ +From 029c54b09599573015a5c18dbe59cbdf42742237 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Fri, 23 Jun 2017 15:08:41 -0700 +Subject: mm/vmalloc.c: huge-vmap: fail gracefully on unexpected huge vmap mappings + +From: Ard Biesheuvel + +commit 029c54b09599573015a5c18dbe59cbdf42742237 upstream. + +Existing code that uses vmalloc_to_page() may assume that any address +for which is_vmalloc_addr() returns true may be passed into +vmalloc_to_page() to retrieve the associated struct page. 
+ +This is not an unreasonable assumption to make, but on architectures +that have CONFIG_HAVE_ARCH_HUGE_VMAP=y, it no longer holds, and we need +to ensure that vmalloc_to_page() does not go off into the weeds trying +to dereference huge PUDs or PMDs as table entries. + +Given that vmalloc() and vmap() themselves never create huge mappings or +deal with compound pages at all, there is no correct answer in this +case, so return NULL instead, and issue a warning. + +When reading /proc/kcore on arm64, you will hit an oops as soon as you +hit the huge mappings used for the various segments that make up the +mapping of vmlinux. With this patch applied, you will no longer hit the +oops, but the kcore contents will be incorrect (these regions will be +zeroed out) + +We are fixing this for kcore specifically, so it avoids vread() for +those regions. At least one other problematic user exists, i.e., +/dev/kmem, but that is currently broken on arm64 for other reasons. + +Link: http://lkml.kernel.org/r/20170609082226.26152-1-ard.biesheuvel@linaro.org +Signed-off-by: Ard Biesheuvel +Acked-by: Mark Rutland +Reviewed-by: Laura Abbott +Cc: Michal Hocko +Cc: zhong jiang +Cc: Dave Hansen +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +[ardb: non-trivial backport to v4.9] +Signed-off-by: Ard Biesheuvel +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmalloc.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -244,11 +244,21 @@ struct page *vmalloc_to_page(const void + */ + VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr)); + ++ /* ++ * Don't dereference bad PUD or PMD (below) entries. This will also ++ * identify huge mappings, which we may encounter on architectures ++ * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. 
Such regions will be ++ * identified as vmalloc addresses by is_vmalloc_addr(), but are ++ * not [unambiguously] associated with a struct page, so there is ++ * no correct value to return for them. ++ */ + if (!pgd_none(*pgd)) { + pud_t *pud = pud_offset(pgd, addr); +- if (!pud_none(*pud)) { ++ WARN_ON_ONCE(pud_bad(*pud)); ++ if (!pud_none(*pud) && !pud_bad(*pud)) { + pmd_t *pmd = pmd_offset(pud, addr); +- if (!pmd_none(*pmd)) { ++ WARN_ON_ONCE(pmd_bad(*pmd)); ++ if (!pmd_none(*pmd) && !pmd_bad(*pmd)) { + pte_t *ptep, pte; + + ptep = pte_offset_map(pmd, addr); diff --git a/queue-4.9/net-mlx4_core-eliminate-warning-messages-for-srq_limit-under-sriov.patch b/queue-4.9/net-mlx4_core-eliminate-warning-messages-for-srq_limit-under-sriov.patch new file mode 100644 index 00000000000..f3b502ddf1f --- /dev/null +++ b/queue-4.9/net-mlx4_core-eliminate-warning-messages-for-srq_limit-under-sriov.patch @@ -0,0 +1,69 @@ +From foo@baz Mon Jul 3 13:37:32 CEST 2017 +From: Jack Morgenstein +Date: Mon, 16 Jan 2017 18:31:39 +0200 +Subject: net/mlx4_core: Eliminate warning messages for SRQ_LIMIT under SRIOV + +From: Jack Morgenstein + + +[ Upstream commit 9577b174cd0323d287c994ef0891db71666d0765 ] + +When running SRIOV, warnings for SRQ LIMIT events flood the Hypervisor's +message log when (correct, normally operating) apps use SRQ LIMIT events +as a trigger to post WQEs to SRQs. + +Add more information to the existing debug printout for SRQ_LIMIT, and +output the warning messages only for the SRQ CATAS ERROR event. + +Fixes: acba2420f9d2 ("mlx4_core: Add wrapper functions and comm channel and slave event support to EQs") +Fixes: e0debf9cb50d ("mlx4_core: Reduce warning message for SRQ_LIMIT event to debug level") +Signed-off-by: Jack Morgenstein +Signed-off-by: Tariq Toukan +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/eq.c | 23 ++++++++++++++--------- + 1 file changed, 14 insertions(+), 9 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/eq.c ++++ b/drivers/net/ethernet/mellanox/mlx4/eq.c +@@ -554,8 +554,9 @@ static int mlx4_eq_int(struct mlx4_dev * + break; + + case MLX4_EVENT_TYPE_SRQ_LIMIT: +- mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT\n", +- __func__); ++ mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT. srq_no=0x%x, eq 0x%x\n", ++ __func__, be32_to_cpu(eqe->event.srq.srqn), ++ eq->eqn); + case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR: + if (mlx4_is_master(dev)) { + /* forward only to slave owning the SRQ */ +@@ -570,15 +571,19 @@ static int mlx4_eq_int(struct mlx4_dev * + eq->eqn, eq->cons_index, ret); + break; + } +- mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n", +- __func__, slave, +- be32_to_cpu(eqe->event.srq.srqn), +- eqe->type, eqe->subtype); ++ if (eqe->type == ++ MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) ++ mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n", ++ __func__, slave, ++ be32_to_cpu(eqe->event.srq.srqn), ++ eqe->type, eqe->subtype); + + if (!ret && slave != dev->caps.function) { +- mlx4_warn(dev, "%s: sending event %02x(%02x) to slave:%d\n", +- __func__, eqe->type, +- eqe->subtype, slave); ++ if (eqe->type == ++ MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) ++ mlx4_warn(dev, "%s: sending event %02x(%02x) to slave:%d\n", ++ __func__, eqe->type, ++ eqe->subtype, slave); + mlx4_slave_event(dev, slave, eqe); + break; + } diff --git a/queue-4.9/net-phy-dp83867-allow-rgmii_txid-rgmii_rxid-interface-types.patch b/queue-4.9/net-phy-dp83867-allow-rgmii_txid-rgmii_rxid-interface-types.patch new file mode 100644 index 00000000000..739ec293090 --- /dev/null +++ b/queue-4.9/net-phy-dp83867-allow-rgmii_txid-rgmii_rxid-interface-types.patch @@ -0,0 +1,64 @@ +From foo@baz Mon Jul 3 13:37:32 CEST 2017 +From: "Karicheri, Muralidharan" +Date: Fri, 13 Jan 2017 
09:32:34 -0500 +Subject: net: phy: dp83867: allow RGMII_TXID/RGMII_RXID interface types + +From: "Karicheri, Muralidharan" + + +[ Upstream commit 34c55cf2fc75f8bf6ba87df321038c064cf2d426 ] + +Currently dp83867 driver returns error if phy interface type +PHY_INTERFACE_MODE_RGMII_RXID is used to set the rx only internal +delay. Similarly issue happens for PHY_INTERFACE_MODE_RGMII_TXID. +Fix this by checking also the interface type if a particular delay +value is missing in the phy dt bindings. Also update the DT document +accordingly. + +Signed-off-by: Murali Karicheri +Signed-off-by: Sekhar Nori +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/devicetree/bindings/net/ti,dp83867.txt | 6 ++++-- + drivers/net/phy/dp83867.c | 8 ++++++-- + 2 files changed, 10 insertions(+), 4 deletions(-) + +--- a/Documentation/devicetree/bindings/net/ti,dp83867.txt ++++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt +@@ -3,9 +3,11 @@ + Required properties: + - reg - The ID number for the phy, usually a small integer + - ti,rx-internal-delay - RGMII Receive Clock Delay - see dt-bindings/net/ti-dp83867.h +- for applicable values ++ for applicable values. Required only if interface type is ++ PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_RXID + - ti,tx-internal-delay - RGMII Transmit Clock Delay - see dt-bindings/net/ti-dp83867.h +- for applicable values ++ for applicable values. 
Required only if interface type is ++ PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_TXID + - ti,fifo-depth - Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h + for applicable values + +--- a/drivers/net/phy/dp83867.c ++++ b/drivers/net/phy/dp83867.c +@@ -113,12 +113,16 @@ static int dp83867_of_init(struct phy_de + + ret = of_property_read_u32(of_node, "ti,rx-internal-delay", + &dp83867->rx_id_delay); +- if (ret) ++ if (ret && ++ (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || ++ phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)) + return ret; + + ret = of_property_read_u32(of_node, "ti,tx-internal-delay", + &dp83867->tx_id_delay); +- if (ret) ++ if (ret && ++ (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || ++ phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)) + return ret; + + return of_property_read_u32(of_node, "ti,fifo-depth", diff --git a/queue-4.9/perf-core-fix-sys_perf_event_open-vs.-hotplug.patch b/queue-4.9/perf-core-fix-sys_perf_event_open-vs.-hotplug.patch new file mode 100644 index 00000000000..97e8590dcef --- /dev/null +++ b/queue-4.9/perf-core-fix-sys_perf_event_open-vs.-hotplug.patch @@ -0,0 +1,275 @@ +From foo@baz Mon Jul 3 13:33:55 CEST 2017 +From: Peter Zijlstra +Date: Fri, 9 Dec 2016 14:59:00 +0100 +Subject: perf/core: Fix sys_perf_event_open() vs. hotplug + +From: Peter Zijlstra + + +[ Upstream commit 63cae12bce9861cec309798d34701cf3da20bc71 ] + +There is a problem with installing an event in a task that is 'stuck' on +an offline CPU. + +Blocked tasks are not disassociated from offlined CPUs, after all, a +blocked task doesn't run and doesn't require a CPU etc.. Only on +wakeup do we amend the situation and place the task on an available +CPU. + +If we hit such a task with perf_install_in_context() we'll loop until +either that task wakes up or the CPU comes back online, if the task +waking depends on the event being installed, we're stuck.
+ +While looking into this issue, I also spotted another problem, if we +hit a task with perf_install_in_context() that is in the middle of +being migrated, that is we observe the old CPU before sending the IPI, +but run the IPI (on the old CPU) while the task is already running on +the new CPU, things also go sideways. + +Rework things to rely on task_curr() -- outside of rq->lock -- which +is rather tricky. Imagine the following scenario where we're trying to +install the first event into our task 't': + +CPU0 CPU1 CPU2 + + (current == t) + +t->perf_event_ctxp[] = ctx; +smp_mb(); +cpu = task_cpu(t); + + switch(t, n); + migrate(t, 2); + switch(p, t); + + ctx = t->perf_event_ctxp[]; // must not be NULL + +smp_function_call(cpu, ..); + + generic_exec_single() + func(); + spin_lock(ctx->lock); + if (task_curr(t)) // false + + add_event_to_ctx(); + spin_unlock(ctx->lock); + + perf_event_context_sched_in(); + spin_lock(ctx->lock); + // sees event + +So its CPU0's store of t->perf_event_ctxp[] that must not go 'missing'. +Because if CPU2's load of that variable were to observe NULL, it would +not try to schedule the ctx and we'd have a task running without its +counter, which would be 'bad'. + +As long as we observe !NULL, we'll acquire ctx->lock. If we acquire it +first and not see the event yet, then CPU0 must observe task_curr() +and retry. If the install happens first, then we must see the event on +sched-in and all is well. 
+ +I think we can translate the first part (until the 'must not be NULL') +of the scenario to a litmus test like: + + C C-peterz + + { + } + + P0(int *x, int *y) + { + int r1; + + WRITE_ONCE(*x, 1); + smp_mb(); + r1 = READ_ONCE(*y); + } + + P1(int *y, int *z) + { + WRITE_ONCE(*y, 1); + smp_store_release(z, 1); + } + + P2(int *x, int *z) + { + int r1; + int r2; + + r1 = smp_load_acquire(z); + smp_mb(); + r2 = READ_ONCE(*x); + } + + exists + (0:r1=0 /\ 2:r1=1 /\ 2:r2=0) + +Where: + x is perf_event_ctxp[], + y is our task's CPU, and + z is our task being placed on the rq of CPU2. + +The P0 smp_mb() is the one added by this patch, ordering the store to +perf_event_ctxp[] from find_get_context() and the load of task_cpu() +in task_function_call(). + +The smp_store_release/smp_load_acquire model the RCpc locking of the +rq->lock and the smp_mb() of P2 is the context switch switching from +whatever CPU2 was running to our task 't'. + +This litmus test evaluates into: + + Test C-peterz Allowed + States 7 + 0:r1=0; 2:r1=0; 2:r2=0; + 0:r1=0; 2:r1=0; 2:r2=1; + 0:r1=0; 2:r1=1; 2:r2=1; + 0:r1=1; 2:r1=0; 2:r2=0; + 0:r1=1; 2:r1=0; 2:r2=1; + 0:r1=1; 2:r1=1; 2:r2=0; + 0:r1=1; 2:r1=1; 2:r2=1; + No + Witnesses + Positive: 0 Negative: 7 + Condition exists (0:r1=0 /\ 2:r1=1 /\ 2:r2=0) + Observation C-peterz Never 0 7 + Hash=e427f41d9146b2a5445101d3e2fcaa34 + +And the strong and weak model agree.
+ +Reported-by: Mark Rutland +Tested-by: Mark Rutland +Signed-off-by: Peter Zijlstra (Intel) +Cc: Alexander Shishkin +Cc: Arnaldo Carvalho de Melo +Cc: Arnaldo Carvalho de Melo +Cc: Jiri Olsa +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Sebastian Andrzej Siewior +Cc: Stephane Eranian +Cc: Thomas Gleixner +Cc: Vince Weaver +Cc: Will Deacon +Cc: jeremy.linton@arm.com +Link: http://lkml.kernel.org/r/20161209135900.GU3174@twins.programming.kicks-ass.net +Signed-off-by: Ingo Molnar +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + kernel/events/core.c | 70 ++++++++++++++++++++++++++++++++++----------------- + 1 file changed, 48 insertions(+), 22 deletions(-) + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -2272,7 +2272,7 @@ static int __perf_install_in_context(vo + struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); + struct perf_event_context *task_ctx = cpuctx->task_ctx; +- bool activate = true; ++ bool reprogram = true; + int ret = 0; + + raw_spin_lock(&cpuctx->ctx.lock); +@@ -2280,27 +2280,26 @@ static int __perf_install_in_context(vo + raw_spin_lock(&ctx->lock); + task_ctx = ctx; + +- /* If we're on the wrong CPU, try again */ +- if (task_cpu(ctx->task) != smp_processor_id()) { +- ret = -ESRCH; +- goto unlock; +- } ++ reprogram = (ctx->task == current); + + /* +- * If we're on the right CPU, see if the task we target is +- * current, if not we don't have to activate the ctx, a future +- * context switch will do that for us. ++ * If the task is running, it must be running on this CPU, ++ * otherwise we cannot reprogram things. ++ * ++ * If its not running, we don't care, ctx->lock will ++ * serialize against it becoming runnable. 
+ */ +- if (ctx->task != current) +- activate = false; +- else +- WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx); ++ if (task_curr(ctx->task) && !reprogram) { ++ ret = -ESRCH; ++ goto unlock; ++ } + ++ WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx); + } else if (task_ctx) { + raw_spin_lock(&task_ctx->lock); + } + +- if (activate) { ++ if (reprogram) { + ctx_sched_out(ctx, cpuctx, EVENT_TIME); + add_event_to_ctx(event, ctx); + ctx_resched(cpuctx, task_ctx); +@@ -2351,13 +2350,36 @@ perf_install_in_context(struct perf_even + /* + * Installing events is tricky because we cannot rely on ctx->is_active + * to be set in case this is the nr_events 0 -> 1 transition. ++ * ++ * Instead we use task_curr(), which tells us if the task is running. ++ * However, since we use task_curr() outside of rq::lock, we can race ++ * against the actual state. This means the result can be wrong. ++ * ++ * If we get a false positive, we retry, this is harmless. ++ * ++ * If we get a false negative, things are complicated. If we are after ++ * perf_event_context_sched_in() ctx::lock will serialize us, and the ++ * value must be correct. If we're before, it doesn't matter since ++ * perf_event_context_sched_in() will program the counter. ++ * ++ * However, this hinges on the remote context switch having observed ++ * our task->perf_event_ctxp[] store, such that it will in fact take ++ * ctx::lock in perf_event_context_sched_in(). ++ * ++ * We do this by task_function_call(), if the IPI fails to hit the task ++ * we know any future context switch of task must see the ++ * perf_event_ctpx[] store. + */ +-again: ++ + /* +- * Cannot use task_function_call() because we need to run on the task's +- * CPU regardless of whether its current or not. ++ * This smp_mb() orders the task->perf_event_ctxp[] store with the ++ * task_cpu() load, such that if the IPI then does not find the task ++ * running, a future context switch of that task must observe the ++ * store. 
+ */ +- if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event)) ++ smp_mb(); ++again: ++ if (!task_function_call(task, __perf_install_in_context, event)) + return; + + raw_spin_lock_irq(&ctx->lock); +@@ -2371,12 +2393,16 @@ again: + raw_spin_unlock_irq(&ctx->lock); + return; + } +- raw_spin_unlock_irq(&ctx->lock); + /* +- * Since !ctx->is_active doesn't mean anything, we must IPI +- * unconditionally. ++ * If the task is not running, ctx->lock will avoid it becoming so, ++ * thus we can safely install the event. + */ +- goto again; ++ if (task_curr(task)) { ++ raw_spin_unlock_irq(&ctx->lock); ++ goto again; ++ } ++ add_event_to_ctx(event, ctx); ++ raw_spin_unlock_irq(&ctx->lock); + } + + /* diff --git a/queue-4.9/perf-probe-fix-to-probe-on-gcc-generated-functions-in-modules.patch b/queue-4.9/perf-probe-fix-to-probe-on-gcc-generated-functions-in-modules.patch new file mode 100644 index 00000000000..96ef2a24ca1 --- /dev/null +++ b/queue-4.9/perf-probe-fix-to-probe-on-gcc-generated-functions-in-modules.patch @@ -0,0 +1,194 @@ +From foo@baz Mon Jul 3 13:37:32 CEST 2017 +From: Masami Hiramatsu +Date: Wed, 11 Jan 2017 15:01:57 +0900 +Subject: perf probe: Fix to probe on gcc generated functions in modules + +From: Masami Hiramatsu + + +[ Upstream commit 613f050d68a8ed3c0b18b9568698908ef7bbc1f7 ] + +Fix to probe on gcc generated functions on modules. Since +probing on a module is based on its symbol name, it should +be adjusted on actual symbols. + +E.g. without this fix, perf probe shows probe definition +on non-exist symbol as below. + + $ perf probe -m build-x86_64/net/netfilter/nf_nat.ko -F in_range* + in_range.isra.12 + $ perf probe -m build-x86_64/net/netfilter/nf_nat.ko -D in_range + p:probe/in_range nf_nat:in_range+0 + +With this fix, perf probe correctly shows a probe on +gcc-generated symbol. 
+ + $ perf probe -m build-x86_64/net/netfilter/nf_nat.ko -D in_range + p:probe/in_range nf_nat:in_range.isra.12+0 + +This also fixes same problem on online module as below. + + $ perf probe -m i915 -D assert_plane + p:probe/assert_plane i915:assert_plane.constprop.134+0 + +Signed-off-by: Masami Hiramatsu +Tested-by: Arnaldo Carvalho de Melo +Cc: Jiri Olsa +Cc: Namhyung Kim +Cc: Peter Zijlstra +Link: http://lkml.kernel.org/r/148411450673.9978.14905987549651656075.stgit@devbox +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + tools/perf/util/probe-event.c | 45 ++++++++++++++++++++++++++--------------- + tools/perf/util/probe-finder.c | 7 ++++-- + tools/perf/util/probe-finder.h | 3 ++ + 3 files changed, 37 insertions(+), 18 deletions(-) + +--- a/tools/perf/util/probe-event.c ++++ b/tools/perf/util/probe-event.c +@@ -645,18 +645,31 @@ static int add_exec_to_probe_trace_event + return ret; + } + +-static int add_module_to_probe_trace_events(struct probe_trace_event *tevs, +- int ntevs, const char *module) ++static int ++post_process_module_probe_trace_events(struct probe_trace_event *tevs, ++ int ntevs, const char *module, ++ struct debuginfo *dinfo) + { ++ Dwarf_Addr text_offs = 0; + int i, ret = 0; + char *mod_name = NULL; ++ struct map *map; + + if (!module) + return 0; + +- mod_name = find_module_name(module); ++ map = get_target_map(module, false); ++ if (!map || debuginfo__get_text_offset(dinfo, &text_offs, true) < 0) { ++ pr_warning("Failed to get ELF symbols for %s\n", module); ++ return -EINVAL; ++ } + ++ mod_name = find_module_name(module); + for (i = 0; i < ntevs; i++) { ++ ret = post_process_probe_trace_point(&tevs[i].point, ++ map, (unsigned long)text_offs); ++ if (ret < 0) ++ break; + tevs[i].point.module = + strdup(mod_name ? 
mod_name : module); + if (!tevs[i].point.module) { +@@ -666,6 +679,8 @@ static int add_module_to_probe_trace_eve + } + + free(mod_name); ++ map__put(map); ++ + return ret; + } + +@@ -722,7 +737,7 @@ arch__post_process_probe_trace_events(st + static int post_process_probe_trace_events(struct perf_probe_event *pev, + struct probe_trace_event *tevs, + int ntevs, const char *module, +- bool uprobe) ++ bool uprobe, struct debuginfo *dinfo) + { + int ret; + +@@ -730,7 +745,8 @@ static int post_process_probe_trace_even + ret = add_exec_to_probe_trace_events(tevs, ntevs, module); + else if (module) + /* Currently ref_reloc_sym based probe is not for drivers */ +- ret = add_module_to_probe_trace_events(tevs, ntevs, module); ++ ret = post_process_module_probe_trace_events(tevs, ntevs, ++ module, dinfo); + else + ret = post_process_kernel_probe_trace_events(tevs, ntevs); + +@@ -774,30 +790,27 @@ static int try_to_find_probe_trace_event + } + } + +- debuginfo__delete(dinfo); +- + if (ntevs > 0) { /* Succeeded to find trace events */ + pr_debug("Found %d probe_trace_events.\n", ntevs); + ret = post_process_probe_trace_events(pev, *tevs, ntevs, +- pev->target, pev->uprobes); ++ pev->target, pev->uprobes, dinfo); + if (ret < 0 || ret == ntevs) { ++ pr_debug("Post processing failed or all events are skipped. (%d)\n", ret); + clear_probe_trace_events(*tevs, ntevs); + zfree(tevs); ++ ntevs = 0; + } +- if (ret != ntevs) +- return ret < 0 ? ret : ntevs; +- ntevs = 0; +- /* Fall through */ + } + ++ debuginfo__delete(dinfo); ++ + if (ntevs == 0) { /* No error but failed to find probe point. 
*/ + pr_warning("Probe point '%s' not found.\n", + synthesize_perf_probe_point(&pev->point)); + return -ENOENT; +- } +- /* Error path : ntevs < 0 */ +- pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); +- if (ntevs < 0) { ++ } else if (ntevs < 0) { ++ /* Error path : ntevs < 0 */ ++ pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); + if (ntevs == -EBADF) + pr_warning("Warning: No dwarf info found in the vmlinux - " + "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n"); +--- a/tools/perf/util/probe-finder.c ++++ b/tools/perf/util/probe-finder.c +@@ -1501,7 +1501,8 @@ int debuginfo__find_available_vars_at(st + } + + /* For the kernel module, we need a special code to get a DIE */ +-static int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs) ++int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, ++ bool adjust_offset) + { + int n, i; + Elf32_Word shndx; +@@ -1530,6 +1531,8 @@ static int debuginfo__get_text_offset(st + if (!shdr) + return -ENOENT; + *offs = shdr->sh_addr; ++ if (adjust_offset) ++ *offs -= shdr->sh_offset; + } + } + return 0; +@@ -1545,7 +1548,7 @@ int debuginfo__find_probe_point(struct d + int baseline = 0, lineno = 0, ret = 0; + + /* We always need to relocate the address for aranges */ +- if (debuginfo__get_text_offset(dbg, &baseaddr) == 0) ++ if (debuginfo__get_text_offset(dbg, &baseaddr, false) == 0) + addr += baseaddr; + /* Find cu die */ + if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) { +--- a/tools/perf/util/probe-finder.h ++++ b/tools/perf/util/probe-finder.h +@@ -46,6 +46,9 @@ int debuginfo__find_trace_events(struct + int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, + struct perf_probe_point *ppt); + ++int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, ++ bool adjust_offset); ++ + /* Find a line range */ + int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr); + diff --git 
a/queue-4.9/perf-probe-fix-to-show-correct-locations-for-events-on-modules.patch b/queue-4.9/perf-probe-fix-to-show-correct-locations-for-events-on-modules.patch new file mode 100644 index 00000000000..8703e6d949f --- /dev/null +++ b/queue-4.9/perf-probe-fix-to-show-correct-locations-for-events-on-modules.patch @@ -0,0 +1,136 @@ +From foo@baz Mon Jul 3 13:37:32 CEST 2017 +From: Masami Hiramatsu +Date: Wed, 11 Jan 2017 14:59:38 +0900 +Subject: perf probe: Fix to show correct locations for events on modules + +From: Masami Hiramatsu + + +[ Upstream commit d2d4edbebe07ddb77980656abe7b9bc7a9e0cdf7 ] + +Fix to show correct locations for events on modules by relocating given +address instead of retrying after failure. + +This happens when the module text size is big enough, bigger than +sh_addr, because the original code retries with given address + sh_addr +if it failed to find CU DIE at the given address. + +Any address smaller than sh_addr always fails and it retries with the +correct address, but addresses bigger than sh_addr will get a CU DIE +which is on the given address (not adjusted by sh_addr). + +In my environment(x86-64), the sh_addr of ".text" section is 0x10030. +Since i915 is a huge kernel module, we can see this issue as below. + + $ grep "[Tt] .*\[i915\]" /proc/kallsyms | sort | head -n1 + ffffffffc0270000 t i915_switcheroo_can_switch [i915] + +ffffffffc0270000 + 0x10030 = ffffffffc0280030, so we'll check +symbols cross this boundary. + + $ grep "[Tt] .*\[i915\]" /proc/kallsyms | grep -B1 ^ffffffffc028\ + | head -n 2 + ffffffffc027ff80 t haswell_init_clock_gating [i915] + ffffffffc0280110 t valleyview_init_clock_gating [i915] + +So set up probes on both functions and see what happens.
+ + $ sudo ./perf probe -m i915 -a haswell_init_clock_gating \ + -a valleyview_init_clock_gating + Added new events: + probe:haswell_init_clock_gating (on haswell_init_clock_gating in i915) + probe:valleyview_init_clock_gating (on valleyview_init_clock_gating in i915) + + You can now use it in all perf tools, such as: + + perf record -e probe:valleyview_init_clock_gating -aR sleep 1 + + $ sudo ./perf probe -l + probe:haswell_init_clock_gating (on haswell_init_clock_gating@gpu/drm/i915/intel_pm.c in i915) + probe:valleyview_init_clock_gating (on i915_vga_set_decode:4@gpu/drm/i915/i915_drv.c in i915) + +As you can see, haswell_init_clock_gating is correctly shown, +but valleyview_init_clock_gating is not. + +With this patch, both events are shown correctly. + + $ sudo ./perf probe -l + probe:haswell_init_clock_gating (on haswell_init_clock_gating@gpu/drm/i915/intel_pm.c in i915) + probe:valleyview_init_clock_gating (on valleyview_init_clock_gating@gpu/drm/i915/intel_pm.c in i915) + +Committer notes: + +In my case: + + # perf probe -m i915 -a haswell_init_clock_gating -a valleyview_init_clock_gating + Added new events: + probe:haswell_init_clock_gating (on haswell_init_clock_gating in i915) + probe:valleyview_init_clock_gating (on valleyview_init_clock_gating in i915) + + You can now use it in all perf tools, such as: + + perf record -e probe:valleyview_init_clock_gating -aR sleep 1 + + # perf probe -l + probe:haswell_init_clock_gating (on i915_getparam+432@gpu/drm/i915/i915_drv.c in i915) + probe:valleyview_init_clock_gating (on __i915_printk+240@gpu/drm/i915/i915_drv.c in i915) + # + + # readelf -SW /lib/modules/4.9.0+/build/vmlinux | egrep -w '.text|Name' + [Nr] Name Type Address Off Size ES Flg Lk Inf Al + [ 1] .text PROGBITS ffffffff81000000 200000 822fd3 00 AX 0 0 4096 + # + + So both are b0rked, now with the fix: + + # perf probe -m i915 -a haswell_init_clock_gating -a valleyview_init_clock_gating + Added new events: + probe:haswell_init_clock_gating (on 
haswell_init_clock_gating in i915) + probe:valleyview_init_clock_gating (on valleyview_init_clock_gating in i915) + + You can now use it in all perf tools, such as: + + perf record -e probe:valleyview_init_clock_gating -aR sleep 1 + + # perf probe -l + probe:haswell_init_clock_gating (on haswell_init_clock_gating@gpu/drm/i915/intel_pm.c in i915) + probe:valleyview_init_clock_gating (on valleyview_init_clock_gating@gpu/drm/i915/intel_pm.c in i915) + # + +Both looks correct. + +Signed-off-by: Masami Hiramatsu +Tested-by: Arnaldo Carvalho de Melo +Cc: Jiri Olsa +Cc: Namhyung Kim +Cc: Peter Zijlstra +Link: http://lkml.kernel.org/r/148411436777.9978.1440275861947194930.stgit@devbox +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + tools/perf/util/probe-finder.c | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +--- a/tools/perf/util/probe-finder.c ++++ b/tools/perf/util/probe-finder.c +@@ -1543,16 +1543,12 @@ int debuginfo__find_probe_point(struct d + Dwarf_Addr _addr = 0, baseaddr = 0; + const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp; + int baseline = 0, lineno = 0, ret = 0; +- bool reloc = false; + +-retry: ++ /* We always need to relocate the address for aranges */ ++ if (debuginfo__get_text_offset(dbg, &baseaddr) == 0) ++ addr += baseaddr; + /* Find cu die */ + if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) { +- if (!reloc && debuginfo__get_text_offset(dbg, &baseaddr) == 0) { +- addr += baseaddr; +- reloc = true; +- goto retry; +- } + pr_warning("Failed to find debug information for address %lx\n", + addr); + ret = -EINVAL; diff --git a/queue-4.9/perf-x86-reject-non-sampling-events-with-precise_ip.patch b/queue-4.9/perf-x86-reject-non-sampling-events-with-precise_ip.patch new file mode 100644 index 00000000000..3db1ce8659e --- /dev/null +++ b/queue-4.9/perf-x86-reject-non-sampling-events-with-precise_ip.patch @@ -0,0 +1,50 @@ +From foo@baz Mon Jul 3 13:33:55 CEST 
2017 +From: Jiri Olsa +Date: Tue, 3 Jan 2017 15:24:54 +0100 +Subject: perf/x86: Reject non sampling events with precise_ip + +From: Jiri Olsa + + +[ Upstream commit 18e7a45af91acdde99d3aa1372cc40e1f8142f7b ] + +As Peter suggested [1] rejecting non sampling PEBS events, +because they don't make any sense and could cause bugs +in the NMI handler [2]. + + [1] http://lkml.kernel.org/r/20170103094059.GC3093@worktop + [2] http://lkml.kernel.org/r/1482931866-6018-3-git-send-email-jolsa@kernel.org + +Signed-off-by: Jiri Olsa +Signed-off-by: Peter Zijlstra (Intel) +Cc: Alexander Shishkin +Cc: Arnaldo Carvalho de Melo +Cc: Arnaldo Carvalho de Melo +Cc: Jiri Olsa +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Stephane Eranian +Cc: Thomas Gleixner +Cc: Vince Weaver +Cc: Vince Weaver +Link: http://lkml.kernel.org/r/20170103142454.GA26251@krava +Signed-off-by: Ingo Molnar +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/core.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/arch/x86/events/core.c ++++ b/arch/x86/events/core.c +@@ -505,6 +505,10 @@ int x86_pmu_hw_config(struct perf_event + + if (event->attr.precise_ip > precise) + return -EOPNOTSUPP; ++ ++ /* There's no sense in having PEBS for non sampling events: */ ++ if (!is_sampling_event(event)) ++ return -EINVAL; + } + /* + * check that PEBS LBR correction does not conflict with diff --git a/queue-4.9/pmem-return-eio-on-read_pmem-failure.patch b/queue-4.9/pmem-return-eio-on-read_pmem-failure.patch new file mode 100644 index 00000000000..487549d7a53 --- /dev/null +++ b/queue-4.9/pmem-return-eio-on-read_pmem-failure.patch @@ -0,0 +1,40 @@ +From foo@baz Mon Jul 3 13:33:55 CEST 2017 +From: Stefan Hajnoczi +Date: Thu, 5 Jan 2017 10:05:46 +0000 +Subject: pmem: return EIO on read_pmem() failure + +From: Stefan Hajnoczi + + +[ Upstream commit d47d1d27fd6206c18806440f6ebddf51a806be4f ] + +The read_pmem() function uses memcpy_mcsafe() on x86 where an EFAULT +error code indicates a failed read.
Block I/O should use EIO to +indicate failure. Other pmem code paths (like bad blocks) already use +EIO so let's be consistent. + +This fixes compatibility with consumers like btrfs that try to parse the +specific error code rather than treat all errors the same. + +Reviewed-by: Jeff Moyer +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Dan Williams +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/nvdimm/pmem.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/nvdimm/pmem.c ++++ b/drivers/nvdimm/pmem.c +@@ -87,7 +87,9 @@ static int read_pmem(struct page *page, + + rc = memcpy_from_pmem(mem + off, pmem_addr, len); + kunmap_atomic(mem); +- return rc; ++ if (rc) ++ return -EIO; ++ return 0; + } + + static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, diff --git a/queue-4.9/ravb-fix-use-after-free-on-ifconfig-eth0-down.patch b/queue-4.9/ravb-fix-use-after-free-on-ifconfig-eth0-down.patch new file mode 100644 index 00000000000..b19557f23a2 --- /dev/null +++ b/queue-4.9/ravb-fix-use-after-free-on-ifconfig-eth0-down.patch @@ -0,0 +1,131 @@ +From foo@baz Mon Jul 3 13:37:32 CEST 2017 +From: Eugeniu Rosca +Date: Tue, 6 Jun 2017 00:08:10 +0200 +Subject: ravb: Fix use-after-free on `ifconfig eth0 down` + +From: Eugeniu Rosca + + +[ Upstream commit 79514ef670e9e575a1fe36922268c439d0f0ca8a ] + +Commit a47b70ea86bd ("ravb: unmap descriptors when freeing rings") has +introduced the issue seen in [1] reproduced on H3ULCB board. + +Fix this by relocating the RX skb ringbuffer free operation, so that +swiotlb page unmapping can be done first. Freeing of aligned TX buffers +is not relevant to the issue seen in [1]. Still, reposition TX free +calls as well, to have all kfree() operations performed consistently +_after_ dma_unmap_*()/dma_free_*(). 
+ +[1] Console screenshot with the problem reproduced: + +salvator-x login: root +root@salvator-x:~# ifconfig eth0 up +Micrel KSZ9031 Gigabit PHY e6800000.ethernet-ffffffff:00: \ + attached PHY driver [Micrel KSZ9031 Gigabit PHY] \ + (mii_bus:phy_addr=e6800000.ethernet-ffffffff:00, irq=235) +IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready +root@salvator-x:~# +root@salvator-x:~# ifconfig eth0 down + +================================================================== +BUG: KASAN: use-after-free in swiotlb_tbl_unmap_single+0xc4/0x35c +Write of size 1538 at addr ffff8006d884f780 by task ifconfig/1649 + +CPU: 0 PID: 1649 Comm: ifconfig Not tainted 4.12.0-rc4-00004-g112eb07287d1 #32 +Hardware name: Renesas H3ULCB board based on r8a7795 (DT) +Call trace: +[] dump_backtrace+0x0/0x3a4 +[] show_stack+0x14/0x1c +[] dump_stack+0xf8/0x150 +[] print_address_description+0x7c/0x330 +[] kasan_report+0x2e0/0x2f4 +[] check_memory_region+0x20/0x14c +[] memcpy+0x48/0x68 +[] swiotlb_tbl_unmap_single+0xc4/0x35c +[] unmap_single+0x90/0xa4 +[] swiotlb_unmap_page+0xc/0x14 +[] __swiotlb_unmap_page+0xcc/0xe4 +[] ravb_ring_free+0x514/0x870 +[] ravb_close+0x288/0x36c +[] __dev_close_many+0x14c/0x174 +[] __dev_close+0xc8/0x144 +[] __dev_change_flags+0xd8/0x194 +[] dev_change_flags+0x60/0xb0 +[] devinet_ioctl+0x484/0x9d4 +[] inet_ioctl+0x190/0x194 +[] sock_do_ioctl+0x78/0xa8 +[] sock_ioctl+0x110/0x3c4 +[] vfs_ioctl+0x90/0xa0 +[] do_vfs_ioctl+0x148/0xc38 +[] SyS_ioctl+0x44/0x74 +[] el0_svc_naked+0x24/0x28 + +The buggy address belongs to the page: +page:ffff7e001b6213c0 count:0 mapcount:0 mapping: (null) index:0x0 +flags: 0x4000000000000000() +raw: 4000000000000000 0000000000000000 0000000000000000 00000000ffffffff +raw: 0000000000000000 ffff7e001b6213e0 0000000000000000 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff8006d884f680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ffff8006d884f700: ff ff ff ff ff ff ff ff ff ff ff 
ff ff ff ff ff +>ffff8006d884f780: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ^ + ffff8006d884f800: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ffff8006d884f880: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff +================================================================== +Disabling lock debugging due to kernel taint +root@salvator-x:~# + +Fixes: a47b70ea86bd ("ravb: unmap descriptors when freeing rings") +Signed-off-by: Eugeniu Rosca +Acked-by: Sergei Shtylyov +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/renesas/ravb_main.c | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +--- a/drivers/net/ethernet/renesas/ravb_main.c ++++ b/drivers/net/ethernet/renesas/ravb_main.c +@@ -229,18 +229,6 @@ static void ravb_ring_free(struct net_de + int ring_size; + int i; + +- /* Free RX skb ringbuffer */ +- if (priv->rx_skb[q]) { +- for (i = 0; i < priv->num_rx_ring[q]; i++) +- dev_kfree_skb(priv->rx_skb[q][i]); +- } +- kfree(priv->rx_skb[q]); +- priv->rx_skb[q] = NULL; +- +- /* Free aligned TX buffers */ +- kfree(priv->tx_align[q]); +- priv->tx_align[q] = NULL; +- + if (priv->rx_ring[q]) { + for (i = 0; i < priv->num_rx_ring[q]; i++) { + struct ravb_ex_rx_desc *desc = &priv->rx_ring[q][i]; +@@ -269,6 +257,18 @@ static void ravb_ring_free(struct net_de + priv->tx_ring[q] = NULL; + } + ++ /* Free RX skb ringbuffer */ ++ if (priv->rx_skb[q]) { ++ for (i = 0; i < priv->num_rx_ring[q]; i++) ++ dev_kfree_skb(priv->rx_skb[q][i]); ++ } ++ kfree(priv->rx_skb[q]); ++ priv->rx_skb[q] = NULL; ++ ++ /* Free aligned TX buffers */ ++ kfree(priv->tx_align[q]); ++ priv->tx_align[q] = NULL; ++ + /* Free TX skb ringbuffer. + * SKBs are freed by ravb_tx_free() call above. 
+ */ diff --git a/queue-4.9/s390-ctl_reg-make-__ctl_load-a-full-memory-barrier.patch b/queue-4.9/s390-ctl_reg-make-__ctl_load-a-full-memory-barrier.patch new file mode 100644 index 00000000000..c0a19505b2c --- /dev/null +++ b/queue-4.9/s390-ctl_reg-make-__ctl_load-a-full-memory-barrier.patch @@ -0,0 +1,43 @@ +From foo@baz Mon Jul 3 13:37:32 CEST 2017 +From: Heiko Carstens +Date: Wed, 28 Dec 2016 11:33:48 +0100 +Subject: s390/ctl_reg: make __ctl_load a full memory barrier + +From: Heiko Carstens + + +[ Upstream commit e991c24d68b8c0ba297eeb7af80b1e398e98c33f ] + +We have quite a lot of code that depends on the order of the +__ctl_load inline assembly and subsequent memory accesses, like +e.g. disabling lowcore protection and the writing to lowcore. + +Since the __ctl_load macro does not have memory barrier semantics, nor +any other dependencies, the compiler is, theoretically, free to shuffle +code around. Or in other words: storing to lowcore could happen before +lowcore protection is disabled. + +In order to avoid this class of potential bugs simply add a full +memory barrier to the __ctl_load macro.
+ +Signed-off-by: Heiko Carstens +Signed-off-by: Martin Schwidefsky +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/include/asm/ctl_reg.h | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/s390/include/asm/ctl_reg.h ++++ b/arch/s390/include/asm/ctl_reg.h +@@ -15,7 +15,9 @@ + BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\ + asm volatile( \ + " lctlg %1,%2,%0\n" \ +- : : "Q" (*(addrtype *)(&array)), "i" (low), "i" (high));\ ++ : \ ++ : "Q" (*(addrtype *)(&array)), "i" (low), "i" (high) \ ++ : "memory"); \ + } + + #define __ctl_store(array, low, high) { \ diff --git a/queue-4.9/sctp-check-af-before-verify-address-in-sctp_addr_id2transport.patch b/queue-4.9/sctp-check-af-before-verify-address-in-sctp_addr_id2transport.patch new file mode 100644 index 00000000000..9d587831e03 --- /dev/null +++ b/queue-4.9/sctp-check-af-before-verify-address-in-sctp_addr_id2transport.patch @@ -0,0 +1,41 @@ +From foo@baz Mon Jul 3 13:37:32 CEST 2017 +From: Xin Long +Date: Tue, 7 Feb 2017 20:56:08 +0800 +Subject: sctp: check af before verify address in sctp_addr_id2transport + +From: Xin Long + + +[ Upstream commit 912964eacb111551db73429719eb5fadcab0ff8a ] + +Commit 6f29a1306131 ("sctp: sctp_addr_id2transport should verify the +addr before looking up assoc") invoked sctp_verify_addr to verify the +addr. + +But it didn't check af variable beforehand, once users pass an address +with family = 0 through sockopt, sctp_get_af_specific will return NULL +and NULL pointer dereference will be caused by af->sockaddr_len. + +This patch is to fix it by returning NULL if af variable is NULL. + +Fixes: 6f29a1306131 ("sctp: sctp_addr_id2transport should verify the addr before looking up assoc") +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -239,7 +239,7 @@ static struct sctp_transport *sctp_addr_ + union sctp_addr *laddr = (union sctp_addr *)addr; + struct sctp_transport *transport; + +- if (sctp_verify_addr(sk, laddr, af->sockaddr_len)) ++ if (!af || sctp_verify_addr(sk, laddr, af->sockaddr_len)) + return NULL; + + addr_asoc = sctp_endpoint_lookup_assoc(sctp_sk(sk)->ep, diff --git a/queue-4.9/series b/queue-4.9/series index 0adb0c631ac..cb40f0b72c6 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -109,3 +109,31 @@ spi-davinci-use-dma_mapping_error.patch arm64-assembler-make-adr_l-work-in-modules-under-kaslr.patch net-thunderx-acpi-fix-lmac-initialization.patch drm-radeon-si-load-special-ucode-for-certain-mc-configs.patch +drm-amd-powerplay-fix-vce-cg-logic-error-on-cz-st.patch +drm-amd-powerplay-refine-vce-dpm-update-code-on-cz.patch +pmem-return-eio-on-read_pmem-failure.patch +mac80211-initialize-smps-field-in-ht-capabilities.patch +x86-tsc-add-the-intel-denverton-processor-to-native_calibrate_tsc.patch +x86-mpx-use-compatible-types-in-comparison-to-fix-sparse-error.patch +perf-core-fix-sys_perf_event_open-vs.-hotplug.patch +perf-x86-reject-non-sampling-events-with-precise_ip.patch +aio-fix-lock-dep-warning.patch +coredump-ensure-proper-size-of-sparse-core-files.patch +swiotlb-ensure-that-page-sized-mappings-are-page-aligned.patch +s390-ctl_reg-make-__ctl_load-a-full-memory-barrier.patch +usb-dwc2-gadget-fix-gusbcfg.usbtrdtim-value.patch +be2net-fix-status-check-in-be_cmd_pmac_add.patch +be2net-don-t-delete-mac-on-close-on-unprivileged-be3-vfs.patch +be2net-fix-mac-addr-setting-on-privileged-be3-vfs.patch +perf-probe-fix-to-show-correct-locations-for-events-on-modules.patch +net-phy-dp83867-allow-rgmii_txid-rgmii_rxid-interface-types.patch 
+tipc-allocate-user-memory-with-gfp_kernel-flag.patch +perf-probe-fix-to-probe-on-gcc-generated-functions-in-modules.patch +net-mlx4_core-eliminate-warning-messages-for-srq_limit-under-sriov.patch +sctp-check-af-before-verify-address-in-sctp_addr_id2transport.patch +ip6_tunnel-ip6_gre-fix-setting-of-dscp-on-encapsulated-packets.patch +ravb-fix-use-after-free-on-ifconfig-eth0-down.patch +mm-vmalloc.c-huge-vmap-fail-gracefully-on-unexpected-huge-vmap-mappings.patch +xfrm-fix-stack-access-out-of-bounds-with-config_xfrm_sub_policy.patch +xfrm-null-dereference-on-allocation-failure.patch +xfrm-oops-on-error-in-pfkey_msg2xfrm_state.patch diff --git a/queue-4.9/swiotlb-ensure-that-page-sized-mappings-are-page-aligned.patch b/queue-4.9/swiotlb-ensure-that-page-sized-mappings-are-page-aligned.patch new file mode 100644 index 00000000000..37601048944 --- /dev/null +++ b/queue-4.9/swiotlb-ensure-that-page-sized-mappings-are-page-aligned.patch @@ -0,0 +1,45 @@ +From foo@baz Mon Jul 3 13:37:32 CEST 2017 +From: Nikita Yushchenko +Date: Wed, 11 Jan 2017 21:56:31 +0300 +Subject: swiotlb: ensure that page-sized mappings are page-aligned + +From: Nikita Yushchenko + + +[ Upstream commit 602d9858f07c72eab64f5f00e2fae55f9902cfbe ] + +Some drivers do depend on page mappings to be page aligned. + +Swiotlb already enforces such alignment for mappings greater than page, +extend that to page-sized mappings as well. + +Without this fix, nvme hits BUG() in nvme_setup_prps(), because that routine +assumes page-aligned mappings. 
+ +Signed-off-by: Nikita Yushchenko +Reviewed-by: Christoph Hellwig +Reviewed-by: Sagi Grimberg +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + lib/swiotlb.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/lib/swiotlb.c ++++ b/lib/swiotlb.c +@@ -456,11 +456,11 @@ phys_addr_t swiotlb_tbl_map_single(struc + : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); + + /* +- * For mappings greater than a page, we limit the stride (and +- * hence alignment) to a page size. ++ * For mappings greater than or equal to a page, we limit the stride ++ * (and hence alignment) to a page size. + */ + nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; +- if (size > PAGE_SIZE) ++ if (size >= PAGE_SIZE) + stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); + else + stride = 1; diff --git a/queue-4.9/tipc-allocate-user-memory-with-gfp_kernel-flag.patch b/queue-4.9/tipc-allocate-user-memory-with-gfp_kernel-flag.patch new file mode 100644 index 00000000000..adbc730a91d --- /dev/null +++ b/queue-4.9/tipc-allocate-user-memory-with-gfp_kernel-flag.patch @@ -0,0 +1,156 @@ +From foo@baz Mon Jul 3 13:37:32 CEST 2017 +From: Parthasarathy Bhuvaragan +Date: Fri, 13 Jan 2017 15:46:25 +0100 +Subject: tipc: allocate user memory with GFP_KERNEL flag + +From: Parthasarathy Bhuvaragan + + +[ Upstream commit 57d5f64d83ab5b5a5118b1597386dd76eaf4340d ] + +Until now, we allocate memory always with GFP_ATOMIC flag. +When the system is under memory pressure and a user tries to send, +the send fails due to low memory. However, the user application +can wait for free memory if we allocate it using GFP_KERNEL flag. + +In this commit, we allocate memory with GFP_KERNEL for all user +allocation. + +Reported-by: Rune Torgersen +Acked-by: Jon Maloy +Signed-off-by: Parthasarathy Bhuvaragan +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/discover.c | 4 ++-- + net/tipc/link.c | 2 +- + net/tipc/msg.c | 16 ++++++++-------- + net/tipc/msg.h | 2 +- + net/tipc/name_distr.c | 2 +- + 5 files changed, 13 insertions(+), 13 deletions(-) + +--- a/net/tipc/discover.c ++++ b/net/tipc/discover.c +@@ -169,7 +169,7 @@ void tipc_disc_rcv(struct net *net, stru + + /* Send response, if necessary */ + if (respond && (mtyp == DSC_REQ_MSG)) { +- rskb = tipc_buf_acquire(MAX_H_SIZE); ++ rskb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC); + if (!rskb) + return; + tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer); +@@ -278,7 +278,7 @@ int tipc_disc_create(struct net *net, st + req = kmalloc(sizeof(*req), GFP_ATOMIC); + if (!req) + return -ENOMEM; +- req->buf = tipc_buf_acquire(MAX_H_SIZE); ++ req->buf = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC); + if (!req->buf) { + kfree(req); + return -ENOMEM; +--- a/net/tipc/link.c ++++ b/net/tipc/link.c +@@ -1395,7 +1395,7 @@ tnl: + msg_set_seqno(hdr, seqno++); + pktlen = msg_size(hdr); + msg_set_size(&tnlhdr, pktlen + INT_H_SIZE); +- tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE); ++ tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC); + if (!tnlskb) { + pr_warn("%sunable to send packet\n", link_co_err); + return; +--- a/net/tipc/msg.c ++++ b/net/tipc/msg.c +@@ -58,12 +58,12 @@ static unsigned int align(unsigned int i + * NOTE: Headroom is reserved to allow prepending of a data link header. + * There may also be unrequested tailroom present at the buffer's end. 
+ */ +-struct sk_buff *tipc_buf_acquire(u32 size) ++struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp) + { + struct sk_buff *skb; + unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; + +- skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); ++ skb = alloc_skb_fclone(buf_size, gfp); + if (skb) { + skb_reserve(skb, BUF_HEADROOM); + skb_put(skb, size); +@@ -95,7 +95,7 @@ struct sk_buff *tipc_msg_create(uint use + struct tipc_msg *msg; + struct sk_buff *buf; + +- buf = tipc_buf_acquire(hdr_sz + data_sz); ++ buf = tipc_buf_acquire(hdr_sz + data_sz, GFP_ATOMIC); + if (unlikely(!buf)) + return NULL; + +@@ -261,7 +261,7 @@ int tipc_msg_build(struct tipc_msg *mhdr + + /* No fragmentation needed? */ + if (likely(msz <= pktmax)) { +- skb = tipc_buf_acquire(msz); ++ skb = tipc_buf_acquire(msz, GFP_KERNEL); + if (unlikely(!skb)) + return -ENOMEM; + skb_orphan(skb); +@@ -282,7 +282,7 @@ int tipc_msg_build(struct tipc_msg *mhdr + msg_set_importance(&pkthdr, msg_importance(mhdr)); + + /* Prepare first fragment */ +- skb = tipc_buf_acquire(pktmax); ++ skb = tipc_buf_acquire(pktmax, GFP_KERNEL); + if (!skb) + return -ENOMEM; + skb_orphan(skb); +@@ -313,7 +313,7 @@ int tipc_msg_build(struct tipc_msg *mhdr + pktsz = drem + INT_H_SIZE; + else + pktsz = pktmax; +- skb = tipc_buf_acquire(pktsz); ++ skb = tipc_buf_acquire(pktsz, GFP_KERNEL); + if (!skb) { + rc = -ENOMEM; + goto error; +@@ -448,7 +448,7 @@ bool tipc_msg_make_bundle(struct sk_buff + if (msz > (max / 2)) + return false; + +- _skb = tipc_buf_acquire(max); ++ _skb = tipc_buf_acquire(max, GFP_ATOMIC); + if (!_skb) + return false; + +@@ -496,7 +496,7 @@ bool tipc_msg_reverse(u32 own_node, str + + /* Never return SHORT header; expand by replacing buffer if necessary */ + if (msg_short(hdr)) { +- *skb = tipc_buf_acquire(BASIC_H_SIZE + dlen); ++ *skb = tipc_buf_acquire(BASIC_H_SIZE + dlen, GFP_ATOMIC); + if (!*skb) + goto exit; + memcpy((*skb)->data + BASIC_H_SIZE, msg_data(hdr), dlen); +--- a/net/tipc/msg.h ++++ b/net/tipc/msg.h 
+@@ -820,7 +820,7 @@ static inline bool msg_is_reset(struct t + return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG); + } + +-struct sk_buff *tipc_buf_acquire(u32 size); ++struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp); + bool tipc_msg_validate(struct sk_buff *skb); + bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err); + void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, +--- a/net/tipc/name_distr.c ++++ b/net/tipc/name_distr.c +@@ -69,7 +69,7 @@ static struct sk_buff *named_prepare_buf + u32 dest) + { + struct tipc_net *tn = net_generic(net, tipc_net_id); +- struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size); ++ struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC); + struct tipc_msg *msg; + + if (buf != NULL) { diff --git a/queue-4.9/usb-dwc2-gadget-fix-gusbcfg.usbtrdtim-value.patch b/queue-4.9/usb-dwc2-gadget-fix-gusbcfg.usbtrdtim-value.patch new file mode 100644 index 00000000000..4678033a4d3 --- /dev/null +++ b/queue-4.9/usb-dwc2-gadget-fix-gusbcfg.usbtrdtim-value.patch @@ -0,0 +1,46 @@ +From foo@baz Mon Jul 3 13:37:32 CEST 2017 +From: Amelie Delaunay +Date: Thu, 12 Jan 2017 16:09:44 +0100 +Subject: usb: dwc2: gadget: Fix GUSBCFG.USBTRDTIM value + +From: Amelie Delaunay + + +[ Upstream commit ca02954ada711b08e5b0d84590a631fd63ed39f9 ] + +USBTrdTim must be programmed to 0x5 when phy has a UTMI+ 16-bit wide +interface or 0x9 when it has a 8-bit wide interface. +GUSBCFG reset value (Value After Reset: 0x1400) sets USBTrdTim to 0x5. +In case of 8-bit UTMI+, without clearing GUSBCFG.USBTRDTIM mask, USBTrdTim +results in 0xD (0x5 | 0x9). +That's why we need to clear GUSBCFG.USBTRDTIM mask before setting USBTrdTim +value, to ensure USBTrdTim is correctly set in case of 8-bit UTMI+. 
+ +Signed-off-by: Amelie Delaunay +Signed-off-by: Felipe Balbi +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/usb/dwc2/gadget.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/usb/dwc2/gadget.c ++++ b/drivers/usb/dwc2/gadget.c +@@ -2532,7 +2532,7 @@ void dwc2_hsotg_core_init_disconnected(s + /* keep other bits untouched (so e.g. forced modes are not lost) */ + usbcfg = dwc2_readl(hsotg->regs + GUSBCFG); + usbcfg &= ~(GUSBCFG_TOUTCAL_MASK | GUSBCFG_PHYIF16 | GUSBCFG_SRPCAP | +- GUSBCFG_HNPCAP); ++ GUSBCFG_HNPCAP | GUSBCFG_USBTRDTIM_MASK); + + /* set the PLL on, remove the HNP/SRP and set the PHY */ + val = (hsotg->phyif == GUSBCFG_PHYIF8) ? 9 : 5; +@@ -3403,7 +3403,7 @@ static void dwc2_hsotg_init(struct dwc2_ + /* keep other bits untouched (so e.g. forced modes are not lost) */ + usbcfg = dwc2_readl(hsotg->regs + GUSBCFG); + usbcfg &= ~(GUSBCFG_TOUTCAL_MASK | GUSBCFG_PHYIF16 | GUSBCFG_SRPCAP | +- GUSBCFG_HNPCAP); ++ GUSBCFG_HNPCAP | GUSBCFG_USBTRDTIM_MASK); + + /* set the PLL on, remove the HNP/SRP and set the PHY */ + trdtim = (hsotg->phyif == GUSBCFG_PHYIF8) ? 9 : 5; diff --git a/queue-4.9/x86-mpx-use-compatible-types-in-comparison-to-fix-sparse-error.patch b/queue-4.9/x86-mpx-use-compatible-types-in-comparison-to-fix-sparse-error.patch new file mode 100644 index 00000000000..fdfbc2f8255 --- /dev/null +++ b/queue-4.9/x86-mpx-use-compatible-types-in-comparison-to-fix-sparse-error.patch @@ -0,0 +1,41 @@ +From foo@baz Mon Jul 3 13:33:55 CEST 2017 +From: Tobias Klauser +Date: Thu, 12 Jan 2017 16:53:11 +0100 +Subject: x86/mpx: Use compatible types in comparison to fix sparse error + +From: Tobias Klauser + + +[ Upstream commit 453828625731d0ba7218242ef6ec88f59408f368 ] + +info->si_addr is of type void __user *, so it should be compared against +something from the same address space. 
+ +This fixes the following sparse error: + + arch/x86/mm/mpx.c:296:27: error: incompatible types in comparison expression (different address spaces) + +Signed-off-by: Tobias Klauser +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/mm/mpx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/mm/mpx.c ++++ b/arch/x86/mm/mpx.c +@@ -293,7 +293,7 @@ siginfo_t *mpx_generate_siginfo(struct p + * We were not able to extract an address from the instruction, + * probably because there was something invalid in it. + */ +- if (info->si_addr == (void *)-1) { ++ if (info->si_addr == (void __user *)-1) { + err = -EINVAL; + goto err_out; + } diff --git a/queue-4.9/x86-tsc-add-the-intel-denverton-processor-to-native_calibrate_tsc.patch b/queue-4.9/x86-tsc-add-the-intel-denverton-processor-to-native_calibrate_tsc.patch new file mode 100644 index 00000000000..68e40fc2021 --- /dev/null +++ b/queue-4.9/x86-tsc-add-the-intel-denverton-processor-to-native_calibrate_tsc.patch @@ -0,0 +1,40 @@ +From foo@baz Mon Jul 3 13:33:55 CEST 2017 +From: Len Brown +Date: Fri, 13 Jan 2017 01:11:18 -0500 +Subject: x86/tsc: Add the Intel Denverton Processor to native_calibrate_tsc() + +From: Len Brown + + +[ Upstream commit 695085b4bc7603551db0b3da897b8bf9893ca218 ] + +The Intel Denverton microserver uses a 25 MHz TSC crystal, +so we can derive its exact [*] TSC frequency +using CPUID and some arithmetic, eg.: + + TSC: 1800 MHz (25000000 Hz * 216 / 3 / 1000000) + +[*] 'exact' is only as good as the crystal, which should be +/- 20ppm + +Signed-off-by: Len Brown +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/306899f94804aece6d8fa8b4223ede3b48dbb59c.1484287748.git.len.brown@intel.com +Signed-off-by: Ingo Molnar +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman 
+--- + arch/x86/kernel/tsc.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kernel/tsc.c ++++ b/arch/x86/kernel/tsc.c +@@ -694,6 +694,7 @@ unsigned long native_calibrate_tsc(void) + crystal_khz = 24000; /* 24.0 MHz */ + break; + case INTEL_FAM6_SKYLAKE_X: ++ case INTEL_FAM6_ATOM_DENVERTON: + crystal_khz = 25000; /* 25.0 MHz */ + break; + case INTEL_FAM6_ATOM_GOLDMONT: diff --git a/queue-4.9/xfrm-fix-stack-access-out-of-bounds-with-config_xfrm_sub_policy.patch b/queue-4.9/xfrm-fix-stack-access-out-of-bounds-with-config_xfrm_sub_policy.patch new file mode 100644 index 00000000000..d6eac2d3be5 --- /dev/null +++ b/queue-4.9/xfrm-fix-stack-access-out-of-bounds-with-config_xfrm_sub_policy.patch @@ -0,0 +1,121 @@ +From 9b3eb54106cf6acd03f07cf0ab01c13676a226c2 Mon Sep 17 00:00:00 2001 +From: Sabrina Dubroca +Date: Wed, 3 May 2017 16:43:19 +0200 +Subject: xfrm: fix stack access out of bounds with CONFIG_XFRM_SUB_POLICY + +From: Sabrina Dubroca + +commit 9b3eb54106cf6acd03f07cf0ab01c13676a226c2 upstream. + +When CONFIG_XFRM_SUB_POLICY=y, xfrm_dst stores a copy of the flowi for +that dst. Unfortunately, the code that allocates and fills this copy +doesn't care about what type of flowi (flowi, flowi4, flowi6) gets +passed. In multiple code paths (from raw_sendmsg, from TCP when +replying to a FIN, in vxlan, geneve, and gre), the flowi that gets +passed to xfrm is actually an on-stack flowi4, so we end up reading +stuff from the stack past the end of the flowi4 struct. + +Since xfrm_dst->origin isn't used anywhere following commit +ca116922afa8 ("xfrm: Eliminate "fl" and "pol" args to +xfrm_bundle_ok()."), just get rid of it. xfrm_dst->partner isn't used +either, so get rid of that too. 
+ +Fixes: 9d6ec938019c ("ipv4: Use flowi4 in public route lookup interfaces.") +Signed-off-by: Sabrina Dubroca +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman + +--- + include/net/xfrm.h | 10 ---------- + net/xfrm/xfrm_policy.c | 47 ----------------------------------------------- + 2 files changed, 57 deletions(-) + +--- a/include/net/xfrm.h ++++ b/include/net/xfrm.h +@@ -944,10 +944,6 @@ struct xfrm_dst { + struct flow_cache_object flo; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + int num_pols, num_xfrms; +-#ifdef CONFIG_XFRM_SUB_POLICY +- struct flowi *origin; +- struct xfrm_selector *partner; +-#endif + u32 xfrm_genid; + u32 policy_genid; + u32 route_mtu_cached; +@@ -963,12 +959,6 @@ static inline void xfrm_dst_destroy(stru + dst_release(xdst->route); + if (likely(xdst->u.dst.xfrm)) + xfrm_state_put(xdst->u.dst.xfrm); +-#ifdef CONFIG_XFRM_SUB_POLICY +- kfree(xdst->origin); +- xdst->origin = NULL; +- kfree(xdst->partner); +- xdst->partner = NULL; +-#endif + } + #endif + +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -1808,43 +1808,6 @@ free_dst: + goto out; + } + +-#ifdef CONFIG_XFRM_SUB_POLICY +-static int xfrm_dst_alloc_copy(void **target, const void *src, int size) +-{ +- if (!*target) { +- *target = kmalloc(size, GFP_ATOMIC); +- if (!*target) +- return -ENOMEM; +- } +- +- memcpy(*target, src, size); +- return 0; +-} +-#endif +- +-static int xfrm_dst_update_parent(struct dst_entry *dst, +- const struct xfrm_selector *sel) +-{ +-#ifdef CONFIG_XFRM_SUB_POLICY +- struct xfrm_dst *xdst = (struct xfrm_dst *)dst; +- return xfrm_dst_alloc_copy((void **)&(xdst->partner), +- sel, sizeof(*sel)); +-#else +- return 0; +-#endif +-} +- +-static int xfrm_dst_update_origin(struct dst_entry *dst, +- const struct flowi *fl) +-{ +-#ifdef CONFIG_XFRM_SUB_POLICY +- struct xfrm_dst *xdst = (struct xfrm_dst *)dst; +- return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl)); +-#else +- return 0; +-#endif +-} +- + static int 
xfrm_expand_policies(const struct flowi *fl, u16 family, + struct xfrm_policy **pols, + int *num_pols, int *num_xfrms) +@@ -1916,16 +1879,6 @@ xfrm_resolve_and_create_bundle(struct xf + + xdst = (struct xfrm_dst *)dst; + xdst->num_xfrms = err; +- if (num_pols > 1) +- err = xfrm_dst_update_parent(dst, &pols[1]->selector); +- else +- err = xfrm_dst_update_origin(dst, fl); +- if (unlikely(err)) { +- dst_free(dst); +- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); +- return ERR_PTR(err); +- } +- + xdst->num_pols = num_pols; + memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); + xdst->policy_genid = atomic_read(&pols[0]->genid); diff --git a/queue-4.9/xfrm-null-dereference-on-allocation-failure.patch b/queue-4.9/xfrm-null-dereference-on-allocation-failure.patch new file mode 100644 index 00000000000..2a9fbda4b05 --- /dev/null +++ b/queue-4.9/xfrm-null-dereference-on-allocation-failure.patch @@ -0,0 +1,34 @@ +From e747f64336fc15e1c823344942923195b800aa1e Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Wed, 14 Jun 2017 13:35:37 +0300 +Subject: xfrm: NULL dereference on allocation failure + +From: Dan Carpenter + +commit e747f64336fc15e1c823344942923195b800aa1e upstream. + +The default error code in pfkey_msg2xfrm_state() is -ENOBUFS. We +added a new call to security_xfrm_state_alloc() which sets "err" to zero +so there are several places where we can return ERR_PTR(0) if kmalloc() +fails. The caller is expecting error pointers so it leads to a NULL +dereference. 
+ +Fixes: df71837d5024 ("[LSM-IPSec]: Security association restriction.") +Signed-off-by: Dan Carpenter +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman + +--- + net/key/af_key.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/key/af_key.c ++++ b/net/key/af_key.c +@@ -1135,6 +1135,7 @@ static struct xfrm_state * pfkey_msg2xfr + goto out; + } + ++ err = -ENOBUFS; + key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; + if (sa->sadb_sa_auth) { + int keysize = 0; diff --git a/queue-4.9/xfrm-oops-on-error-in-pfkey_msg2xfrm_state.patch b/queue-4.9/xfrm-oops-on-error-in-pfkey_msg2xfrm_state.patch new file mode 100644 index 00000000000..aa10f5af18b --- /dev/null +++ b/queue-4.9/xfrm-oops-on-error-in-pfkey_msg2xfrm_state.patch @@ -0,0 +1,71 @@ +From 1e3d0c2c70cd3edb5deed186c5f5c75f2b84a633 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Wed, 14 Jun 2017 13:34:05 +0300 +Subject: xfrm: Oops on error in pfkey_msg2xfrm_state() + +From: Dan Carpenter + +commit 1e3d0c2c70cd3edb5deed186c5f5c75f2b84a633 upstream. + +There are some missing error codes here so we accidentally return NULL +instead of an error pointer. It results in a NULL pointer dereference. 
+ +Fixes: df71837d5024 ("[LSM-IPSec]: Security association restriction.") +Signed-off-by: Dan Carpenter +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman + +--- + net/key/af_key.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +--- a/net/key/af_key.c ++++ b/net/key/af_key.c +@@ -1147,8 +1147,10 @@ static struct xfrm_state * pfkey_msg2xfr + if (key) + keysize = (key->sadb_key_bits + 7) / 8; + x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL); +- if (!x->aalg) ++ if (!x->aalg) { ++ err = -ENOMEM; + goto out; ++ } + strcpy(x->aalg->alg_name, a->name); + x->aalg->alg_key_len = 0; + if (key) { +@@ -1167,8 +1169,10 @@ static struct xfrm_state * pfkey_msg2xfr + goto out; + } + x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL); +- if (!x->calg) ++ if (!x->calg) { ++ err = -ENOMEM; + goto out; ++ } + strcpy(x->calg->alg_name, a->name); + x->props.calgo = sa->sadb_sa_encrypt; + } else { +@@ -1182,8 +1186,10 @@ static struct xfrm_state * pfkey_msg2xfr + if (key) + keysize = (key->sadb_key_bits + 7) / 8; + x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL); +- if (!x->ealg) ++ if (!x->ealg) { ++ err = -ENOMEM; + goto out; ++ } + strcpy(x->ealg->alg_name, a->name); + x->ealg->alg_key_len = 0; + if (key) { +@@ -1228,8 +1234,10 @@ static struct xfrm_state * pfkey_msg2xfr + struct xfrm_encap_tmpl *natt; + + x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL); +- if (!x->encap) ++ if (!x->encap) { ++ err = -ENOMEM; + goto out; ++ } + + natt = x->encap; + n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1];