From ac8423f5d2347de436e4064a81e05fd1d2773eb3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 29 Apr 2019 15:14:59 +0200 Subject: [PATCH] 4.14-stable patches added patches: dm-integrity-change-memcmp-to-strncmp-in-dm_integrity_ctr.patch mm-fix-warning-in-insert_pfn.patch x86-retpolines-disable-switch-jump-tables-when-retpolines-are-enabled.patch x86-retpolines-raise-limit-for-generating-indirect-calls-from-switch-case.patch --- ...emcmp-to-strncmp-in-dm_integrity_ctr.patch | 48 +++++ queue-4.14/mm-fix-warning-in-insert_pfn.patch | 74 ++++++++ queue-4.14/series | 4 + ...p-tables-when-retpolines-are-enabled.patch | 70 +++++++ ...ting-indirect-calls-from-switch-case.patch | 175 ++++++++++++++++++ 5 files changed, 371 insertions(+) create mode 100644 queue-4.14/dm-integrity-change-memcmp-to-strncmp-in-dm_integrity_ctr.patch create mode 100644 queue-4.14/mm-fix-warning-in-insert_pfn.patch create mode 100644 queue-4.14/x86-retpolines-disable-switch-jump-tables-when-retpolines-are-enabled.patch create mode 100644 queue-4.14/x86-retpolines-raise-limit-for-generating-indirect-calls-from-switch-case.patch diff --git a/queue-4.14/dm-integrity-change-memcmp-to-strncmp-in-dm_integrity_ctr.patch b/queue-4.14/dm-integrity-change-memcmp-to-strncmp-in-dm_integrity_ctr.patch new file mode 100644 index 00000000000..894e59f97ce --- /dev/null +++ b/queue-4.14/dm-integrity-change-memcmp-to-strncmp-in-dm_integrity_ctr.patch @@ -0,0 +1,48 @@ +From 0d74e6a3b6421d98eeafbed26f29156d469bc0b5 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Wed, 13 Mar 2019 07:56:02 -0400 +Subject: dm integrity: change memcmp to strncmp in dm_integrity_ctr + +From: Mikulas Patocka + +commit 0d74e6a3b6421d98eeafbed26f29156d469bc0b5 upstream. + +If the string opt_string is small, the function memcmp can access bytes +that are beyond the terminating nul character. In theory, it could cause +segfault, if opt_string were located just below some unmapped memory. 
+ +Change from memcmp to strncmp so that we don't read bytes beyond the end +of the string. + +Cc: stable@vger.kernel.org # v4.12+ +Signed-off-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-integrity.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/md/dm-integrity.c ++++ b/drivers/md/dm-integrity.c +@@ -2917,17 +2917,17 @@ static int dm_integrity_ctr(struct dm_ta + goto bad; + } + ic->sectors_per_block = val >> SECTOR_SHIFT; +- } else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { ++ } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { + r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, + "Invalid internal_hash argument"); + if (r) + goto bad; +- } else if (!memcmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { ++ } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { + r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error, + "Invalid journal_crypt argument"); + if (r) + goto bad; +- } else if (!memcmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { ++ } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { + r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error, + "Invalid journal_mac argument"); + if (r) diff --git a/queue-4.14/mm-fix-warning-in-insert_pfn.patch b/queue-4.14/mm-fix-warning-in-insert_pfn.patch new file mode 100644 index 00000000000..b76c463efe3 --- /dev/null +++ b/queue-4.14/mm-fix-warning-in-insert_pfn.patch @@ -0,0 +1,74 @@ +From f2c57d91b0d96aa13ccff4e3b178038f17b00658 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Tue, 30 Oct 2018 15:10:47 -0700 +Subject: mm: Fix warning in insert_pfn() + +From: Jan Kara + +commit f2c57d91b0d96aa13ccff4e3b178038f17b00658 upstream. 
+ +In DAX mode a write pagefault can race with write(2) in the following +way: + +CPU0 CPU1 + write fault for mapped zero page (hole) +dax_iomap_rw() + iomap_apply() + xfs_file_iomap_begin() + - allocates blocks + dax_iomap_actor() + invalidate_inode_pages2_range() + - invalidates radix tree entries in given range + dax_iomap_pte_fault() + grab_mapping_entry() + - no entry found, creates empty + ... + xfs_file_iomap_begin() + - finds already allocated block + ... + vmf_insert_mixed_mkwrite() + - WARNs and does nothing because there + is still zero page mapped in PTE + unmap_mapping_pages() + +This race results in WARN_ON from insert_pfn() and is occasionally +triggered by fstest generic/344. Note that the race is otherwise +harmless as before write(2) on CPU0 is finished, we will invalidate page +tables properly and thus user of mmap will see modified data from +write(2) from that point on. So just restrict the warning only to the +case when the PFN in PTE is not zero page. + +Link: http://lkml.kernel.org/r/20180824154542.26872-1-jack@suse.cz +Signed-off-by: Jan Kara +Reviewed-by: Andrew Morton +Cc: Ross Zwisler +Cc: Dan Williams +Cc: Dave Jiang +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memory.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -1804,10 +1804,15 @@ static int insert_pfn(struct vm_area_str + * in may not match the PFN we have mapped if the + * mapped PFN is a writeable COW page. In the mkwrite + * case we are creating a writable PTE for a shared +- * mapping and we expect the PFNs to match. ++ * mapping and we expect the PFNs to match. If they ++ * don't match, we are likely racing with block ++ * allocation and mapping invalidation so just skip the ++ * update. 
+ */ +- if (WARN_ON_ONCE(pte_pfn(*pte) != pfn_t_to_pfn(pfn))) ++ if (pte_pfn(*pte) != pfn_t_to_pfn(pfn)) { ++ WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte))); + goto out_unlock; ++ } + entry = *pte; + goto out_mkwrite; + } else diff --git a/queue-4.14/series b/queue-4.14/series index 9fede67354f..02071a6c394 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -36,3 +36,7 @@ netfilter-ebtables-config_compat-drop-a-bogus-warn_on.patch fm10k-fix-a-potential-null-pointer-dereference.patch tipc-check-bearer-name-with-right-length-in-tipc_nl_compat_bearer_enable.patch tipc-check-link-name-with-right-length-in-tipc_nl_compat_link_set.patch +dm-integrity-change-memcmp-to-strncmp-in-dm_integrity_ctr.patch +x86-retpolines-raise-limit-for-generating-indirect-calls-from-switch-case.patch +x86-retpolines-disable-switch-jump-tables-when-retpolines-are-enabled.patch +mm-fix-warning-in-insert_pfn.patch diff --git a/queue-4.14/x86-retpolines-disable-switch-jump-tables-when-retpolines-are-enabled.patch b/queue-4.14/x86-retpolines-disable-switch-jump-tables-when-retpolines-are-enabled.patch new file mode 100644 index 00000000000..47027929957 --- /dev/null +++ b/queue-4.14/x86-retpolines-disable-switch-jump-tables-when-retpolines-are-enabled.patch @@ -0,0 +1,70 @@ +From a9d57ef15cbe327fe54416dd194ee0ea66ae53a4 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Mon, 25 Mar 2019 14:56:20 +0100 +Subject: x86/retpolines: Disable switch jump tables when retpolines are enabled +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Daniel Borkmann + +commit a9d57ef15cbe327fe54416dd194ee0ea66ae53a4 upstream. + +Commit ce02ef06fcf7 ("x86, retpolines: Raise limit for generating indirect +calls from switch-case") raised the limit under retpolines to 20 switch +cases where gcc would only then start to emit jump tables, and therefore +effectively disabling the emission of slow indirect calls in this area. 
+ +After this has been brought to attention to gcc folks [0], Martin Liska +has then fixed gcc to align with clang by avoiding to generate switch jump +tables entirely under retpolines. This is taking effect in gcc starting +from stable version 8.4.0. Given kernel supports compilation with older +versions of gcc where the fix is not being available or backported anymore, +we need to keep the extra KBUILD_CFLAGS around for some time and generally +set the -fno-jump-tables to align with what more recent gcc is doing +automatically today. + +More than 20 switch cases are not expected to be fast-path critical, but +it would still be good to align with gcc behavior for versions < 8.4.0 in +order to have consistency across supported gcc versions. vmlinux size is +slightly growing by 0.27% for older gcc. This flag is only set to work +around affected gcc, no change for clang. + + [0] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86952 + +Suggested-by: Martin Liska +Signed-off-by: Daniel Borkmann +Signed-off-by: Thomas Gleixner +Cc: David Woodhouse +Cc: Linus Torvalds +Cc: Jesper Dangaard Brouer +Cc: Björn Töpel +Cc: Magnus Karlsson +Cc: Alexei Starovoitov +Cc: H.J. Lu +Cc: Alexei Starovoitov +Cc: David S. Miller +Link: https://lkml.kernel.org/r/20190325135620.14882-1-daniel@iogearbox.net +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/Makefile | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/arch/x86/Makefile ++++ b/arch/x86/Makefile +@@ -245,8 +245,12 @@ ifdef CONFIG_RETPOLINE + # Additionally, avoid generating expensive indirect jumps which + # are subject to retpolines for small number of switch cases. + # clang turns off jump table generation by default when under +- # retpoline builds, however, gcc does not for x86. +- KBUILD_CFLAGS += $(call cc-option,--param=case-values-threshold=20) ++ # retpoline builds, however, gcc does not for x86. 
This has ++ # only been fixed starting from gcc stable version 8.4.0 and ++ # onwards, but not for older ones. See gcc bug #86952. ++ ifndef CONFIG_CC_IS_CLANG ++ KBUILD_CFLAGS += $(call cc-option,-fno-jump-tables) ++ endif + endif + + archscripts: scripts_basic diff --git a/queue-4.14/x86-retpolines-raise-limit-for-generating-indirect-calls-from-switch-case.patch b/queue-4.14/x86-retpolines-raise-limit-for-generating-indirect-calls-from-switch-case.patch new file mode 100644 index 00000000000..5630118a7bc --- /dev/null +++ b/queue-4.14/x86-retpolines-raise-limit-for-generating-indirect-calls-from-switch-case.patch @@ -0,0 +1,175 @@ +From ce02ef06fcf7a399a6276adb83f37373d10cbbe1 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Thu, 21 Feb 2019 23:19:41 +0100 +Subject: x86, retpolines: Raise limit for generating indirect calls from switch-case +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Daniel Borkmann + +commit ce02ef06fcf7a399a6276adb83f37373d10cbbe1 upstream. + +From networking side, there are numerous attempts to get rid of indirect +calls in fast-path wherever feasible in order to avoid the cost of +retpolines, for example, just to name a few: + + * 283c16a2dfd3 ("indirect call wrappers: helpers to speed-up indirect calls of builtin") + * aaa5d90b395a ("net: use indirect call wrappers at GRO network layer") + * 028e0a476684 ("net: use indirect call wrappers at GRO transport layer") + * 356da6d0cde3 ("dma-mapping: bypass indirect calls for dma-direct") + * 09772d92cd5a ("bpf: avoid retpoline for lookup/update/delete calls on maps") + * 10870dd89e95 ("netfilter: nf_tables: add direct calls for all builtin expressions") + [...] 
+ +Recent work on XDP from Björn and Magnus additionally found that manually +transforming the XDP return code switch statement with more than 5 cases +into if-else combination would result in a considerable speedup in XDP +layer due to avoidance of indirect calls in CONFIG_RETPOLINE enabled +builds. On i40e driver with XDP prog attached, a 20-26% speedup has been +observed [0]. Aside from XDP, there are many other places later in the +networking stack's critical path with similar switch-case +processing. Rather than fixing every XDP-enabled driver and locations in +stack by hand, it would be good to instead raise the limit where gcc would +emit expensive indirect calls from the switch under retpolines and stick +with the default as-is in case of !retpoline configured kernels. This would +also have the advantage that for archs where this is not necessary, we let +compiler select the underlying target optimization for these constructs and +avoid potential slow-downs by if-else hand-rewrite. + +In case of gcc, this setting is controlled by case-values-threshold which +has an architecture global default that selects 4 or 5 (latter if target +does not have a case insn that compares the bounds) where some arch back +ends like arm64 or s390 override it with their own target hooks, for +example, in gcc commit db7a90aa0de5 ("S/390: Disable prediction of indirect +branches") the threshold pretty much disables jump tables by limit of 20 +under retpoline builds. 
Comparing gcc's and clang's default code +generation on x86-64 under O2 level with retpoline build results in the +following outcome for 5 switch cases: + +* gcc with -mindirect-branch=thunk-inline -mindirect-branch-register: + + # gdb -batch -ex 'disassemble dispatch' ./c-switch + Dump of assembler code for function dispatch: + 0x0000000000400be0 <+0>: cmp $0x4,%edi + 0x0000000000400be3 <+3>: ja 0x400c35 + 0x0000000000400be5 <+5>: lea 0x915f8(%rip),%rdx # 0x4921e4 + 0x0000000000400bec <+12>: mov %edi,%edi + 0x0000000000400bee <+14>: movslq (%rdx,%rdi,4),%rax + 0x0000000000400bf2 <+18>: add %rdx,%rax + 0x0000000000400bf5 <+21>: callq 0x400c01 + 0x0000000000400bfa <+26>: pause + 0x0000000000400bfc <+28>: lfence + 0x0000000000400bff <+31>: jmp 0x400bfa + 0x0000000000400c01 <+33>: mov %rax,(%rsp) + 0x0000000000400c05 <+37>: retq + 0x0000000000400c06 <+38>: nopw %cs:0x0(%rax,%rax,1) + 0x0000000000400c10 <+48>: jmpq 0x400c90 + 0x0000000000400c15 <+53>: nopl (%rax) + 0x0000000000400c18 <+56>: jmpq 0x400c70 + 0x0000000000400c1d <+61>: nopl (%rax) + 0x0000000000400c20 <+64>: jmpq 0x400c50 + 0x0000000000400c25 <+69>: nopl (%rax) + 0x0000000000400c28 <+72>: jmpq 0x400c40 + 0x0000000000400c2d <+77>: nopl (%rax) + 0x0000000000400c30 <+80>: jmpq 0x400cb0 + 0x0000000000400c35 <+85>: push %rax + 0x0000000000400c36 <+86>: callq 0x40dd80 + End of assembler dump. 
+ +* clang with -mretpoline emitting search tree: + + # gdb -batch -ex 'disassemble dispatch' ./c-switch + Dump of assembler code for function dispatch: + 0x0000000000400b30 <+0>: cmp $0x1,%edi + 0x0000000000400b33 <+3>: jle 0x400b44 + 0x0000000000400b35 <+5>: cmp $0x2,%edi + 0x0000000000400b38 <+8>: je 0x400b4d + 0x0000000000400b3a <+10>: cmp $0x3,%edi + 0x0000000000400b3d <+13>: jne 0x400b52 + 0x0000000000400b3f <+15>: jmpq 0x400c50 + 0x0000000000400b44 <+20>: test %edi,%edi + 0x0000000000400b46 <+22>: jne 0x400b5c + 0x0000000000400b48 <+24>: jmpq 0x400c20 + 0x0000000000400b4d <+29>: jmpq 0x400c40 + 0x0000000000400b52 <+34>: cmp $0x4,%edi + 0x0000000000400b55 <+37>: jne 0x400b66 + 0x0000000000400b57 <+39>: jmpq 0x400c60 + 0x0000000000400b5c <+44>: cmp $0x1,%edi + 0x0000000000400b5f <+47>: jne 0x400b66 + 0x0000000000400b61 <+49>: jmpq 0x400c30 + 0x0000000000400b66 <+54>: push %rax + 0x0000000000400b67 <+55>: callq 0x40dd20 + End of assembler dump. + + For sake of comparison, clang without -mretpoline: + + # gdb -batch -ex 'disassemble dispatch' ./c-switch + Dump of assembler code for function dispatch: + 0x0000000000400b30 <+0>: cmp $0x4,%edi + 0x0000000000400b33 <+3>: ja 0x400b57 + 0x0000000000400b35 <+5>: mov %edi,%eax + 0x0000000000400b37 <+7>: jmpq *0x492148(,%rax,8) + 0x0000000000400b3e <+14>: jmpq 0x400bf0 + 0x0000000000400b43 <+19>: jmpq 0x400c30 + 0x0000000000400b48 <+24>: jmpq 0x400c10 + 0x0000000000400b4d <+29>: jmpq 0x400c20 + 0x0000000000400b52 <+34>: jmpq 0x400c00 + 0x0000000000400b57 <+39>: push %rax + 0x0000000000400b58 <+40>: callq 0x40dcf0 + End of assembler dump. + +Raising the cases to a high number (e.g. 
100) will still result in a similar +code generation pattern with clang and gcc as above, in other words clang +generally turns off jump table emission by having an extra expansion pass +under retpoline build to turn indirectbr instructions from their IR into +switch instructions as a built-in -mno-jump-table lowering of a switch (in +this case, even if IR input already contained an indirect branch). + +For gcc, adding --param=case-values-threshold=20 in similar fashion to +s390 in order to raise the limit for x86 retpoline enabled builds results +in a small vmlinux size increase of only 0.13% (before=18,027,528 +after=18,051,192). For clang this option is ignored due to i) not being +needed as mentioned and ii) not having the above cmdline +parameter. Non-retpoline-enabled builds with gcc continue to use the +default case-values-threshold setting, so nothing changes here. + +[0] https://lore.kernel.org/netdev/20190129095754.9390-1-bjorn.topel@gmail.com/ + and "The Path to DPDK Speeds for AF_XDP", LPC 2018, networking track: + - http://vger.kernel.org/lpc_net2018_talks/lpc18_pres_af_xdp_perf-v3.pdf + - http://vger.kernel.org/lpc_net2018_talks/lpc18_paper_af_xdp_perf-v2.pdf + +Signed-off-by: Daniel Borkmann +Signed-off-by: Thomas Gleixner +Acked-by: Jesper Dangaard Brouer +Acked-by: Björn Töpel +Acked-by: Linus Torvalds +Cc: netdev@vger.kernel.org +Cc: David S. 
Miller +Cc: Magnus Karlsson +Cc: Alexei Starovoitov +Cc: Peter Zijlstra +Cc: David Woodhouse +Cc: Andy Lutomirski +Cc: Borislav Petkov +Link: https://lkml.kernel.org/r/20190221221941.29358-1-daniel@iogearbox.net +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/Makefile | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/arch/x86/Makefile ++++ b/arch/x86/Makefile +@@ -242,6 +242,11 @@ KBUILD_CFLAGS += -fno-asynchronous-unwin + # Avoid indirect branches in kernel to deal with Spectre + ifdef CONFIG_RETPOLINE + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) ++ # Additionally, avoid generating expensive indirect jumps which ++ # are subject to retpolines for small number of switch cases. ++ # clang turns off jump table generation by default when under ++ # retpoline builds, however, gcc does not for x86. ++ KBUILD_CFLAGS += $(call cc-option,--param=case-values-threshold=20) + endif + + archscripts: scripts_basic -- 2.39.5