From: Greg Kroah-Hartman Date: Thu, 30 Aug 2018 17:04:02 +0000 (-0700) Subject: 4.18-stable patches X-Git-Tag: v3.18.121~40 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=174b40510ed975d2e1d37af500d79f8a11d9e3e5;p=thirdparty%2Fkernel%2Fstable-queue.git 4.18-stable patches added patches: arm64-dts-rockchip-corrected-uart1-clock-names-for-rk3328.patch arm64-fix-mismatched-cache-line-size-detection.patch arm64-handle-mismatched-cache-type.patch arm64-mm-check-for-upper-page_shift-bits-in-pfn_valid.patch ext4-check-for-nul-characters-in-extended-attribute-s-name.patch ext4-fix-race-when-setting-the-bitmap-corrupted-flag.patch ext4-reset-error-code-in-ext4_find_entry-in-fallback.patch ext4-sysfs-print-ext4_super_block-fields-as-little-endian.patch ext4-use-ext4_warning-for-sb_getblk-failure.patch iommu-arm-smmu-error-out-only-if-not-enough-context-interrupts.patch kprobes-arm64-fix-p-uses-in-error-messages.patch kvm-arm-arm64-fix-lost-irqs-from-emulated-physcial-timer-when-blocked.patch kvm-arm-arm64-fix-potential-loss-of-ptimer-interrupts.patch kvm-arm-arm64-skip-updating-pmd-entry-if-no-change.patch kvm-arm-arm64-skip-updating-pte-entry-if-no-change.patch perf-kvm-fix-subcommands-on-s390.patch printk-nmi-prevent-deadlock-when-accessing-the-main-log-buffer-in-nmi.patch s390-kvm-fix-deadlock-when-killed-by-oom.patch stop_machine-atomically-queue-and-wake-stopper-threads.patch stop_machine-reflow-cpu_stop_queue_two_works.patch --- diff --git a/queue-4.18/arm64-dts-rockchip-corrected-uart1-clock-names-for-rk3328.patch b/queue-4.18/arm64-dts-rockchip-corrected-uart1-clock-names-for-rk3328.patch new file mode 100644 index 00000000000..8b70a0addd2 --- /dev/null +++ b/queue-4.18/arm64-dts-rockchip-corrected-uart1-clock-names-for-rk3328.patch @@ -0,0 +1,32 @@ +From d0414fdd58eb51ffd6528280fd66705123663964 Mon Sep 17 00:00:00 2001 +From: Huibin Hong +Date: Fri, 6 Jul 2018 16:03:57 +0800 +Subject: arm64: dts: rockchip: corrected uart1 clock-names for rk3328 + +From: Huibin Hong + +commit d0414fdd58eb51ffd6528280fd66705123663964 upstream. + +Corrected the uart clock-names or the uart driver might fail. + +Fixes: 52e02d377a72 ("arm64: dts: rockchip: add core dtsi file for RK3328 SoCs") +Cc: stable@vger.kernel.org +Signed-off-by: Huibin Hong +Signed-off-by: Heiko Stuebner +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/boot/dts/rockchip/rk3328.dtsi | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi ++++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi +@@ -331,7 +331,7 @@ + reg = <0x0 0xff120000 0x0 0x100>; + interrupts = ; + clocks = <&cru SCLK_UART1>, <&cru PCLK_UART1>; +- clock-names = "sclk_uart", "pclk_uart"; ++ clock-names = "baudclk", "apb_pclk"; + dmas = <&dmac 4>, <&dmac 5>; + dma-names = "tx", "rx"; + pinctrl-names = "default"; diff --git a/queue-4.18/arm64-fix-mismatched-cache-line-size-detection.patch b/queue-4.18/arm64-fix-mismatched-cache-line-size-detection.patch new file mode 100644 index 00000000000..e2c7203e2c9 --- /dev/null +++ b/queue-4.18/arm64-fix-mismatched-cache-line-size-detection.patch @@ -0,0 +1,81 @@ +From 4c4a39dd5fe2d13e2d2fa5fceb8ef95d19fc389a Mon Sep 17 00:00:00 2001 +From: Suzuki K Poulose +Date: Wed, 4 Jul 2018 23:07:45 +0100 +Subject: arm64: Fix mismatched cache line size detection + +From: Suzuki K Poulose + +commit 4c4a39dd5fe2d13e2d2fa5fceb8ef95d19fc389a upstream. 
+ +If there is a mismatch in the I/D min line size, we must +always use the system wide safe value both in applications +and in the kernel, while performing cache operations. However, +we have been checking more bits than just the min line sizes, +which triggers false negatives. We may need to trap the user +accesses in such cases, but not necessarily patch the kernel. + +This patch fixes the check to do the right thing as advertised. +A new capability will be added to check mismatches in other +fields and ensure we trap the CTR accesses. + +Fixes: be68a8aaf925 ("arm64: cpufeature: Fix CTR_EL0 field definitions") +Cc: +Cc: Mark Rutland +Cc: Catalin Marinas +Reported-by: Will Deacon +Signed-off-by: Suzuki K Poulose +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/include/asm/cache.h | 4 ++++ + arch/arm64/kernel/cpu_errata.c | 6 ++++-- + arch/arm64/kernel/cpufeature.c | 2 +- + 3 files changed, 9 insertions(+), 3 deletions(-) + +--- a/arch/arm64/include/asm/cache.h ++++ b/arch/arm64/include/asm/cache.h +@@ -21,12 +21,16 @@ + #define CTR_L1IP_SHIFT 14 + #define CTR_L1IP_MASK 3 + #define CTR_DMINLINE_SHIFT 16 ++#define CTR_IMINLINE_SHIFT 0 + #define CTR_ERG_SHIFT 20 + #define CTR_CWG_SHIFT 24 + #define CTR_CWG_MASK 15 + #define CTR_IDC_SHIFT 28 + #define CTR_DIC_SHIFT 29 + ++#define CTR_CACHE_MINLINE_MASK \ ++ (0xf << CTR_DMINLINE_SHIFT | 0xf << CTR_IMINLINE_SHIFT) ++ + #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) + + #define ICACHE_POLICY_VPIPT 0 +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -68,9 +68,11 @@ static bool + has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, + int scope) + { ++ u64 mask = CTR_CACHE_MINLINE_MASK; ++ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); +- return (read_cpuid_cachetype() & arm64_ftr_reg_ctrel0.strict_mask) != +- (arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask); ++ return (read_cpuid_cachetype() & mask) != ++ (arm64_ftr_reg_ctrel0.sys_val & mask); + } + + static void +--- a/arch/arm64/kernel/cpufeature.c ++++ b/arch/arm64/kernel/cpufeature.c +@@ -214,7 +214,7 @@ static const struct arm64_ftr_bits ftr_c + * If we have differing I-cache policies, report it as the weakest - VIPT. + */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */ +- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ ++ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0), + ARM64_FTR_END, + }; + diff --git a/queue-4.18/arm64-handle-mismatched-cache-type.patch b/queue-4.18/arm64-handle-mismatched-cache-type.patch new file mode 100644 index 00000000000..8b44e92492d --- /dev/null +++ b/queue-4.18/arm64-handle-mismatched-cache-type.patch @@ -0,0 +1,74 @@ +From 314d53d297980676011e6fd83dac60db4a01dc70 Mon Sep 17 00:00:00 2001 +From: Suzuki K Poulose +Date: Wed, 4 Jul 2018 23:07:46 +0100 +Subject: arm64: Handle mismatched cache type + +From: Suzuki K Poulose + +commit 314d53d297980676011e6fd83dac60db4a01dc70 upstream. + +Track mismatches in the cache type register (CTR_EL0), other +than the D/I min line sizes and trap user accesses if there are any. 
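+
+Reduced to its core, the check reads roughly as follows (a sketch, not
+the kernel code itself; strict_mask stands in for
+arm64_ftr_reg_ctrel0.strict_mask):
+
+    u64 mask = CTR_CACHE_MINLINE_MASK;
+
+    /*
+     * For the cache *type* capability, drop the min line size fields
+     * from the strict mask; they are already covered by the existing
+     * cache *line size* capability.
+     */
+    if (entry->capability == ARM64_MISMATCHED_CACHE_TYPE)
+        mask ^= strict_mask;
+
+    /* A CPU mismatches if any CTR_EL0 field left in the mask differs. */
+    bool mismatch = (read_cpuid_cachetype() & mask) !=
+                    (arm64_ftr_reg_ctrel0.sys_val & mask);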
+ +Fixes: be68a8aaf925 ("arm64: cpufeature: Fix CTR_EL0 field definitions") +Cc: +Cc: Mark Rutland +Cc: Will Deacon +Cc: Catalin Marinas +Signed-off-by: Suzuki K Poulose +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/include/asm/cpucaps.h | 3 ++- + arch/arm64/kernel/cpu_errata.c | 17 ++++++++++++++--- + 2 files changed, 16 insertions(+), 4 deletions(-) + +--- a/arch/arm64/include/asm/cpucaps.h ++++ b/arch/arm64/include/asm/cpucaps.h +@@ -49,7 +49,8 @@ + #define ARM64_HAS_CACHE_DIC 28 + #define ARM64_HW_DBM 29 + #define ARM64_SSBD 30 ++#define ARM64_MISMATCHED_CACHE_TYPE 31 + +-#define ARM64_NCAPS 31 ++#define ARM64_NCAPS 32 + + #endif /* __ASM_CPUCAPS_H */ +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -65,11 +65,15 @@ is_kryo_midr(const struct arm64_cpu_capa + } + + static bool +-has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, +- int scope) ++has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, ++ int scope) + { + u64 mask = CTR_CACHE_MINLINE_MASK; + ++ /* Skip matching the min line sizes for cache type check */ ++ if (entry->capability == ARM64_MISMATCHED_CACHE_TYPE) ++ mask ^= arm64_ftr_reg_ctrel0.strict_mask; ++ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return (read_cpuid_cachetype() & mask) != + (arm64_ftr_reg_ctrel0.sys_val & mask); +@@ -615,7 +619,14 @@ const struct arm64_cpu_capabilities arm6 + { + .desc = "Mismatched cache line size", + .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, +- .matches = has_mismatched_cache_line_size, ++ .matches = has_mismatched_cache_type, ++ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, ++ .cpu_enable = cpu_enable_trap_ctr_access, ++ }, ++ { ++ .desc = "Mismatched cache type", ++ .capability = ARM64_MISMATCHED_CACHE_TYPE, ++ .matches = has_mismatched_cache_type, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .cpu_enable = cpu_enable_trap_ctr_access, + }, diff --git a/queue-4.18/arm64-mm-check-for-upper-page_shift-bits-in-pfn_valid.patch b/queue-4.18/arm64-mm-check-for-upper-page_shift-bits-in-pfn_valid.patch new file mode 100644 index 00000000000..c4fe5b57caa --- /dev/null +++ b/queue-4.18/arm64-mm-check-for-upper-page_shift-bits-in-pfn_valid.patch @@ -0,0 +1,59 @@ +From 5ad356eabc47d26a92140a0c4b20eba471c10de3 Mon Sep 17 00:00:00 2001 +From: Greg Hackmann +Date: Wed, 15 Aug 2018 12:51:21 -0700 +Subject: arm64: mm: check for upper PAGE_SHIFT bits in pfn_valid() + +From: Greg Hackmann + +commit 5ad356eabc47d26a92140a0c4b20eba471c10de3 upstream. + +ARM64's pfn_valid() shifts away the upper PAGE_SHIFT bits of the input +before seeing if the PFN is valid. This leads to false positives when +some of the upper bits are set, but the lower bits match a valid PFN. + +For example, the following userspace code looks up a bogus entry in +/proc/kpageflags: + + int pagemap = open("/proc/self/pagemap", O_RDONLY); + int pageflags = open("/proc/kpageflags", O_RDONLY); + uint64_t pfn, val; + + lseek64(pagemap, [...], SEEK_SET); + read(pagemap, &pfn, sizeof(pfn)); + if (pfn & (1UL << 63)) { /* valid PFN */ + pfn &= ((1UL << 55) - 1); /* clear flag bits */ + pfn |= (1UL << 55); + lseek64(pageflags, pfn * sizeof(uint64_t), SEEK_SET); + read(pageflags, &val, sizeof(val)); + } + +On ARM64 this causes the userspace process to crash with SIGSEGV rather +than reading (1 << KPF_NOPAGE). kpageflags_read() treats the offset as +valid, and stable_page_flags() will try to access an address between the +user and kernel address ranges. 
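+
+The round-trip check added below can be demonstrated in plain C (a
+standalone sketch assuming 4K pages, i.e. PAGE_SHIFT == 12):
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    #define PAGE_SHIFT 12
+
+    /* The shift must round-trip, or upper bits were silently lost. */
+    static int pfn_shift_is_lossless(uint64_t pfn)
+    {
+        uint64_t addr = pfn << PAGE_SHIFT;
+
+        return (addr >> PAGE_SHIFT) == pfn;
+    }
+
+    int main(void)
+    {
+        printf("%d\n", pfn_shift_is_lossless(0x80000));                /* 1 */
+        printf("%d\n", pfn_shift_is_lossless((1ULL << 55) | 0x80000)); /* 0 */
+        return 0;
+    }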
+
+Fixes: c1cc1552616d ("arm64: MMU initialisation")
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Hackmann
+Signed-off-by: Will Deacon
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/arm64/mm/init.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/arm64/mm/init.c
++++ b/arch/arm64/mm/init.c
+@@ -287,7 +287,11 @@ static void __init zone_sizes_init(unsig
+ #ifdef CONFIG_HAVE_ARCH_PFN_VALID
+ int pfn_valid(unsigned long pfn)
+ {
+- return memblock_is_map_memory(pfn << PAGE_SHIFT);
++ phys_addr_t addr = pfn << PAGE_SHIFT;
++
++ if ((addr >> PAGE_SHIFT) != pfn)
++ return 0;
++ return memblock_is_map_memory(addr);
+ }
+ EXPORT_SYMBOL(pfn_valid);
+ #endif
diff --git a/queue-4.18/ext4-check-for-nul-characters-in-extended-attribute-s-name.patch b/queue-4.18/ext4-check-for-nul-characters-in-extended-attribute-s-name.patch
new file mode 100644
index 00000000000..b7035911737
--- /dev/null
+++ b/queue-4.18/ext4-check-for-nul-characters-in-extended-attribute-s-name.patch
@@ -0,0 +1,38 @@
+From 7d95178c77014dbd8dce36ee40bbbc5e6c121ff5 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o
+Date: Wed, 1 Aug 2018 12:36:52 -0400
+Subject: ext4: check for NUL characters in extended attribute's name
+
+From: Theodore Ts'o
+
+commit 7d95178c77014dbd8dce36ee40bbbc5e6c121ff5 upstream.
+
+Extended attribute names are defined to be NUL-terminated, so the name
+must not contain a NUL character. This is important because there are
+places where, when removing an extended attribute, the code uses strlen
+to determine the length of the entry. That should probably be fixed at
+some point, but the code is currently really messy, so the simplest fix
+for now is to simply validate that the extended attributes are sane.
+
+https://bugzilla.kernel.org/show_bug.cgi?id=200401
+
+Reported-by: Wen Xu
+Signed-off-by: Theodore Ts'o
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ext4/xattr.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -190,6 +190,8 @@ ext4_xattr_check_entries(struct ext4_xat
+ struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
+ if ((void *)next >= end)
+ return -EFSCORRUPTED;
++ if (strnlen(e->e_name, e->e_name_len) != e->e_name_len)
++ return -EFSCORRUPTED;
+ e = next;
+ }
+
diff --git a/queue-4.18/ext4-fix-race-when-setting-the-bitmap-corrupted-flag.patch b/queue-4.18/ext4-fix-race-when-setting-the-bitmap-corrupted-flag.patch
new file mode 100644
index 00000000000..c4ec22498b8
--- /dev/null
+++ b/queue-4.18/ext4-fix-race-when-setting-the-bitmap-corrupted-flag.patch
@@ -0,0 +1,70 @@
+From 9af0b3d1257756394ebbd06b14937b557e3a756b Mon Sep 17 00:00:00 2001
+From: Wang Shilong
+Date: Sun, 29 Jul 2018 17:27:45 -0400
+Subject: ext4: fix race when setting the bitmap corrupted flag
+
+From: Wang Shilong
+
+commit 9af0b3d1257756394ebbd06b14937b557e3a756b upstream.
+
+Whenever we hit a block or inode bitmap corruption, we set the
+corrupted bit and then reduce the block group's free inode/cluster
+counter to expose the correct amount of available space.
+
+However, some calls to ext4_mark_group_bitmap_corrupted() happen
+inside the group spinlock and some do not, so the free counters of
+a block group can end up being reduced twice.
+
+Always holding the group spinlock here would fix this, but it looks
+a little heavyweight; instead, use test_and_set_bit() to close the
+race.
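+
+The core of the pattern, in reduced form (a sketch of the block-bitmap
+case from the diff below, with the surrounding details omitted):
+
+    /*
+     * Only the caller that actually transitions the bit from 0 to 1
+     * adjusts the counter, so two racing callers can no longer
+     * subtract bb_free from the free-clusters counter twice.
+     */
+    if (!ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
+                               &grp->bb_state))
+        percpu_counter_sub(&sbi->s_freeclusters_counter, grp->bb_free);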
+ +Signed-off-by: Wang Shilong +Signed-off-by: Theodore Ts'o +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/super.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -776,26 +776,26 @@ void ext4_mark_group_bitmap_corrupted(st + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_group_info *grp = ext4_get_group_info(sb, group); + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); ++ int ret; + +- if ((flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) && +- !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) { +- percpu_counter_sub(&sbi->s_freeclusters_counter, +- grp->bb_free); +- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, +- &grp->bb_state); ++ if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) { ++ ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, ++ &grp->bb_state); ++ if (!ret) ++ percpu_counter_sub(&sbi->s_freeclusters_counter, ++ grp->bb_free); + } + +- if ((flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) && +- !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { +- if (gdp) { ++ if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) { ++ ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, ++ &grp->bb_state); ++ if (!ret && gdp) { + int count; + + count = ext4_free_inodes_count(sb, gdp); + percpu_counter_sub(&sbi->s_freeinodes_counter, + count); + } +- set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, +- &grp->bb_state); + } + } + diff --git a/queue-4.18/ext4-reset-error-code-in-ext4_find_entry-in-fallback.patch b/queue-4.18/ext4-reset-error-code-in-ext4_find_entry-in-fallback.patch new file mode 100644 index 00000000000..626811100d5 --- /dev/null +++ b/queue-4.18/ext4-reset-error-code-in-ext4_find_entry-in-fallback.patch @@ -0,0 +1,37 @@ +From f39b3f45dbcb0343822cce31ea7636ad66e60bc2 Mon Sep 17 00:00:00 2001 +From: Eric Sandeen +Date: Sun, 29 Jul 2018 17:13:42 -0400 +Subject: ext4: reset error code in ext4_find_entry in fallback + +From: Eric Sandeen + +commit f39b3f45dbcb0343822cce31ea7636ad66e60bc2 upstream. + +When ext4_find_entry() falls back to "searching the old fashioned +way" due to a corrupt dx dir, it needs to reset the error code +to NULL so that the nonstandard ERR_BAD_DX_DIR code isn't returned +to userspace. + +https://bugzilla.kernel.org/show_bug.cgi?id=199947 + +Reported-by: Anatoly Trosinenko +Reviewed-by: Andreas Dilger +Signed-off-by: Eric Sandeen +Signed-off-by: Theodore Ts'o +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/namei.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -1398,6 +1398,7 @@ static struct buffer_head * ext4_find_en + goto cleanup_and_exit; + dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " + "falling back\n")); ++ ret = NULL; + } + nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); + if (!nblocks) { diff --git a/queue-4.18/ext4-sysfs-print-ext4_super_block-fields-as-little-endian.patch b/queue-4.18/ext4-sysfs-print-ext4_super_block-fields-as-little-endian.patch new file mode 100644 index 00000000000..266d22eeba4 --- /dev/null +++ b/queue-4.18/ext4-sysfs-print-ext4_super_block-fields-as-little-endian.patch @@ -0,0 +1,59 @@ +From a4d2aadca184ece182418950d45ba4ffc7b652d2 Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann +Date: Sun, 29 Jul 2018 15:48:00 -0400 +Subject: ext4: sysfs: print ext4_super_block fields as little-endian + +From: Arnd Bergmann + +commit a4d2aadca184ece182418950d45ba4ffc7b652d2 upstream. 
+
+While working on extended range support for the last_error/first_error
+timestamps, I noticed that the endianness is wrong; we access the
+little-endian fields in struct ext4_super_block as native-endian when
+we print them.
+
+This adds a special case in ext4_attr_show() and ext4_attr_store()
+to byteswap the superblock fields if needed.
+
+In older kernels, this code was part of super.c; it got moved to
+sysfs.c in linux-4.4.
+
+Cc: stable@vger.kernel.org
+Fixes: 52c198c6820f ("ext4: add sysfs entry showing whether the fs contains errors")
+Reviewed-by: Andreas Dilger
+Signed-off-by: Arnd Bergmann
+Signed-off-by: Theodore Ts'o
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ext4/sysfs.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/sysfs.c
++++ b/fs/ext4/sysfs.c
+@@ -274,8 +274,12 @@ static ssize_t ext4_attr_show(struct kob
+ case attr_pointer_ui:
+ if (!ptr)
+ return 0;
+- return snprintf(buf, PAGE_SIZE, "%u\n",
+- *((unsigned int *) ptr));
++ if (a->attr_ptr == ptr_ext4_super_block_offset)
++ return snprintf(buf, PAGE_SIZE, "%u\n",
++ le32_to_cpup(ptr));
++ else
++ return snprintf(buf, PAGE_SIZE, "%u\n",
++ *((unsigned int *) ptr));
+ case attr_pointer_atomic:
+ if (!ptr)
+ return 0;
+@@ -308,7 +312,10 @@ static ssize_t ext4_attr_store(struct ko
+ ret = kstrtoul(skip_spaces(buf), 0, &t);
+ if (ret)
+ return ret;
+- *((unsigned int *) ptr) = t;
++ if (a->attr_ptr == ptr_ext4_super_block_offset)
++ *((__le32 *) ptr) = cpu_to_le32(t);
++ else
++ *((unsigned int *) ptr) = t;
+ return len;
+ case attr_inode_readahead:
+ return inode_readahead_blks_store(sbi, buf, len);
diff --git a/queue-4.18/ext4-use-ext4_warning-for-sb_getblk-failure.patch b/queue-4.18/ext4-use-ext4_warning-for-sb_getblk-failure.patch
new file mode 100644
index 00000000000..424b6cc4fe2
--- /dev/null
+++ b/queue-4.18/ext4-use-ext4_warning-for-sb_getblk-failure.patch
@@ -0,0 +1,52 @@
+From 5ef2a69993676a0dfd49bf60ae1323eb8a288366 Mon Sep 17 00:00:00 2001
+From: Wang Shilong
+Date: Wed, 1 Aug 2018 12:02:31 -0400
+Subject: ext4: use ext4_warning() for sb_getblk failure
+
+From: Wang Shilong
+
+commit 5ef2a69993676a0dfd49bf60ae1323eb8a288366 upstream.
+
+Out of memory should not be considered a critical error, so replace
+ext4_error() with ext4_warning().
+
+Signed-off-by: Wang Shilong
+Signed-off-by: Theodore Ts'o
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ext4/balloc.c | 6 +++---
+ fs/ext4/ialloc.c | 6 +++---
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -426,9 +426,9 @@ ext4_read_block_bitmap_nowait(struct sup
+ }
+ bh = sb_getblk(sb, bitmap_blk);
+ if (unlikely(!bh)) {
+- ext4_error(sb, "Cannot get buffer for block bitmap - "
+- "block_group = %u, block_bitmap = %llu",
+- block_group, bitmap_blk);
++ ext4_warning(sb, "Cannot get buffer for block bitmap - "
++ "block_group = %u, block_bitmap = %llu",
++ block_group, bitmap_blk);
+ return ERR_PTR(-ENOMEM);
+ }
+
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -138,9 +138,9 @@ ext4_read_inode_bitmap(struct super_bloc
+ }
+ bh = sb_getblk(sb, bitmap_blk);
+ if (unlikely(!bh)) {
+- ext4_error(sb, "Cannot read inode bitmap - "
+- "block_group = %u, inode_bitmap = %llu",
+- block_group, bitmap_blk);
++ ext4_warning(sb, "Cannot read inode bitmap - "
++ "block_group = %u, inode_bitmap = %llu",
++ block_group, bitmap_blk);
+ return ERR_PTR(-ENOMEM);
+ }
+ if (bitmap_uptodate(bh))
diff --git a/queue-4.18/iommu-arm-smmu-error-out-only-if-not-enough-context-interrupts.patch b/queue-4.18/iommu-arm-smmu-error-out-only-if-not-enough-context-interrupts.patch
new file mode 100644
index 00000000000..57a6619e351
--- /dev/null
+++ b/queue-4.18/iommu-arm-smmu-error-out-only-if-not-enough-context-interrupts.patch
@@ -0,0 +1,57 @@
+From d1e20222d5372e951bbb2fd3f6489ec4a6ea9b11 Mon Sep 17 00:00:00 2001
+From: Vivek Gautam
+Date: Thu, 19 Jul 2018 23:23:56 +0530
+Subject: iommu/arm-smmu: Error out only if not enough context interrupts
+
+From: Vivek Gautam
+
+commit d1e20222d5372e951bbb2fd3f6489ec4a6ea9b11 upstream.
+
+Currently we check if the number of context banks is not equal to
+num_context_interrupts. However, there are bootloaders, such as the
+one on sdm845, that reserve a few context banks, and thus the kernel
+sees fewer than the total available context banks.
+So, although the hardware definition in the device tree would mention
+the correct number of context interrupts, this number can be
+greater than the number of context banks visible to the SMMU driver
+in the kernel. We should therefore error out only when the number of
+context banks is greater than the available number of context
+interrupts.
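+
+Reduced to its core, the probe-time check becomes (a sketch of the
+change in the diff below):
+
+    if (smmu->num_context_banks > smmu->num_context_irqs)
+        return -ENODEV;		/* truly not enough interrupts */
+
+    /*
+     * Superfluous interrupts, belonging to context banks that the
+     * bootloader kept for itself, are simply ignored from here on.
+     */
+    smmu->num_context_irqs = smmu->num_context_banks;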
+ +Signed-off-by: Vivek Gautam +Suggested-by: Tomasz Figa +Cc: Robin Murphy +Cc: Will Deacon +[will: drop useless printk] +Signed-off-by: Will Deacon +Cc: Jitendra Bhivare +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/iommu/arm-smmu.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +--- a/drivers/iommu/arm-smmu.c ++++ b/drivers/iommu/arm-smmu.c +@@ -2103,12 +2103,16 @@ static int arm_smmu_device_probe(struct + if (err) + return err; + +- if (smmu->version == ARM_SMMU_V2 && +- smmu->num_context_banks != smmu->num_context_irqs) { +- dev_err(dev, +- "found only %d context interrupt(s) but %d required\n", +- smmu->num_context_irqs, smmu->num_context_banks); +- return -ENODEV; ++ if (smmu->version == ARM_SMMU_V2) { ++ if (smmu->num_context_banks > smmu->num_context_irqs) { ++ dev_err(dev, ++ "found only %d context irq(s) but %d required\n", ++ smmu->num_context_irqs, smmu->num_context_banks); ++ return -ENODEV; ++ } ++ ++ /* Ignore superfluous interrupts */ ++ smmu->num_context_irqs = smmu->num_context_banks; + } + + for (i = 0; i < smmu->num_global_irqs; ++i) { diff --git a/queue-4.18/kprobes-arm64-fix-p-uses-in-error-messages.patch b/queue-4.18/kprobes-arm64-fix-p-uses-in-error-messages.patch new file mode 100644 index 00000000000..81b404ffa52 --- /dev/null +++ b/queue-4.18/kprobes-arm64-fix-p-uses-in-error-messages.patch @@ -0,0 +1,52 @@ +From 0722867dcbc28cc9b269b57acd847c7c1aa638d6 Mon Sep 17 00:00:00 2001 +From: Masami Hiramatsu +Date: Sat, 28 Apr 2018 21:38:04 +0900 +Subject: kprobes/arm64: Fix %p uses in error messages + +From: Masami Hiramatsu + +commit 0722867dcbc28cc9b269b57acd847c7c1aa638d6 upstream. + +Fix %p uses in error messages by removing it because +those are redundant or meaningless. + +Signed-off-by: Masami Hiramatsu +Acked-by: Will Deacon +Cc: Ananth N Mavinakayanahalli +Cc: Anil S Keshavamurthy +Cc: Arnd Bergmann +Cc: David Howells +Cc: David S . Miller +Cc: Heiko Carstens +Cc: Jon Medhurst +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Thomas Richter +Cc: Tobin C . Harding +Cc: acme@kernel.org +Cc: akpm@linux-foundation.org +Cc: brueckner@linux.vnet.ibm.com +Cc: linux-arch@vger.kernel.org +Cc: rostedt@goodmis.org +Cc: schwidefsky@de.ibm.com +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/lkml/152491908405.9916.12425053035317241111.stgit@devbox +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/kernel/probes/kprobes.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm64/kernel/probes/kprobes.c ++++ b/arch/arm64/kernel/probes/kprobes.c +@@ -275,7 +275,7 @@ static int __kprobes reenter_kprobe(stru + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: +- pr_warn("Unrecoverable kprobe detected at %p.\n", p->addr); ++ pr_warn("Unrecoverable kprobe detected.\n"); + dump_kprobe(p); + BUG(); + break; diff --git a/queue-4.18/kvm-arm-arm64-fix-lost-irqs-from-emulated-physcial-timer-when-blocked.patch b/queue-4.18/kvm-arm-arm64-fix-lost-irqs-from-emulated-physcial-timer-when-blocked.patch new file mode 100644 index 00000000000..b3dbb61f5f7 --- /dev/null +++ b/queue-4.18/kvm-arm-arm64-fix-lost-irqs-from-emulated-physcial-timer-when-blocked.patch @@ -0,0 +1,53 @@ +From 245715cbe83ca934af5d20e078fd85175c62995e Mon Sep 17 00:00:00 2001 +From: Christoffer Dall +Date: Wed, 25 Jul 2018 10:21:28 +0100 +Subject: KVM: arm/arm64: Fix lost IRQs from emulated physcial timer when blocked + +From: Christoffer Dall + +commit 245715cbe83ca934af5d20e078fd85175c62995e upstream. 
+
+When the VCPU is blocked (for example from WFI) we don't inject the
+physical timer interrupt if it should fire while the CPU is blocked, but
+instead we just wake up the VCPU and expect kvm_timer_vcpu_load to take
+care of injecting the interrupt.
+
+Unfortunately, kvm_timer_vcpu_load() doesn't actually do that; it only
+schedules a soft timer if the emulated phys timer is expected to fire
+in the future.
+
+Follow the same pattern as kvm_timer_update_state() and update the irq
+state after potentially scheduling a soft timer.
+
+Reported-by: Andre Przywara
+Cc: Stable # 4.15+
+Fixes: bbdd52cfcba29 ("KVM: arm/arm64: Avoid phys timer emulation in vcpu entry/exit")
+Signed-off-by: Christoffer Dall
+Signed-off-by: Marc Zyngier
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ virt/kvm/arm/arch_timer.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/virt/kvm/arm/arch_timer.c
++++ b/virt/kvm/arm/arch_timer.c
+@@ -487,6 +487,7 @@ void kvm_timer_vcpu_load(struct kvm_vcpu
+ {
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
++ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+
+ if (unlikely(!timer->enabled))
+ return;
+@@ -502,6 +503,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu
+
+ /* Set the background timer for the physical timer emulation. */
+ phys_timer_emulate(vcpu);
++
++ /* If the timer fired while we weren't running, inject it now */
++ if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
++ kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
+ }
+
+ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
diff --git a/queue-4.18/kvm-arm-arm64-fix-potential-loss-of-ptimer-interrupts.patch b/queue-4.18/kvm-arm-arm64-fix-potential-loss-of-ptimer-interrupts.patch
new file mode 100644
index 00000000000..ef1b0fd0685
--- /dev/null
+++ b/queue-4.18/kvm-arm-arm64-fix-potential-loss-of-ptimer-interrupts.patch
@@ -0,0 +1,72 @@
+From 7afc4ddbf299a13aaf28406783d141a34c6b4f5a Mon Sep 17 00:00:00 2001
+From: Christoffer Dall
+Date: Wed, 25 Jul 2018 10:21:27 +0100
+Subject: KVM: arm/arm64: Fix potential loss of ptimer interrupts
+
+From: Christoffer Dall
+
+commit 7afc4ddbf299a13aaf28406783d141a34c6b4f5a upstream.
+
+kvm_timer_update_state() is called when changing the phys timer
+configuration registers, either via vcpu reset, as a result of a trap
+from the guest, or when userspace programs the registers.
+
+phys_timer_emulate() is in turn called by kvm_timer_update_state() to
+either cancel an existing software timer, or program a new software
+timer, to emulate the behavior of a real phys timer, based on the change
+in configuration registers.
+
+Unfortunately, the interaction between these two functions left a small
+race; if the conceptual emulated phys timer should actually fire, but
+the soft timer hasn't executed its callback yet, we cancel the timer in
+phys_timer_emulate without injecting an irq. This only happens if the
+check in kvm_timer_update_state is called before the timer should fire,
+which is relatively unlikely, but possible.
+
+The solution is to update the state of the phys timer after calling
+phys_timer_emulate, which will pick up the pending timer state and
+update the interrupt value.
+
+Note that this leaves the opportunity of raising the interrupt twice,
+once in the just-programmed soft timer, and once in
+kvm_timer_update_state. Since this always happens synchronously with
+the VCPU execution, there is no harm in this, and the guest only ever
+sees a single timer interrupt.
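+
+Both this fix and the companion kvm_timer_vcpu_load() fix above
+converge on the same ordering (a sketch of the pattern, not new code):
+
+    /* 1) Arm or cancel the soft timer for the emulated phys timer. */
+    phys_timer_emulate(vcpu);
+
+    /*
+     * 2) Only then fold the current "should fire" state into the
+     *    interrupt line, so an expiry that races with (or precedes)
+     *    step 1 is never lost.
+     */
+    if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
+        kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);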
+ +Cc: Stable # 4.15+ +Signed-off-by: Christoffer Dall +Signed-off-by: Marc Zyngier +Signed-off-by: Greg Kroah-Hartman + +--- + virt/kvm/arm/arch_timer.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/virt/kvm/arm/arch_timer.c ++++ b/virt/kvm/arm/arch_timer.c +@@ -295,9 +295,9 @@ static void phys_timer_emulate(struct kv + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + + /* +- * If the timer can fire now we have just raised the IRQ line and we +- * don't need to have a soft timer scheduled for the future. If the +- * timer cannot fire at all, then we also don't need a soft timer. ++ * If the timer can fire now, we don't need to have a soft timer ++ * scheduled for the future. If the timer cannot fire at all, ++ * then we also don't need a soft timer. + */ + if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) { + soft_timer_cancel(&timer->phys_timer, NULL); +@@ -332,10 +332,10 @@ static void kvm_timer_update_state(struc + level = kvm_timer_should_fire(vtimer); + kvm_timer_update_irq(vcpu, level, vtimer); + ++ phys_timer_emulate(vcpu); ++ + if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) + kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); +- +- phys_timer_emulate(vcpu); + } + + static void vtimer_save_state(struct kvm_vcpu *vcpu) diff --git a/queue-4.18/kvm-arm-arm64-skip-updating-pmd-entry-if-no-change.patch b/queue-4.18/kvm-arm-arm64-skip-updating-pmd-entry-if-no-change.patch new file mode 100644 index 00000000000..9a71310cd44 --- /dev/null +++ b/queue-4.18/kvm-arm-arm64-skip-updating-pmd-entry-if-no-change.patch @@ -0,0 +1,86 @@ +From 86658b819cd0a9aa584cd84453ed268a6f013770 Mon Sep 17 00:00:00 2001 +From: Punit Agrawal +Date: Mon, 13 Aug 2018 11:43:50 +0100 +Subject: KVM: arm/arm64: Skip updating PMD entry if no change + +From: Punit Agrawal + +commit 86658b819cd0a9aa584cd84453ed268a6f013770 upstream. + +Contention on updating a PMD entry by a large number of vcpus can lead +to duplicate work when handling stage 2 page faults. As the page table +update follows the break-before-make requirement of the architecture, +it can lead to repeated refaults due to clearing the entry and +flushing the tlbs. + +This problem is more likely when - + +* there are large number of vcpus +* the mapping is large block mapping + +such as when using PMD hugepages (512MB) with 64k pages. + +Fix this by skipping the page table update if there is no change in +the entry being updated. + +Cc: stable@vger.kernel.org +Fixes: ad361f093c1e ("KVM: ARM: Support hugetlbfs backed huge pages") +Reviewed-by: Suzuki Poulose +Acked-by: Christoffer Dall +Signed-off-by: Punit Agrawal +Signed-off-by: Marc Zyngier +Signed-off-by: Greg Kroah-Hartman + +--- + virt/kvm/arm/mmu.c | 38 +++++++++++++++++++++++++++----------- + 1 file changed, 27 insertions(+), 11 deletions(-) + +--- a/virt/kvm/arm/mmu.c ++++ b/virt/kvm/arm/mmu.c +@@ -1015,19 +1015,35 @@ static int stage2_set_pmd_huge(struct kv + pmd = stage2_get_pmd(kvm, cache, addr); + VM_BUG_ON(!pmd); + +- /* +- * Mapping in huge pages should only happen through a fault. If a +- * page is merged into a transparent huge page, the individual +- * subpages of that huge page should be unmapped through MMU +- * notifiers before we get here. +- * +- * Merging of CompoundPages is not supported; they should become +- * splitting first, unmapped, merged, and mapped back in on-demand. 
+- */ +- VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); +- + old_pmd = *pmd; + if (pmd_present(old_pmd)) { ++ /* ++ * Multiple vcpus faulting on the same PMD entry, can ++ * lead to them sequentially updating the PMD with the ++ * same value. Following the break-before-make ++ * (pmd_clear() followed by tlb_flush()) process can ++ * hinder forward progress due to refaults generated ++ * on missing translations. ++ * ++ * Skip updating the page table if the entry is ++ * unchanged. ++ */ ++ if (pmd_val(old_pmd) == pmd_val(*new_pmd)) ++ return 0; ++ ++ /* ++ * Mapping in huge pages should only happen through a ++ * fault. If a page is merged into a transparent huge ++ * page, the individual subpages of that huge page ++ * should be unmapped through MMU notifiers before we ++ * get here. ++ * ++ * Merging of CompoundPages is not supported; they ++ * should become splitting first, unmapped, merged, ++ * and mapped back in on-demand. ++ */ ++ VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); ++ + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { diff --git a/queue-4.18/kvm-arm-arm64-skip-updating-pte-entry-if-no-change.patch b/queue-4.18/kvm-arm-arm64-skip-updating-pte-entry-if-no-change.patch new file mode 100644 index 00000000000..502063e4047 --- /dev/null +++ b/queue-4.18/kvm-arm-arm64-skip-updating-pte-entry-if-no-change.patch @@ -0,0 +1,41 @@ +From 976d34e2dab10ece5ea8fe7090b7692913f89084 Mon Sep 17 00:00:00 2001 +From: Punit Agrawal +Date: Mon, 13 Aug 2018 11:43:51 +0100 +Subject: KVM: arm/arm64: Skip updating PTE entry if no change + +From: Punit Agrawal + +commit 976d34e2dab10ece5ea8fe7090b7692913f89084 upstream. + +When there is contention on faulting in a particular page table entry +at stage 2, the break-before-make requirement of the architecture can +lead to additional refaulting due to TLB invalidation. + +Avoid this by skipping a page table update if the new value of the PTE +matches the previous value. + +Cc: stable@vger.kernel.org +Fixes: d5d8184d35c9 ("KVM: ARM: Memory virtualization setup") +Reviewed-by: Suzuki Poulose +Acked-by: Christoffer Dall +Signed-off-by: Punit Agrawal +Signed-off-by: Marc Zyngier +Signed-off-by: Greg Kroah-Hartman + +--- + virt/kvm/arm/mmu.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/virt/kvm/arm/mmu.c ++++ b/virt/kvm/arm/mmu.c +@@ -1118,6 +1118,10 @@ static int stage2_set_pte(struct kvm *kv + /* Create 2nd stage page table mapping - Level 3 */ + old_pte = *pte; + if (pte_present(old_pte)) { ++ /* Skip page table update if there is no change */ ++ if (pte_val(old_pte) == pte_val(*new_pte)) ++ return 0; ++ + kvm_set_pte(pte, __pte(0)); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { diff --git a/queue-4.18/perf-kvm-fix-subcommands-on-s390.patch b/queue-4.18/perf-kvm-fix-subcommands-on-s390.patch new file mode 100644 index 00000000000..3f12a317dc5 --- /dev/null +++ b/queue-4.18/perf-kvm-fix-subcommands-on-s390.patch @@ -0,0 +1,45 @@ +From 8a95c8994509c55abf1e38c0cc037b1205725e21 Mon Sep 17 00:00:00 2001 +From: Thomas Richter +Date: Thu, 12 Jul 2018 09:09:36 +0200 +Subject: perf kvm: Fix subcommands on s390 + +From: Thomas Richter + +commit 8a95c8994509c55abf1e38c0cc037b1205725e21 upstream. + +With commit eca0fa28cd0d ("perf record: Provide detailed information on +s390 CPU") s390 platform provides detailed type/model/capacity +information in the CPU identifier string instead of just "IBM/S390". + +This breaks 'perf kvm' support which uses hard coded string IBM/S390 to +compare with the CPU identifier string. 
Fix this by changing the
+comparison.
+
+Reported-by: Stefan Raspl
+Signed-off-by: Thomas Richter
+Reviewed-by: Hendrik Brueckner
+Tested-by: Stefan Raspl
+Acked-by: Christian Borntraeger
+Cc: Heiko Carstens
+Cc: Martin Schwidefsky
+Cc: stable@vger.kernel.org
+Fixes: eca0fa28cd0d ("perf record: Provide detailed information on s390 CPU")
+Link: http://lkml.kernel.org/r/20180712070936.67547-1-tmricht@linux.ibm.com
+Signed-off-by: Arnaldo Carvalho de Melo
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ tools/perf/arch/s390/util/kvm-stat.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/perf/arch/s390/util/kvm-stat.c
++++ b/tools/perf/arch/s390/util/kvm-stat.c
+@@ -102,7 +102,7 @@ const char * const kvm_skip_events[] = {
+
+ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid)
+ {
+- if (strstr(cpuid, "IBM/S390")) {
++ if (strstr(cpuid, "IBM")) {
+ kvm->exit_reasons = sie_exit_reasons;
+ kvm->exit_reasons_isa = "SIE";
+ } else
diff --git a/queue-4.18/printk-nmi-prevent-deadlock-when-accessing-the-main-log-buffer-in-nmi.patch b/queue-4.18/printk-nmi-prevent-deadlock-when-accessing-the-main-log-buffer-in-nmi.patch
new file mode 100644
index 00000000000..4fa7b30bf9a
--- /dev/null
+++ b/queue-4.18/printk-nmi-prevent-deadlock-when-accessing-the-main-log-buffer-in-nmi.patch
@@ -0,0 +1,232 @@
+From 03fc7f9c99c1e7ae2925d459e8487f1a6f199f79 Mon Sep 17 00:00:00 2001
+From: Petr Mladek
+Date: Wed, 27 Jun 2018 16:20:28 +0200
+Subject: printk/nmi: Prevent deadlock when accessing the main log buffer in NMI
+
+From: Petr Mladek
+
+commit 03fc7f9c99c1e7ae2925d459e8487f1a6f199f79 upstream.
+
+The commit 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI
+when logbuf_lock is available") brought back the possible deadlocks
+in printk() and NMI.
+
+The check of logbuf_lock is done only in printk_nmi_enter() to prevent
+mixed output. But another CPU might take the lock later, enter NMI, and:
+
+      + Both NMIs might be serialized by yet another lock, for example,
+        the one in nmi_cpu_backtrace().
+
+      + The other CPU might get stopped in NMI, see smp_send_stop()
+        in panic().
+
+The only safe solution is to use trylock when storing the message
+into the main log buffer. It might cause reordering when some lines
+go to the main log buffer directly and others are delayed via
+the per-CPU buffer. This means that it is not useful in general.
+
+This patch replaces the problematic NMI deferred context with an NMI
+direct context. It can be used to mark code that might produce
+many messages in NMI, where the risk of losing them is more critical
+than problems with eventual reordering.
+
+The context is then used when dumping trace buffers on oops. It was
+the primary motivation for the original fix. Also, the reordering is
+even less of an issue there because some traces have their own time
+stamps.
+
+Finally, nmi_cpu_backtrace() no longer needs to be serialized because
+it will always use the per-CPU buffers again.
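+
+The resulting decision in vprintk_func(), reduced to its core (a
+sketch of the logic in the diff below):
+
+    if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) &&
+        raw_spin_trylock(&logbuf_lock)) {
+        /*
+         * Lock acquired: store straight into the main log buffer,
+         * but only defer console output; console drivers may hold
+         * their own locks.
+         */
+        len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
+        raw_spin_unlock(&logbuf_lock);
+        defer_console_output();
+    } else if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) {
+        /* Lock busy or plain NMI: fall back to the per-CPU buffer. */
+        len = vprintk_nmi(fmt, args);
+    }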
+ +Fixes: 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI when logbuf_lock is available") +Cc: stable@vger.kernel.org +Link: http://lkml.kernel.org/r/20180627142028.11259-1-pmladek@suse.com +To: Steven Rostedt +Cc: Peter Zijlstra +Cc: Tetsuo Handa +Cc: Sergey Senozhatsky +Cc: linux-kernel@vger.kernel.org +Cc: stable@vger.kernel.org +Acked-by: Sergey Senozhatsky +Signed-off-by: Petr Mladek +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/printk.h | 4 +++ + kernel/printk/internal.h | 9 ++++++ + kernel/printk/printk_safe.c | 58 ++++++++++++++++++++++++++++---------------- + kernel/trace/trace.c | 4 ++- + lib/nmi_backtrace.c | 3 -- + 5 files changed, 52 insertions(+), 26 deletions(-) + +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -148,9 +148,13 @@ void early_printk(const char *s, ...) { + #ifdef CONFIG_PRINTK_NMI + extern void printk_nmi_enter(void); + extern void printk_nmi_exit(void); ++extern void printk_nmi_direct_enter(void); ++extern void printk_nmi_direct_exit(void); + #else + static inline void printk_nmi_enter(void) { } + static inline void printk_nmi_exit(void) { } ++static inline void printk_nmi_direct_enter(void) { } ++static inline void printk_nmi_direct_exit(void) { } + #endif /* PRINTK_NMI */ + + #ifdef CONFIG_PRINTK +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -19,11 +19,16 @@ + #ifdef CONFIG_PRINTK + + #define PRINTK_SAFE_CONTEXT_MASK 0x3fffffff +-#define PRINTK_NMI_DEFERRED_CONTEXT_MASK 0x40000000 ++#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x40000000 + #define PRINTK_NMI_CONTEXT_MASK 0x80000000 + + extern raw_spinlock_t logbuf_lock; + ++__printf(5, 0) ++int vprintk_store(int facility, int level, ++ const char *dict, size_t dictlen, ++ const char *fmt, va_list args); ++ + __printf(1, 0) int vprintk_default(const char *fmt, va_list args); + __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); + __printf(1, 0) int vprintk_func(const char *fmt, va_list args); +@@ -54,6 +59,8 @@ void __printk_safe_exit(void); + local_irq_enable(); \ + } while (0) + ++void defer_console_output(void); ++ + #else + + __printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } +--- a/kernel/printk/printk_safe.c ++++ b/kernel/printk/printk_safe.c +@@ -308,24 +308,33 @@ static __printf(1, 0) int vprintk_nmi(co + + void printk_nmi_enter(void) + { +- /* +- * The size of the extra per-CPU buffer is limited. Use it only when +- * the main one is locked. If this CPU is not in the safe context, +- * the lock must be taken on another CPU and we could wait for it. +- */ +- if ((this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) && +- raw_spin_is_locked(&logbuf_lock)) { +- this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); +- } else { +- this_cpu_or(printk_context, PRINTK_NMI_DEFERRED_CONTEXT_MASK); +- } ++ this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); + } + + void printk_nmi_exit(void) + { +- this_cpu_and(printk_context, +- ~(PRINTK_NMI_CONTEXT_MASK | +- PRINTK_NMI_DEFERRED_CONTEXT_MASK)); ++ this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK); ++} ++ ++/* ++ * Marks a code that might produce many messages in NMI context ++ * and the risk of losing them is more critical than eventual ++ * reordering. ++ * ++ * It has effect only when called in NMI context. Then printk() ++ * will try to store the messages into the main logbuf directly ++ * and use the per-CPU buffers only as a fallback when the lock ++ * is not available. 
++ */ ++void printk_nmi_direct_enter(void) ++{ ++ if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) ++ this_cpu_or(printk_context, PRINTK_NMI_DIRECT_CONTEXT_MASK); ++} ++ ++void printk_nmi_direct_exit(void) ++{ ++ this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK); + } + + #else +@@ -363,6 +372,20 @@ void __printk_safe_exit(void) + + __printf(1, 0) int vprintk_func(const char *fmt, va_list args) + { ++ /* ++ * Try to use the main logbuf even in NMI. But avoid calling console ++ * drivers that might have their own locks. ++ */ ++ if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) && ++ raw_spin_trylock(&logbuf_lock)) { ++ int len; ++ ++ len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); ++ raw_spin_unlock(&logbuf_lock); ++ defer_console_output(); ++ return len; ++ } ++ + /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */ + if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) + return vprintk_nmi(fmt, args); +@@ -371,13 +394,6 @@ __printf(1, 0) int vprintk_func(const ch + if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) + return vprintk_safe(fmt, args); + +- /* +- * Use the main logbuf when logbuf_lock is available in NMI. +- * But avoid calling console drivers that might have their own locks. +- */ +- if (this_cpu_read(printk_context) & PRINTK_NMI_DEFERRED_CONTEXT_MASK) +- return vprintk_deferred(fmt, args); +- + /* No obstacles. */ + return vprintk_default(fmt, args); + } +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -8288,6 +8288,7 @@ void ftrace_dump(enum ftrace_dump_mode o + tracing_off(); + + local_irq_save(flags); ++ printk_nmi_direct_enter(); + + /* Simulate the iterator */ + trace_init_global_iter(&iter); +@@ -8367,7 +8368,8 @@ void ftrace_dump(enum ftrace_dump_mode o + for_each_tracing_cpu(cpu) { + atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + } +- atomic_dec(&dump_running); ++ atomic_dec(&dump_running); ++ printk_nmi_direct_exit(); + local_irq_restore(flags); + } + EXPORT_SYMBOL_GPL(ftrace_dump); +--- a/lib/nmi_backtrace.c ++++ b/lib/nmi_backtrace.c +@@ -87,11 +87,9 @@ void nmi_trigger_cpumask_backtrace(const + + bool nmi_cpu_backtrace(struct pt_regs *regs) + { +- static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; + int cpu = smp_processor_id(); + + if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { +- arch_spin_lock(&lock); + if (regs && cpu_in_idle(instruction_pointer(regs))) { + pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n", + cpu, (void *)instruction_pointer(regs)); +@@ -102,7 +100,6 @@ bool nmi_cpu_backtrace(struct pt_regs *r + else + dump_stack(); + } +- arch_spin_unlock(&lock); + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); + return true; + } diff --git a/queue-4.18/s390-kvm-fix-deadlock-when-killed-by-oom.patch b/queue-4.18/s390-kvm-fix-deadlock-when-killed-by-oom.patch new file mode 100644 index 00000000000..fe9b8962f4c --- /dev/null +++ b/queue-4.18/s390-kvm-fix-deadlock-when-killed-by-oom.patch @@ -0,0 +1,40 @@ +From 306d6c49ac9ded11114cb53b0925da52f2c2ada1 Mon Sep 17 00:00:00 2001 +From: Claudio Imbrenda +Date: Mon, 16 Jul 2018 10:38:57 +0200 +Subject: s390/kvm: fix deadlock when killed by oom + +From: Claudio Imbrenda + +commit 306d6c49ac9ded11114cb53b0925da52f2c2ada1 upstream. + +When the oom killer kills a userspace process in the page fault handler +while in guest context, the fault handler fails to release the mm_sem +if the FAULT_FLAG_RETRY_NOWAIT option is set. 
This leads to a deadlock +when tearing down the mm when the process terminates. This bug can only +happen when pfault is enabled, so only KVM clients are affected. + +The problem arises in the rare cases in which handle_mm_fault does not +release the mm_sem. This patch fixes the issue by manually releasing +the mm_sem when needed. + +Fixes: 24eb3a824c4f3 ("KVM: s390: Add FAULT_FLAG_RETRY_NOWAIT for guest fault") +Cc: # 3.15+ +Signed-off-by: Claudio Imbrenda +Signed-off-by: Martin Schwidefsky +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/mm/fault.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/s390/mm/fault.c ++++ b/arch/s390/mm/fault.c +@@ -502,6 +502,8 @@ retry: + /* No reason to continue if interrupted by SIGKILL. */ + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { + fault = VM_FAULT_SIGNAL; ++ if (flags & FAULT_FLAG_RETRY_NOWAIT) ++ goto out_up; + goto out; + } + if (unlikely(fault & VM_FAULT_ERROR)) diff --git a/queue-4.18/series b/queue-4.18/series index fcf7fcf7060..99e5db285e1 100644 --- a/queue-4.18/series +++ b/queue-4.18/series @@ -21,3 +21,23 @@ btrfs-send-fix-incorrect-file-layout-after-hole-punching-beyond-eof.patch hwmon-k10temp-27c-offset-needed-for-threadripper2.patch bpf-arm32-fix-stack-var-offset-in-jit.patch regulator-arizona-ldo1-use-correct-device-to-get-enable-gpio.patch +iommu-arm-smmu-error-out-only-if-not-enough-context-interrupts.patch +printk-nmi-prevent-deadlock-when-accessing-the-main-log-buffer-in-nmi.patch +kprobes-arm64-fix-p-uses-in-error-messages.patch +arm64-fix-mismatched-cache-line-size-detection.patch +arm64-handle-mismatched-cache-type.patch +arm64-mm-check-for-upper-page_shift-bits-in-pfn_valid.patch +arm64-dts-rockchip-corrected-uart1-clock-names-for-rk3328.patch +kvm-arm-arm64-fix-potential-loss-of-ptimer-interrupts.patch +kvm-arm-arm64-fix-lost-irqs-from-emulated-physcial-timer-when-blocked.patch +kvm-arm-arm64-skip-updating-pmd-entry-if-no-change.patch +kvm-arm-arm64-skip-updating-pte-entry-if-no-change.patch +s390-kvm-fix-deadlock-when-killed-by-oom.patch +perf-kvm-fix-subcommands-on-s390.patch +stop_machine-reflow-cpu_stop_queue_two_works.patch +stop_machine-atomically-queue-and-wake-stopper-threads.patch +ext4-check-for-nul-characters-in-extended-attribute-s-name.patch +ext4-use-ext4_warning-for-sb_getblk-failure.patch +ext4-sysfs-print-ext4_super_block-fields-as-little-endian.patch +ext4-reset-error-code-in-ext4_find_entry-in-fallback.patch +ext4-fix-race-when-setting-the-bitmap-corrupted-flag.patch diff --git a/queue-4.18/stop_machine-atomically-queue-and-wake-stopper-threads.patch b/queue-4.18/stop_machine-atomically-queue-and-wake-stopper-threads.patch new file mode 100644 index 00000000000..97690f0675d --- /dev/null +++ b/queue-4.18/stop_machine-atomically-queue-and-wake-stopper-threads.patch @@ -0,0 +1,97 @@ +From cfd355145c32bb7ccb65fccbe2d67280dc2119e1 Mon Sep 17 00:00:00 2001 +From: Prasad Sodagudi +Date: Fri, 3 Aug 2018 13:56:06 -0700 +Subject: stop_machine: Atomically queue and wake stopper threads + +From: Prasad Sodagudi + +commit cfd355145c32bb7ccb65fccbe2d67280dc2119e1 upstream. + +When cpu_stop_queue_work() releases the lock for the stopper +thread that was queued into its wake queue, preemption is +enabled, which leads to the following deadlock: + +CPU0 CPU1 +sched_setaffinity(0, ...) +__set_cpus_allowed_ptr() +stop_one_cpu(0, ...) stop_two_cpus(0, 1, ...) +cpu_stop_queue_work(0, ...) cpu_stop_queue_two_works(0, ..., 1, ...) 
+ +-grabs lock for migration/0- + -spins with preemption disabled, + waiting for migration/0's lock to be + released- + +-adds work items for migration/0 +and queues migration/0 to its +wake_q- + +-releases lock for migration/0 + and preemption is enabled- + +-current thread is preempted, +and __set_cpus_allowed_ptr +has changed the thread's +cpu allowed mask to CPU1 only- + + -acquires migration/0 and migration/1's + locks- + + -adds work for migration/0 but does not + add migration/0 to wake_q, since it is + already in a wake_q- + + -adds work for migration/1 and adds + migration/1 to its wake_q- + + -releases migration/0 and migration/1's + locks, wakes migration/1, and enables + preemption- + + -since migration/1 is requested to run, + migration/1 begins to run and waits on + migration/0, but migration/0 will never + be able to run, since the thread that + can wake it is affine to CPU1- + +Disable preemption in cpu_stop_queue_work() before queueing works for +stopper threads, and queueing the stopper thread in the wake queue, to +ensure that the operation of queueing the works and waking the stopper +threads is atomic. + +Fixes: 0b26351b910f ("stop_machine, sched: Fix migrate_swap() vs. active_balance() deadlock") +Signed-off-by: Prasad Sodagudi +Signed-off-by: Isaac J. Manjarres +Signed-off-by: Thomas Gleixner +Cc: peterz@infradead.org +Cc: matt@codeblueprint.co.uk +Cc: bigeasy@linutronix.de +Cc: gregkh@linuxfoundation.org +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/1533329766-4856-1-git-send-email-isaacm@codeaurora.org +Signed-off-by: Greg Kroah-Hartman + +Co-Developed-by: Isaac J. Manjarres + +--- + kernel/stop_machine.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/stop_machine.c ++++ b/kernel/stop_machine.c +@@ -81,6 +81,7 @@ static bool cpu_stop_queue_work(unsigned + unsigned long flags; + bool enabled; + ++ preempt_disable(); + raw_spin_lock_irqsave(&stopper->lock, flags); + enabled = stopper->enabled; + if (enabled) +@@ -90,6 +91,7 @@ static bool cpu_stop_queue_work(unsigned + raw_spin_unlock_irqrestore(&stopper->lock, flags); + + wake_up_q(&wakeq); ++ preempt_enable(); + + return enabled; + } diff --git a/queue-4.18/stop_machine-reflow-cpu_stop_queue_two_works.patch b/queue-4.18/stop_machine-reflow-cpu_stop_queue_two_works.patch new file mode 100644 index 00000000000..4b8a1b27022 --- /dev/null +++ b/queue-4.18/stop_machine-reflow-cpu_stop_queue_two_works.patch @@ -0,0 +1,110 @@ +From b80a2bfce85e1051056d98d04ecb2d0b55cbbc1c Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Mon, 30 Jul 2018 13:21:40 +0200 +Subject: stop_machine: Reflow cpu_stop_queue_two_works() + +From: Peter Zijlstra + +commit b80a2bfce85e1051056d98d04ecb2d0b55cbbc1c upstream. + +The code flow in cpu_stop_queue_two_works() is a little arcane; fix this by +lifting the preempt_disable() to the top to create more natural nesting wrt +the spinlocks and make the wake_up_q() and preempt_enable() unconditional +at the end. + +Furthermore, enable preemption in the -EDEADLK case, such that we spin-wait +with preemption enabled. 
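+
+The shape shared by both stop_machine fixes in this series, in reduced
+form (a sketch of cpu_stop_queue_work() after the change, not the
+verbatim kernel code):
+
+    preempt_disable();	/* queueing and waking must be one unit */
+    raw_spin_lock_irqsave(&stopper->lock, flags);
+    if (stopper->enabled)
+        __cpu_stop_queue_work(stopper, work, &wakeq);
+    raw_spin_unlock_irqrestore(&stopper->lock, flags);
+
+    wake_up_q(&wakeq);	/* still in the same scheduling context */
+    preempt_enable();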
+ +Suggested-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Thomas Gleixner +Cc: Sebastian Andrzej Siewior +Cc: isaacm@codeaurora.org +Cc: matt@codeblueprint.co.uk +Cc: psodagud@codeaurora.org +Cc: gregkh@linuxfoundation.org +Cc: pkondeti@codeaurora.org +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20180730112140.GH2494@hirez.programming.kicks-ass.net +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/stop_machine.c | 41 +++++++++++++++++++++++------------------ + 1 file changed, 23 insertions(+), 18 deletions(-) + +--- a/kernel/stop_machine.c ++++ b/kernel/stop_machine.c +@@ -236,13 +236,24 @@ static int cpu_stop_queue_two_works(int + struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); + DEFINE_WAKE_Q(wakeq); + int err; ++ + retry: ++ /* ++ * The waking up of stopper threads has to happen in the same ++ * scheduling context as the queueing. Otherwise, there is a ++ * possibility of one of the above stoppers being woken up by another ++ * CPU, and preempting us. This will cause us to not wake up the other ++ * stopper forever. ++ */ ++ preempt_disable(); + raw_spin_lock_irq(&stopper1->lock); + raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); + +- err = -ENOENT; +- if (!stopper1->enabled || !stopper2->enabled) ++ if (!stopper1->enabled || !stopper2->enabled) { ++ err = -ENOENT; + goto unlock; ++ } ++ + /* + * Ensure that if we race with __stop_cpus() the stoppers won't get + * queued up in reverse order leading to system deadlock. +@@ -253,36 +264,30 @@ retry: + * It can be falsely true but it is safe to spin until it is cleared, + * queue_stop_cpus_work() does everything under preempt_disable(). + */ +- err = -EDEADLK; +- if (unlikely(stop_cpus_in_progress)) +- goto unlock; ++ if (unlikely(stop_cpus_in_progress)) { ++ err = -EDEADLK; ++ goto unlock; ++ } + + err = 0; + __cpu_stop_queue_work(stopper1, work1, &wakeq); + __cpu_stop_queue_work(stopper2, work2, &wakeq); +- /* +- * The waking up of stopper threads has to happen +- * in the same scheduling context as the queueing. +- * Otherwise, there is a possibility of one of the +- * above stoppers being woken up by another CPU, +- * and preempting us. This will cause us to n ot +- * wake up the other stopper forever. +- */ +- preempt_disable(); ++ + unlock: + raw_spin_unlock(&stopper2->lock); + raw_spin_unlock_irq(&stopper1->lock); + + if (unlikely(err == -EDEADLK)) { ++ preempt_enable(); ++ + while (stop_cpus_in_progress) + cpu_relax(); ++ + goto retry; + } + +- if (!err) { +- wake_up_q(&wakeq); +- preempt_enable(); +- } ++ wake_up_q(&wakeq); ++ preempt_enable(); + + return err; + }