From: Greg Kroah-Hartman
Date: Tue, 3 Mar 2020 17:06:55 +0000 (+0100)
Subject: 5.5-stable patches
X-Git-Tag: v4.19.108~15
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=77cfc80d30b98a044c012fde16142d9289da7a69;p=thirdparty%2Fkernel%2Fstable-queue.git

5.5-stable patches

added patches:
      bus-tegra-aconnect-remove-pm_clk-dependency.patch
      clk-qcom-rpmh-sort-of-match-table.patch
      drivers-net-xgene-fix-the-order-of-the-arguments-of-alloc_etherdev_mqs.patch
      f2fs-fix-to-add-swap-extent-correctly.patch
      ima-ima-lsm-policy-rule-loading-logic-bug-fixes.patch
      kprobes-set-unoptimized-flag-after-unoptimizing-code.patch
      kvm-nvmx-vmwrite-checks-unsupported-field-before-read-only-field.patch
      kvm-nvmx-vmwrite-checks-vmcs-link-pointer-before-vmcs-field.patch
      kvm-x86-fix-kvm_bitmap_or_dest_vcpus-to-use-irq-shorthand.patch
      kvm-x86-remove-spurious-clearing-of-async-pf-msr.patch
      kvm-x86-remove-spurious-kvm_mmu_unload-from-vcpu-destruction-path.patch
      lib-vdso-make-__arch_update_vdso_data-logic-understandable.patch
      lib-vdso-update-coarse-timekeeper-unconditionally.patch
      locking-lockdep-fix-lockdep_stats-indentation-problem.patch
      mm-debug.c-always-print-flags-in-dump_page.patch
      mm-gup-allow-foll_force-for-get_user_pages_fast.patch
      mm-huge_memory.c-use-head-to-check-huge-zero-page.patch
      mm-thp-fix-defrag-setting-if-newline-is-not-used.patch
      netfilter-nf_flowtable-fix-documentation.patch
      netfilter-nft_tunnel-no-need-to-call-htons-when-dumping-ports.patch
      padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch
      perf-hists-browser-restore-esc-as-zoom-out-of-dso-thread-etc.patch
      perf-maps-add-missing-unlock-to-maps__insert-error-case.patch
      perf-ui-gtk-add-missing-zalloc-object.patch
      pwm-omap-dmtimer-put_device-after-of_find_device_by_node.patch
      rcu-allow-only-one-expedited-gp-to-run-concurrently-with-wakeups.patch
      rdma-hns-bugfix-for-posting-a-wqe-with-sge.patch
      rdma-hns-simplify-the-calculation-and-usage-of-wqe-idx-for-post-verbs.patch
      thermal-brcmstb_thermal-do-not-use-dt-coefficients.patch
      thermal-db8500-depromote-debug-print.patch
      ubifs-fix-ino_t-format-warnings-in-orphan_delete.patch
      x86-resctrl-check-monitoring-static-key-in-the-mbm-overflow-handler.patch
      xfs-clear-kernel-only-flags-in-xfs_ioc_attrmulti_by_handle.patch
---

diff --git a/queue-5.5/bus-tegra-aconnect-remove-pm_clk-dependency.patch b/queue-5.5/bus-tegra-aconnect-remove-pm_clk-dependency.patch
new file mode 100644
index 00000000000..5a099ad8cfe
--- /dev/null
+++ b/queue-5.5/bus-tegra-aconnect-remove-pm_clk-dependency.patch
@@ -0,0 +1,32 @@
+From 2f56acf818a08a9187ac8ec6e3d994fc13dc368d Mon Sep 17 00:00:00 2001
+From: Sameer Pujar
+Date: Thu, 20 Jun 2019 21:21:59 +0530
+Subject: bus: tegra-aconnect: Remove PM_CLK dependency
+
+From: Sameer Pujar
+
+commit 2f56acf818a08a9187ac8ec6e3d994fc13dc368d upstream.
+
+The ACONNECT bus driver does not use pm-clk interface anymore and hence
+the dependency can be removed from its Kconfig option.
+
+Fixes: 0d7dab926130 ("bus: tegra-aconnect: use devm_clk_*() helpers")
+Signed-off-by: Sameer Pujar
+Acked-by: Jon Hunter
+Signed-off-by: Thierry Reding
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/bus/Kconfig | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/bus/Kconfig
++++ b/drivers/bus/Kconfig
+@@ -139,7 +139,6 @@ config TEGRA_ACONNECT
+ tristate "Tegra ACONNECT Bus Driver"
+ depends on ARCH_TEGRA_210_SOC
+ depends on OF && PM
+- select PM_CLK
+ help
+ Driver for the Tegra ACONNECT bus which is used to interface with
+ the devices inside the Audio Processing Engine (APE) for Tegra210.
diff --git a/queue-5.5/clk-qcom-rpmh-sort-of-match-table.patch b/queue-5.5/clk-qcom-rpmh-sort-of-match-table.patch
new file mode 100644
index 00000000000..78141a376c2
--- /dev/null
+++ b/queue-5.5/clk-qcom-rpmh-sort-of-match-table.patch
@@ -0,0 +1,34 @@
+From 9e0cda721d18f44f1cd74d3a426931d71c1f1b30 Mon Sep 17 00:00:00 2001
+From: Bjorn Andersson
+Date: Fri, 24 Jan 2020 09:59:34 -0800
+Subject: clk: qcom: rpmh: Sort OF match table
+
+From: Bjorn Andersson
+
+commit 9e0cda721d18f44f1cd74d3a426931d71c1f1b30 upstream.
+
+sc7180 was added to the end of the match table, sort the table.
+
+Fixes: eee28109f871 ("clk: qcom: clk-rpmh: Add support for RPMHCC for SC7180")
+Signed-off-by: Bjorn Andersson
+Link: https://lkml.kernel.org/r/20200124175934.3937473-1-bjorn.andersson@linaro.org
+Signed-off-by: Stephen Boyd
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/clk/qcom/clk-rpmh.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/clk/qcom/clk-rpmh.c
++++ b/drivers/clk/qcom/clk-rpmh.c
+@@ -481,9 +481,9 @@ static int clk_rpmh_probe(struct platfor
+ }
+
+ static const struct of_device_id clk_rpmh_match_table[] = {
++ { .compatible = "qcom,sc7180-rpmh-clk", .data = &clk_rpmh_sc7180},
+ { .compatible = "qcom,sdm845-rpmh-clk", .data = &clk_rpmh_sdm845},
+ { .compatible = "qcom,sm8150-rpmh-clk", .data = &clk_rpmh_sm8150},
+- { .compatible = "qcom,sc7180-rpmh-clk", .data = &clk_rpmh_sc7180},
+ { }
+ };
+ MODULE_DEVICE_TABLE(of, clk_rpmh_match_table);
diff --git a/queue-5.5/drivers-net-xgene-fix-the-order-of-the-arguments-of-alloc_etherdev_mqs.patch b/queue-5.5/drivers-net-xgene-fix-the-order-of-the-arguments-of-alloc_etherdev_mqs.patch
new file mode 100644
index 00000000000..8cd97803d4e
--- /dev/null
+++ b/queue-5.5/drivers-net-xgene-fix-the-order-of-the-arguments-of-alloc_etherdev_mqs.patch
@@ -0,0 +1,37 @@
+From 5a44c71ccda60a50073c5d7fe3f694cdfa3ab0c2 Mon Sep 17 00:00:00 2001
+From: Christophe JAILLET
+Date: Sun, 26 Jan 2020 11:44:29 +0100
+Subject: drivers: net: xgene: Fix the order of the arguments of 'alloc_etherdev_mqs()'
+
+From: Christophe JAILLET
+
+commit 5a44c71ccda60a50073c5d7fe3f694cdfa3ab0c2 upstream.
+
+'alloc_etherdev_mqs()' expects first 'tx', then 'rx'. The semantic here
+looks reversed.
+
+Reorder the arguments passed to 'alloc_etherdev_mqs()' in order to keep
+the correct semantic.
+
+In fact, this is a no-op because both XGENE_NUM_[RT]X_RING are 8.
+
+Fixes: 107dec2749fe ("drivers: net: xgene: Add support for multiple queues")
+Signed-off-by: Christophe JAILLET
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
++++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+@@ -2020,7 +2020,7 @@ static int xgene_enet_probe(struct platf
+ int ret;
+
+ ndev = alloc_etherdev_mqs(sizeof(struct xgene_enet_pdata),
+- XGENE_NUM_RX_RING, XGENE_NUM_TX_RING);
++ XGENE_NUM_TX_RING, XGENE_NUM_RX_RING);
+ if (!ndev)
+ return -ENOMEM;
+
diff --git a/queue-5.5/f2fs-fix-to-add-swap-extent-correctly.patch b/queue-5.5/f2fs-fix-to-add-swap-extent-correctly.patch
new file mode 100644
index 00000000000..7c1968748b8
--- /dev/null
+++ b/queue-5.5/f2fs-fix-to-add-swap-extent-correctly.patch
@@ -0,0 +1,120 @@
+From 3e5e479a39ce9ed60cd63f7565cc1d9da77c2a4e Mon Sep 17 00:00:00 2001
+From: Chao Yu
+Date: Fri, 27 Dec 2019 18:44:56 +0800
+Subject: f2fs: fix to add swap extent correctly
+
+From: Chao Yu
+
+commit 3e5e479a39ce9ed60cd63f7565cc1d9da77c2a4e upstream.
+
+As Youling reported in mailing list:
+
+https://www.linuxquestions.org/questions/linux-newbie-8/the-file-system-f2fs-is-broken-4175666043/
+
+https://www.linux.org/threads/the-file-system-f2fs-is-broken.26490/
+
+There is a test case can corrupt f2fs image:
+- dd if=/dev/zero of=/swapfile bs=1M count=4096
+- chmod 600 /swapfile
+- mkswap /swapfile
+- swapon --discard /swapfile
+
+The root cause is f2fs_swap_activate() intends to return zero value
+to setup_swap_extents() to enable SWP_FS mode (swap file goes through
+fs), in this flow, setup_swap_extents() setups swap extent with wrong
+block address range, result in discard_swap() erasing incorrect address.
+
+Because f2fs_swap_activate() has pinned swapfile, its data block
+address will not change, it's safe to let swap to handle IO through
+raw device, so we can get rid of SWAP_FS mode and initial swap extents
+inside f2fs_swap_activate(), by this way, later discard_swap() can trim
+in right address range.
+ +Fixes: 4969c06a0d83 ("f2fs: support swap file w/ DIO") +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman + +--- + fs/f2fs/data.c | 32 +++++++++++++++++++++++++------- + 1 file changed, 25 insertions(+), 7 deletions(-) + +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -3132,7 +3132,8 @@ int f2fs_migrate_page(struct address_spa + + #ifdef CONFIG_SWAP + /* Copied from generic_swapfile_activate() to check any holes */ +-static int check_swap_activate(struct file *swap_file, unsigned int max) ++static int check_swap_activate(struct swap_info_struct *sis, ++ struct file *swap_file, sector_t *span) + { + struct address_space *mapping = swap_file->f_mapping; + struct inode *inode = mapping->host; +@@ -3143,6 +3144,8 @@ static int check_swap_activate(struct fi + sector_t last_block; + sector_t lowest_block = -1; + sector_t highest_block = 0; ++ int nr_extents = 0; ++ int ret; + + blkbits = inode->i_blkbits; + blocks_per_page = PAGE_SIZE >> blkbits; +@@ -3154,7 +3157,8 @@ static int check_swap_activate(struct fi + probe_block = 0; + page_no = 0; + last_block = i_size_read(inode) >> blkbits; +- while ((probe_block + blocks_per_page) <= last_block && page_no < max) { ++ while ((probe_block + blocks_per_page) <= last_block && ++ page_no < sis->max) { + unsigned block_in_page; + sector_t first_block; + +@@ -3194,13 +3198,27 @@ static int check_swap_activate(struct fi + highest_block = first_block; + } + ++ /* ++ * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks ++ */ ++ ret = add_swap_extent(sis, page_no, 1, first_block); ++ if (ret < 0) ++ goto out; ++ nr_extents += ret; + page_no++; + probe_block += blocks_per_page; + reprobe: + continue; + } +- return 0; +- ++ ret = nr_extents; ++ *span = 1 + highest_block - lowest_block; ++ if (page_no == 0) ++ page_no = 1; /* force Empty message */ ++ sis->max = page_no; ++ sis->pages = page_no - 1; ++ sis->highest_bit = page_no - 1; ++out: ++ return ret; + bad_bmap: + pr_err("swapon: swapfile has holes\n"); + return -EINVAL; +@@ -3222,14 +3240,14 @@ static int f2fs_swap_activate(struct swa + if (ret) + return ret; + +- ret = check_swap_activate(file, sis->max); +- if (ret) ++ ret = check_swap_activate(sis, file, span); ++ if (ret < 0) + return ret; + + set_inode_flag(inode, FI_PIN_FILE); + f2fs_precache_extents(inode); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); +- return 0; ++ return ret; + } + + static void f2fs_swap_deactivate(struct file *file) diff --git a/queue-5.5/ima-ima-lsm-policy-rule-loading-logic-bug-fixes.patch b/queue-5.5/ima-ima-lsm-policy-rule-loading-logic-bug-fixes.patch new file mode 100644 index 00000000000..f6f817de5d8 --- /dev/null +++ b/queue-5.5/ima-ima-lsm-policy-rule-loading-logic-bug-fixes.patch @@ -0,0 +1,140 @@ +From 483ec26eed42bf050931d9a5c5f9f0b5f2ad5f3b Mon Sep 17 00:00:00 2001 +From: Janne Karhunen +Date: Wed, 15 Jan 2020 17:42:30 +0200 +Subject: ima: ima/lsm policy rule loading logic bug fixes + +From: Janne Karhunen + +commit 483ec26eed42bf050931d9a5c5f9f0b5f2ad5f3b upstream. + +Keep the ima policy rules around from the beginning even if they appear +invalid at the time of loading, as they may become active after an lsm +policy load. However, loading a custom IMA policy with unknown LSM +labels is only safe after we have transitioned from the "built-in" +policy rules to a custom IMA policy. + +Patch also fixes the rule re-use during the lsm policy reload and makes +some prints a bit more human readable. 
+ +Changelog: +v4: +- Do not allow the initial policy load refer to non-existing lsm rules. +v3: +- Fix too wide policy rule matching for non-initialized LSMs +v2: +- Fix log prints + +Fixes: b16942455193 ("ima: use the lsm policy update notifier") +Cc: Casey Schaufler +Reported-by: Mimi Zohar +Signed-off-by: Janne Karhunen +Signed-off-by: Konsta Karsisto +Signed-off-by: Mimi Zohar +Signed-off-by: Greg Kroah-Hartman + +--- + security/integrity/ima/ima_policy.c | 44 +++++++++++++++++++++--------------- + 1 file changed, 26 insertions(+), 18 deletions(-) + +--- a/security/integrity/ima/ima_policy.c ++++ b/security/integrity/ima/ima_policy.c +@@ -263,7 +263,7 @@ static void ima_lsm_free_rule(struct ima + static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) + { + struct ima_rule_entry *nentry; +- int i, result; ++ int i; + + nentry = kmalloc(sizeof(*nentry), GFP_KERNEL); + if (!nentry) +@@ -277,7 +277,7 @@ static struct ima_rule_entry *ima_lsm_co + memset(nentry->lsm, 0, sizeof_field(struct ima_rule_entry, lsm)); + + for (i = 0; i < MAX_LSM_RULES; i++) { +- if (!entry->lsm[i].rule) ++ if (!entry->lsm[i].args_p) + continue; + + nentry->lsm[i].type = entry->lsm[i].type; +@@ -286,13 +286,13 @@ static struct ima_rule_entry *ima_lsm_co + if (!nentry->lsm[i].args_p) + goto out_err; + +- result = security_filter_rule_init(nentry->lsm[i].type, +- Audit_equal, +- nentry->lsm[i].args_p, +- &nentry->lsm[i].rule); +- if (result == -EINVAL) +- pr_warn("ima: rule for LSM \'%d\' is undefined\n", +- entry->lsm[i].type); ++ security_filter_rule_init(nentry->lsm[i].type, ++ Audit_equal, ++ nentry->lsm[i].args_p, ++ &nentry->lsm[i].rule); ++ if (!nentry->lsm[i].rule) ++ pr_warn("rule for LSM \'%s\' is undefined\n", ++ (char *)entry->lsm[i].args_p); + } + return nentry; + +@@ -329,7 +329,7 @@ static void ima_lsm_update_rules(void) + list_for_each_entry_safe(entry, e, &ima_policy_rules, list) { + needs_update = 0; + for (i = 0; i < MAX_LSM_RULES; i++) { +- if (entry->lsm[i].rule) { ++ if (entry->lsm[i].args_p) { + needs_update = 1; + break; + } +@@ -339,8 +339,7 @@ static void ima_lsm_update_rules(void) + + result = ima_lsm_update_rule(entry); + if (result) { +- pr_err("ima: lsm rule update error %d\n", +- result); ++ pr_err("lsm rule update error %d\n", result); + return; + } + } +@@ -357,7 +356,7 @@ int ima_lsm_policy_change(struct notifie + } + + /** +- * ima_match_rules - determine whether an inode matches the measure rule. ++ * ima_match_rules - determine whether an inode matches the policy rule. 
+ * @rule: a pointer to a rule + * @inode: a pointer to an inode + * @cred: a pointer to a credentials structure for user validation +@@ -415,9 +414,12 @@ static bool ima_match_rules(struct ima_r + int rc = 0; + u32 osid; + +- if (!rule->lsm[i].rule) +- continue; +- ++ if (!rule->lsm[i].rule) { ++ if (!rule->lsm[i].args_p) ++ continue; ++ else ++ return false; ++ } + switch (i) { + case LSM_OBJ_USER: + case LSM_OBJ_ROLE: +@@ -823,8 +825,14 @@ static int ima_lsm_rule_init(struct ima_ + entry->lsm[lsm_rule].args_p, + &entry->lsm[lsm_rule].rule); + if (!entry->lsm[lsm_rule].rule) { +- kfree(entry->lsm[lsm_rule].args_p); +- return -EINVAL; ++ pr_warn("rule for LSM \'%s\' is undefined\n", ++ (char *)entry->lsm[lsm_rule].args_p); ++ ++ if (ima_rules == &ima_default_rules) { ++ kfree(entry->lsm[lsm_rule].args_p); ++ result = -EINVAL; ++ } else ++ result = 0; + } + + return result; diff --git a/queue-5.5/kprobes-set-unoptimized-flag-after-unoptimizing-code.patch b/queue-5.5/kprobes-set-unoptimized-flag-after-unoptimizing-code.patch new file mode 100644 index 00000000000..aea05e81f66 --- /dev/null +++ b/queue-5.5/kprobes-set-unoptimized-flag-after-unoptimizing-code.patch @@ -0,0 +1,89 @@ +From f66c0447cca1281116224d474cdb37d6a18e4b5b Mon Sep 17 00:00:00 2001 +From: Masami Hiramatsu +Date: Wed, 27 Nov 2019 14:57:04 +0900 +Subject: kprobes: Set unoptimized flag after unoptimizing code + +From: Masami Hiramatsu + +commit f66c0447cca1281116224d474cdb37d6a18e4b5b upstream. + +Set the unoptimized flag after confirming the code is completely +unoptimized. Without this fix, when a kprobe hits the intermediate +modified instruction (the first byte is replaced by an INT3, but +later bytes can still be a jump address operand) while unoptimizing, +it can return to the middle byte of the modified code, which causes +an invalid instruction exception in the kernel. + +Usually, this is a rare case, but if we put a probe on the function +call while text patching, it always causes a kernel panic as below: + + # echo p text_poke+5 > kprobe_events + # echo 1 > events/kprobes/enable + # echo 0 > events/kprobes/enable + +invalid opcode: 0000 [#1] PREEMPT SMP PTI + RIP: 0010:text_poke+0x9/0x50 + Call Trace: + arch_unoptimize_kprobe+0x22/0x28 + arch_unoptimize_kprobes+0x39/0x87 + kprobe_optimizer+0x6e/0x290 + process_one_work+0x2a0/0x610 + worker_thread+0x28/0x3d0 + ? process_one_work+0x610/0x610 + kthread+0x10d/0x130 + ? kthread_park+0x80/0x80 + ret_from_fork+0x3a/0x50 + +text_poke() is used for patching the code in optprobes. + +This can happen even if we blacklist text_poke() and other functions, +because there is a small time window during which we show the intermediate +code to other CPUs. + + [ mingo: Edited the changelog. 
] + +Tested-by: Alexei Starovoitov +Signed-off-by: Masami Hiramatsu +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Steven Rostedt +Cc: Thomas Gleixner +Cc: bristot@redhat.com +Fixes: 6274de4984a6 ("kprobes: Support delayed unoptimizing") +Link: https://lkml.kernel.org/r/157483422375.25881.13508326028469515760.stgit@devnote2 +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/kprobes.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/kernel/kprobes.c ++++ b/kernel/kprobes.c +@@ -510,6 +510,8 @@ static void do_unoptimize_kprobes(void) + arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); + /* Loop free_list for disarming */ + list_for_each_entry_safe(op, tmp, &freeing_list, list) { ++ /* Switching from detour code to origin */ ++ op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; + /* Disarm probes if marked disabled */ + if (kprobe_disabled(&op->kp)) + arch_disarm_kprobe(&op->kp); +@@ -665,6 +667,7 @@ static void force_unoptimize_kprobe(stru + { + lockdep_assert_cpus_held(); + arch_unoptimize_kprobe(op); ++ op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; + if (kprobe_disabled(&op->kp)) + arch_disarm_kprobe(&op->kp); + } +@@ -681,7 +684,6 @@ static void unoptimize_kprobe(struct kpr + if (!kprobe_optimized(p)) + return; + +- op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; + if (!list_empty(&op->list)) { + if (optprobe_queued_unopt(op)) { + /* Queued in unoptimizing queue */ diff --git a/queue-5.5/kvm-nvmx-vmwrite-checks-unsupported-field-before-read-only-field.patch b/queue-5.5/kvm-nvmx-vmwrite-checks-unsupported-field-before-read-only-field.patch new file mode 100644 index 00000000000..3081720ac59 --- /dev/null +++ b/queue-5.5/kvm-nvmx-vmwrite-checks-unsupported-field-before-read-only-field.patch @@ -0,0 +1,55 @@ +From 693e02cc24090c379217138719d9d84e50036b24 Mon Sep 17 00:00:00 2001 +From: Jim Mattson +Date: Fri, 6 Dec 2019 15:46:36 -0800 +Subject: kvm: nVMX: VMWRITE checks unsupported field before read-only field + +From: Jim Mattson + +commit 693e02cc24090c379217138719d9d84e50036b24 upstream. + +According to the SDM, VMWRITE checks to see if the secondary source +operand corresponds to an unsupported VMCS field before it checks to +see if the secondary source operand corresponds to a VM-exit +information field and the processor does not support writing to +VM-exit information fields. + +Fixes: 49f705c5324aa ("KVM: nVMX: Implement VMREAD and VMWRITE") +Signed-off-by: Jim Mattson +Cc: Paolo Bonzini +Reviewed-by: Peter Shier +Reviewed-by: Oliver Upton +Reviewed-by: Jon Cargille +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx/nested.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -4940,6 +4940,12 @@ static int handle_vmwrite(struct kvm_vcp + + + field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); ++ ++ offset = vmcs_field_to_offset(field); ++ if (offset < 0) ++ return nested_vmx_failValid(vcpu, ++ VMXERR_UNSUPPORTED_VMCS_COMPONENT); ++ + /* + * If the vCPU supports "VMWRITE to any supported field in the + * VMCS," then the "read-only" fields are actually read/write. 
+@@ -4956,11 +4962,6 @@ static int handle_vmwrite(struct kvm_vcp + if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) + copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + +- offset = vmcs_field_to_offset(field); +- if (offset < 0) +- return nested_vmx_failValid(vcpu, +- VMXERR_UNSUPPORTED_VMCS_COMPONENT); +- + /* + * Some Intel CPUs intentionally drop the reserved bits of the AR byte + * fields on VMWRITE. Emulate this behavior to ensure consistent KVM diff --git a/queue-5.5/kvm-nvmx-vmwrite-checks-vmcs-link-pointer-before-vmcs-field.patch b/queue-5.5/kvm-nvmx-vmwrite-checks-vmcs-link-pointer-before-vmcs-field.patch new file mode 100644 index 00000000000..074cfac1cb3 --- /dev/null +++ b/queue-5.5/kvm-nvmx-vmwrite-checks-vmcs-link-pointer-before-vmcs-field.patch @@ -0,0 +1,130 @@ +From dd2d6042b7f4a5440705b4ffc6c4c2dba81a43b7 Mon Sep 17 00:00:00 2001 +From: Jim Mattson +Date: Fri, 6 Dec 2019 15:46:35 -0800 +Subject: kvm: nVMX: VMWRITE checks VMCS-link pointer before VMCS field + +From: Jim Mattson + +commit dd2d6042b7f4a5440705b4ffc6c4c2dba81a43b7 upstream. + +According to the SDM, a VMWRITE in VMX non-root operation with an +invalid VMCS-link pointer results in VMfailInvalid before the validity +of the VMCS field in the secondary source operand is checked. + +For consistency, modify both handle_vmwrite and handle_vmread, even +though there was no problem with the latter. + +Fixes: 6d894f498f5d1 ("KVM: nVMX: vmread/vmwrite: Use shadow vmcs12 if running L2") +Signed-off-by: Jim Mattson +Cc: Liran Alon +Cc: Paolo Bonzini +Cc: Vitaly Kuznetsov +Reviewed-by: Peter Shier +Reviewed-by: Oliver Upton +Reviewed-by: Jon Cargille +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx/nested.c | 59 +++++++++++++++++++--------------------------- + 1 file changed, 25 insertions(+), 34 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -4808,32 +4808,28 @@ static int handle_vmread(struct kvm_vcpu + { + unsigned long field; + u64 field_value; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + int len; + gva_t gva = 0; +- struct vmcs12 *vmcs12; ++ struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu) ++ : get_vmcs12(vcpu); + struct x86_exception e; + short offset; + + if (!nested_vmx_check_permission(vcpu)) + return 1; + +- if (to_vmx(vcpu)->nested.current_vmptr == -1ull) ++ /* ++ * In VMX non-root operation, when the VMCS-link pointer is -1ull, ++ * any VMREAD sets the ALU flags for VMfailInvalid. ++ */ ++ if (vmx->nested.current_vmptr == -1ull || ++ (is_guest_mode(vcpu) && ++ get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)) + return nested_vmx_failInvalid(vcpu); + +- if (!is_guest_mode(vcpu)) +- vmcs12 = get_vmcs12(vcpu); +- else { +- /* +- * When vmcs->vmcs_link_pointer is -1ull, any VMREAD +- * to shadowed-field sets the ALU flags for VMfailInvalid. +- */ +- if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) +- return nested_vmx_failInvalid(vcpu); +- vmcs12 = get_shadow_vmcs12(vcpu); +- } +- + /* Decode instruction info and find the field to read */ + field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); + +@@ -4912,13 +4908,20 @@ static int handle_vmwrite(struct kvm_vcp + */ + u64 field_value = 0; + struct x86_exception e; +- struct vmcs12 *vmcs12; ++ struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? 
get_shadow_vmcs12(vcpu) ++ : get_vmcs12(vcpu); + short offset; + + if (!nested_vmx_check_permission(vcpu)) + return 1; + +- if (vmx->nested.current_vmptr == -1ull) ++ /* ++ * In VMX non-root operation, when the VMCS-link pointer is -1ull, ++ * any VMWRITE sets the ALU flags for VMfailInvalid. ++ */ ++ if (vmx->nested.current_vmptr == -1ull || ++ (is_guest_mode(vcpu) && ++ get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)) + return nested_vmx_failInvalid(vcpu); + + if (vmx_instruction_info & (1u << 10)) +@@ -4946,24 +4949,12 @@ static int handle_vmwrite(struct kvm_vcp + return nested_vmx_failValid(vcpu, + VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); + +- if (!is_guest_mode(vcpu)) { +- vmcs12 = get_vmcs12(vcpu); +- +- /* +- * Ensure vmcs12 is up-to-date before any VMWRITE that dirties +- * vmcs12, else we may crush a field or consume a stale value. +- */ +- if (!is_shadow_field_rw(field)) +- copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); +- } else { +- /* +- * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE +- * to shadowed-field sets the ALU flags for VMfailInvalid. +- */ +- if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) +- return nested_vmx_failInvalid(vcpu); +- vmcs12 = get_shadow_vmcs12(vcpu); +- } ++ /* ++ * Ensure vmcs12 is up-to-date before any VMWRITE that dirties ++ * vmcs12, else we may crush a field or consume a stale value. ++ */ ++ if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) ++ copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + + offset = vmcs_field_to_offset(field); + if (offset < 0) diff --git a/queue-5.5/kvm-x86-fix-kvm_bitmap_or_dest_vcpus-to-use-irq-shorthand.patch b/queue-5.5/kvm-x86-fix-kvm_bitmap_or_dest_vcpus-to-use-irq-shorthand.patch new file mode 100644 index 00000000000..97d8894adc7 --- /dev/null +++ b/queue-5.5/kvm-x86-fix-kvm_bitmap_or_dest_vcpus-to-use-irq-shorthand.patch @@ -0,0 +1,33 @@ +From b4b2963616bbd91ebb33148522552e1135de56ae Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 4 Dec 2019 20:07:16 +0100 +Subject: KVM: X86: Fix kvm_bitmap_or_dest_vcpus() to use irq shorthand + +From: Peter Xu + +commit b4b2963616bbd91ebb33148522552e1135de56ae upstream. + +The 3rd parameter of kvm_apic_match_dest() is the irq shorthand, +rather than the irq delivery mode. + +Fixes: 7ee30bc132c6 ("KVM: x86: deliver KVM IOAPIC scan request to target vCPUs") +Reviewed-by: Vitaly Kuznetsov +Signed-off-by: Peter Xu +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/lapic.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -1150,7 +1150,7 @@ void kvm_bitmap_or_dest_vcpus(struct kvm + if (!kvm_apic_present(vcpu)) + continue; + if (!kvm_apic_match_dest(vcpu, NULL, +- irq->delivery_mode, ++ irq->shorthand, + irq->dest_id, + irq->dest_mode)) + continue; diff --git a/queue-5.5/kvm-x86-remove-spurious-clearing-of-async-pf-msr.patch b/queue-5.5/kvm-x86-remove-spurious-clearing-of-async-pf-msr.patch new file mode 100644 index 00000000000..881bce705ec --- /dev/null +++ b/queue-5.5/kvm-x86-remove-spurious-clearing-of-async-pf-msr.patch @@ -0,0 +1,41 @@ +From 208050dac5ef4de5cb83ffcafa78499c94d0b5ad Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 18 Dec 2019 13:55:06 -0800 +Subject: KVM: x86: Remove spurious clearing of async #PF MSR + +From: Sean Christopherson + +commit 208050dac5ef4de5cb83ffcafa78499c94d0b5ad upstream. + +Remove a bogus clearing of apf.msr_val from kvm_arch_vcpu_destroy(). 
+ +apf.msr_val is only set to a non-zero value by kvm_pv_enable_async_pf(), +which is only reachable by kvm_set_msr_common(), i.e. by writing +MSR_KVM_ASYNC_PF_EN. KVM does not autonomously write said MSR, i.e. +can only be written via KVM_SET_MSRS or KVM_RUN. Since KVM_SET_MSRS and +KVM_RUN are vcpu ioctls, they require a valid vcpu file descriptor. +kvm_arch_vcpu_destroy() is only called if KVM_CREATE_VCPU fails, and KVM +declares KVM_CREATE_VCPU successful once the vcpu fd is installed and +thus visible to userspace. Ergo, apf.msr_val cannot be non-zero when +kvm_arch_vcpu_destroy() is called. + +Fixes: 344d9588a9df0 ("KVM: Add PV MSR to enable asynchronous page faults delivery.") +Signed-off-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/x86.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -9227,8 +9227,6 @@ void kvm_arch_vcpu_postcreate(struct kvm + + void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) + { +- vcpu->arch.apf.msr_val = 0; +- + kvm_arch_vcpu_free(vcpu); + } + diff --git a/queue-5.5/kvm-x86-remove-spurious-kvm_mmu_unload-from-vcpu-destruction-path.patch b/queue-5.5/kvm-x86-remove-spurious-kvm_mmu_unload-from-vcpu-destruction-path.patch new file mode 100644 index 00000000000..07eee63bb8a --- /dev/null +++ b/queue-5.5/kvm-x86-remove-spurious-kvm_mmu_unload-from-vcpu-destruction-path.patch @@ -0,0 +1,40 @@ +From 9d979c7e6ff43ca3200ffcb74f57415fd633a2da Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 18 Dec 2019 13:55:05 -0800 +Subject: KVM: x86: Remove spurious kvm_mmu_unload() from vcpu destruction path + +From: Sean Christopherson + +commit 9d979c7e6ff43ca3200ffcb74f57415fd633a2da upstream. + +x86 does not load its MMU until KVM_RUN, which cannot be invoked until +after vCPU creation succeeds. Given that kvm_arch_vcpu_destroy() is +called if and only if vCPU creation fails, it is impossible for the MMU +to be loaded. + +Note, the bogus kvm_mmu_unload() call was added during an unrelated +refactoring of vCPU allocation, i.e. was presumably added as an +opportunstic "fix" for a perceived leak. + +Fixes: fb3f0f51d92d1 ("KVM: Dynamically allocate vcpus") +Signed-off-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/x86.c | 4 ---- + 1 file changed, 4 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -9229,10 +9229,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vc + { + vcpu->arch.apf.msr_val = 0; + +- vcpu_load(vcpu); +- kvm_mmu_unload(vcpu); +- vcpu_put(vcpu); +- + kvm_arch_vcpu_free(vcpu); + } + diff --git a/queue-5.5/lib-vdso-make-__arch_update_vdso_data-logic-understandable.patch b/queue-5.5/lib-vdso-make-__arch_update_vdso_data-logic-understandable.patch new file mode 100644 index 00000000000..f1ddc778a38 --- /dev/null +++ b/queue-5.5/lib-vdso-make-__arch_update_vdso_data-logic-understandable.patch @@ -0,0 +1,63 @@ +From 9a6b55ac4a44060bcb782baf002859b2a2c63267 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Tue, 14 Jan 2020 19:52:38 +0100 +Subject: lib/vdso: Make __arch_update_vdso_data() logic understandable + +From: Thomas Gleixner + +commit 9a6b55ac4a44060bcb782baf002859b2a2c63267 upstream. + +The function name suggests that this is a boolean checking whether the +architecture asks for an update of the VDSO data, but it works the other +way round. To spare further confusion invert the logic. 
+ +Fixes: 44f57d788e7d ("timekeeping: Provide a generic update_vsyscall() implementation") +Signed-off-by: Thomas Gleixner +Link: https://lore.kernel.org/r/20200114185946.656652824@linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/include/asm/vdso/vsyscall.h | 4 ++-- + include/asm-generic/vdso/vsyscall.h | 4 ++-- + kernel/time/vsyscall.c | 2 +- + 3 files changed, 5 insertions(+), 5 deletions(-) + +--- a/arch/arm/include/asm/vdso/vsyscall.h ++++ b/arch/arm/include/asm/vdso/vsyscall.h +@@ -34,9 +34,9 @@ struct vdso_data *__arm_get_k_vdso_data( + #define __arch_get_k_vdso_data __arm_get_k_vdso_data + + static __always_inline +-int __arm_update_vdso_data(void) ++bool __arm_update_vdso_data(void) + { +- return !cntvct_ok; ++ return cntvct_ok; + } + #define __arch_update_vdso_data __arm_update_vdso_data + +--- a/include/asm-generic/vdso/vsyscall.h ++++ b/include/asm-generic/vdso/vsyscall.h +@@ -12,9 +12,9 @@ static __always_inline struct vdso_data + #endif /* __arch_get_k_vdso_data */ + + #ifndef __arch_update_vdso_data +-static __always_inline int __arch_update_vdso_data(void) ++static __always_inline bool __arch_update_vdso_data(void) + { +- return 0; ++ return true; + } + #endif /* __arch_update_vdso_data */ + +--- a/kernel/time/vsyscall.c ++++ b/kernel/time/vsyscall.c +@@ -84,7 +84,7 @@ void update_vsyscall(struct timekeeper * + struct vdso_timestamp *vdso_ts; + u64 nsec; + +- if (__arch_update_vdso_data()) { ++ if (!__arch_update_vdso_data()) { + /* + * Some architectures might want to skip the update of the + * data page. diff --git a/queue-5.5/lib-vdso-update-coarse-timekeeper-unconditionally.patch b/queue-5.5/lib-vdso-update-coarse-timekeeper-unconditionally.patch new file mode 100644 index 00000000000..532096c70e2 --- /dev/null +++ b/queue-5.5/lib-vdso-update-coarse-timekeeper-unconditionally.patch @@ -0,0 +1,105 @@ +From 9f24c540f7f8eb3a981528da9a9a636a5bdf5987 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Tue, 14 Jan 2020 19:52:39 +0100 +Subject: lib/vdso: Update coarse timekeeper unconditionally + +From: Thomas Gleixner + +commit 9f24c540f7f8eb3a981528da9a9a636a5bdf5987 upstream. + +The low resolution parts of the VDSO, i.e.: + + clock_gettime(CLOCK_*_COARSE), clock_getres(), time() + +can be used even if there is no VDSO capable clocksource. + +But if an architecture opts out of the VDSO data update then this +information becomes stale. This affects ARM when there is no architected +timer available. The lack of update causes userspace to use stale data +forever. + +Make the update of the low resolution parts unconditional and only skip +the update of the high resolution parts if the architecture requests it. 
+ +Fixes: 44f57d788e7d ("timekeeping: Provide a generic update_vsyscall() implementation") +Signed-off-by: Thomas Gleixner +Link: https://lore.kernel.org/r/20200114185946.765577901@linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/time/vsyscall.c | 37 +++++++++++++++++-------------------- + 1 file changed, 17 insertions(+), 20 deletions(-) + +--- a/kernel/time/vsyscall.c ++++ b/kernel/time/vsyscall.c +@@ -28,11 +28,6 @@ static inline void update_vdso_data(stru + vdata[CS_RAW].mult = tk->tkr_raw.mult; + vdata[CS_RAW].shift = tk->tkr_raw.shift; + +- /* CLOCK_REALTIME */ +- vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME]; +- vdso_ts->sec = tk->xtime_sec; +- vdso_ts->nsec = tk->tkr_mono.xtime_nsec; +- + /* CLOCK_MONOTONIC */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC]; + vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; +@@ -70,12 +65,6 @@ static inline void update_vdso_data(stru + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI]; + vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset; + vdso_ts->nsec = tk->tkr_mono.xtime_nsec; +- +- /* +- * Read without the seqlock held by clock_getres(). +- * Note: No need to have a second copy. +- */ +- WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution); + } + + void update_vsyscall(struct timekeeper *tk) +@@ -84,20 +73,17 @@ void update_vsyscall(struct timekeeper * + struct vdso_timestamp *vdso_ts; + u64 nsec; + +- if (!__arch_update_vdso_data()) { +- /* +- * Some architectures might want to skip the update of the +- * data page. +- */ +- return; +- } +- + /* copy vsyscall data */ + vdso_write_begin(vdata); + + vdata[CS_HRES_COARSE].clock_mode = __arch_get_clock_mode(tk); + vdata[CS_RAW].clock_mode = __arch_get_clock_mode(tk); + ++ /* CLOCK_REALTIME also required for time() */ ++ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME]; ++ vdso_ts->sec = tk->xtime_sec; ++ vdso_ts->nsec = tk->tkr_mono.xtime_nsec; ++ + /* CLOCK_REALTIME_COARSE */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE]; + vdso_ts->sec = tk->xtime_sec; +@@ -110,7 +96,18 @@ void update_vsyscall(struct timekeeper * + nsec = nsec + tk->wall_to_monotonic.tv_nsec; + vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec); + +- update_vdso_data(vdata, tk); ++ /* ++ * Read without the seqlock held by clock_getres(). ++ * Note: No need to have a second copy. ++ */ ++ WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution); ++ ++ /* ++ * Architectures can opt out of updating the high resolution part ++ * of the VDSO. ++ */ ++ if (__arch_update_vdso_data()) ++ update_vdso_data(vdata, tk); + + __arch_update_vsyscall(vdata, tk); + diff --git a/queue-5.5/locking-lockdep-fix-lockdep_stats-indentation-problem.patch b/queue-5.5/locking-lockdep-fix-lockdep_stats-indentation-problem.patch new file mode 100644 index 00000000000..6f1f9d10220 --- /dev/null +++ b/queue-5.5/locking-lockdep-fix-lockdep_stats-indentation-problem.patch @@ -0,0 +1,53 @@ +From a030f9767da1a6bbcec840fc54770eb11c2414b6 Mon Sep 17 00:00:00 2001 +From: Waiman Long +Date: Wed, 11 Dec 2019 16:31:39 -0500 +Subject: locking/lockdep: Fix lockdep_stats indentation problem + +From: Waiman Long + +commit a030f9767da1a6bbcec840fc54770eb11c2414b6 upstream. 
+ +It was found that two lines in the output of /proc/lockdep_stats have +indentation problem: + + # cat /proc/lockdep_stats + : + in-process chains: 25057 + stack-trace entries: 137827 [max: 524288] + number of stack traces: 7973 + number of stack hash chains: 6355 + combined max dependencies: 1356414598 + hardirq-safe locks: 57 + hardirq-unsafe locks: 1286 + : + +All the numbers displayed in /proc/lockdep_stats except the two stack +trace numbers are formatted with a field with of 11. To properly align +all the numbers, a field width of 11 is now added to the two stack +trace numbers. + +Fixes: 8c779229d0f4 ("locking/lockdep: Report more stack trace statistics") +Signed-off-by: Waiman Long +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Bart Van Assche +Link: https://lkml.kernel.org/r/20191211213139.29934-1-longman@redhat.com +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/locking/lockdep_proc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/locking/lockdep_proc.c ++++ b/kernel/locking/lockdep_proc.c +@@ -286,9 +286,9 @@ static int lockdep_stats_show(struct seq + seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n", + nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES); + #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) +- seq_printf(m, " number of stack traces: %llu\n", ++ seq_printf(m, " number of stack traces: %11llu\n", + lockdep_stack_trace_count()); +- seq_printf(m, " number of stack hash chains: %llu\n", ++ seq_printf(m, " number of stack hash chains: %11llu\n", + lockdep_stack_hash_count()); + #endif + seq_printf(m, " combined max dependencies: %11u\n", diff --git a/queue-5.5/mm-debug.c-always-print-flags-in-dump_page.patch b/queue-5.5/mm-debug.c-always-print-flags-in-dump_page.patch new file mode 100644 index 00000000000..908bc946a6d --- /dev/null +++ b/queue-5.5/mm-debug.c-always-print-flags-in-dump_page.patch @@ -0,0 +1,75 @@ +From 5b57b8f22709f07c0ab5921c94fd66e8c59c3e11 Mon Sep 17 00:00:00 2001 +From: Vlastimil Babka +Date: Thu, 30 Jan 2020 22:12:03 -0800 +Subject: mm/debug.c: always print flags in dump_page() + +From: Vlastimil Babka + +commit 5b57b8f22709f07c0ab5921c94fd66e8c59c3e11 upstream. + +Commit 76a1850e4572 ("mm/debug.c: __dump_page() prints an extra line") +inadvertently removed printing of page flags for pages that are neither +anon nor ksm nor have a mapping. Fix that. + +Using pr_cont() again would be a solution, but the commit explicitly +removed its use. Avoiding the danger of mixing up split lines from +multiple CPUs might be beneficial for near-panic dumps like this, so fix +this without reintroducing pr_cont(). 
+ +Link: http://lkml.kernel.org/r/9f884d5c-ca60-dc7b-219c-c081c755fab6@suse.cz +Fixes: 76a1850e4572 ("mm/debug.c: __dump_page() prints an extra line") +Signed-off-by: Vlastimil Babka +Reported-by: Anshuman Khandual +Reported-by: Michal Hocko +Acked-by: Michal Hocko +Cc: David Hildenbrand +Cc: Qian Cai +Cc: Oscar Salvador +Cc: Mel Gorman +Cc: Mike Rapoport +Cc: Dan Williams +Cc: Pavel Tatashin +Cc: Ralph Campbell +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/debug.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/mm/debug.c ++++ b/mm/debug.c +@@ -47,6 +47,7 @@ void __dump_page(struct page *page, cons + struct address_space *mapping; + bool page_poisoned = PagePoisoned(page); + int mapcount; ++ char *type = ""; + + /* + * If struct page is poisoned don't access Page*() functions as that +@@ -78,9 +79,9 @@ void __dump_page(struct page *page, cons + page, page_ref_count(page), mapcount, + page->mapping, page_to_pgoff(page)); + if (PageKsm(page)) +- pr_warn("ksm flags: %#lx(%pGp)\n", page->flags, &page->flags); ++ type = "ksm "; + else if (PageAnon(page)) +- pr_warn("anon flags: %#lx(%pGp)\n", page->flags, &page->flags); ++ type = "anon "; + else if (mapping) { + if (mapping->host && mapping->host->i_dentry.first) { + struct dentry *dentry; +@@ -88,10 +89,11 @@ void __dump_page(struct page *page, cons + pr_warn("%ps name:\"%pd\"\n", mapping->a_ops, dentry); + } else + pr_warn("%ps\n", mapping->a_ops); +- pr_warn("flags: %#lx(%pGp)\n", page->flags, &page->flags); + } + BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1); + ++ pr_warn("%sflags: %#lx(%pGp)\n", type, page->flags, &page->flags); ++ + hex_only: + print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32, + sizeof(unsigned long), page, diff --git a/queue-5.5/mm-gup-allow-foll_force-for-get_user_pages_fast.patch b/queue-5.5/mm-gup-allow-foll_force-for-get_user_pages_fast.patch new file mode 100644 index 00000000000..47819a291d9 --- /dev/null +++ b/queue-5.5/mm-gup-allow-foll_force-for-get_user_pages_fast.patch @@ -0,0 +1,65 @@ +From f4000fdf435b8301a11cf85237c561047f8c4c72 Mon Sep 17 00:00:00 2001 +From: John Hubbard +Date: Thu, 30 Jan 2020 22:12:43 -0800 +Subject: mm/gup: allow FOLL_FORCE for get_user_pages_fast() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: John Hubbard + +commit f4000fdf435b8301a11cf85237c561047f8c4c72 upstream. + +Commit 817be129e6f2 ("mm: validate get_user_pages_fast flags") allowed +only FOLL_WRITE and FOLL_LONGTERM to be passed to get_user_pages_fast(). +This, combined with the fact that get_user_pages_fast() falls back to +"slow gup", which *does* accept FOLL_FORCE, leads to an odd situation: +if you need FOLL_FORCE, you cannot call get_user_pages_fast(). + +There does not appear to be any reason for filtering out FOLL_FORCE. +There is nothing in the _fast() implementation that requires that we +avoid writing to the pages. So it appears to have been an oversight. + +Fix by allowing FOLL_FORCE to be set for get_user_pages_fast(). 
+ +Link: http://lkml.kernel.org/r/20200107224558.2362728-9-jhubbard@nvidia.com +Fixes: 817be129e6f2 ("mm: validate get_user_pages_fast flags") +Signed-off-by: John Hubbard +Reviewed-by: Leon Romanovsky +Reviewed-by: Jan Kara +Cc: Christoph Hellwig +Cc: Alex Williamson +Cc: Aneesh Kumar K.V +Cc: Björn Töpel +Cc: Daniel Vetter +Cc: Dan Williams +Cc: Hans Verkuil +Cc: Ira Weiny +Cc: Jason Gunthorpe +Cc: Jason Gunthorpe +Cc: Jens Axboe +Cc: Jerome Glisse +Cc: Jonathan Corbet +Cc: Kirill A. Shutemov +Cc: Mauro Carvalho Chehab +Cc: Mike Rapoport +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/gup.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -2415,7 +2415,8 @@ int get_user_pages_fast(unsigned long st + unsigned long addr, len, end; + int nr = 0, ret = 0; + +- if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM))) ++ if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM | ++ FOLL_FORCE))) + return -EINVAL; + + start = untagged_addr(start) & PAGE_MASK; diff --git a/queue-5.5/mm-huge_memory.c-use-head-to-check-huge-zero-page.patch b/queue-5.5/mm-huge_memory.c-use-head-to-check-huge-zero-page.patch new file mode 100644 index 00000000000..8b40b985a5a --- /dev/null +++ b/queue-5.5/mm-huge_memory.c-use-head-to-check-huge-zero-page.patch @@ -0,0 +1,36 @@ +From cb829624867b5ab10bc6a7036d183b1b82bfe9f8 Mon Sep 17 00:00:00 2001 +From: Wei Yang +Date: Thu, 30 Jan 2020 22:14:29 -0800 +Subject: mm/huge_memory.c: use head to check huge zero page + +From: Wei Yang + +commit cb829624867b5ab10bc6a7036d183b1b82bfe9f8 upstream. + +The page could be a tail page, if this is the case, this BUG_ON will +never be triggered. + +Link: http://lkml.kernel.org/r/20200110032610.26499-1-richardw.yang@linux.intel.com +Fixes: e9b61f19858a ("thp: reintroduce split_huge_page()") + +Signed-off-by: Wei Yang +Acked-by: Kirill A. Shutemov +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/huge_memory.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2712,7 +2712,7 @@ int split_huge_page_to_list(struct page + unsigned long flags; + pgoff_t end; + +- VM_BUG_ON_PAGE(is_huge_zero_page(page), page); ++ VM_BUG_ON_PAGE(is_huge_zero_page(head), head); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(!PageCompound(page), page); + diff --git a/queue-5.5/mm-thp-fix-defrag-setting-if-newline-is-not-used.patch b/queue-5.5/mm-thp-fix-defrag-setting-if-newline-is-not-used.patch new file mode 100644 index 00000000000..a3c8ed56201 --- /dev/null +++ b/queue-5.5/mm-thp-fix-defrag-setting-if-newline-is-not-used.patch @@ -0,0 +1,107 @@ +From f42f25526502d851d0e3ca1e46297da8aafce8a7 Mon Sep 17 00:00:00 2001 +From: David Rientjes +Date: Thu, 30 Jan 2020 22:14:48 -0800 +Subject: mm, thp: fix defrag setting if newline is not used + +From: David Rientjes + +commit f42f25526502d851d0e3ca1e46297da8aafce8a7 upstream. + +If thp defrag setting "defer" is used and a newline is *not* used when +writing to the sysfs file, this is interpreted as the "defer+madvise" +option. + +This is because we do prefix matching and if five characters are written +without a newline, the current code ends up comparing to the first five +bytes of the "defer+madvise" option and using that instead. + +Use the more appropriate sysfs_streq() that handles the trailing newline +for us. 
Since this doubles as a nice cleanup, do it in enabled_store() +as well. + +The current implementation relies on prefix matching: the number of +bytes compared is either the number of bytes written or the length of +the option being compared. With a newline, "defer\n" does not match +"defer+"madvise"; without a newline, however, "defer" is considered to +match "defer+madvise" (prefix matching is only comparing the first five +bytes). End result is that writing "defer" is broken unless it has an +additional trailing character. + +This means that writing "madv" in the past would match and set +"madvise". With strict checking, that no longer is the case but it is +unlikely anybody is currently doing this. + +Link: http://lkml.kernel.org/r/alpine.DEB.2.21.2001171411020.56385@chino.kir.corp.google.com +Fixes: 21440d7eb904 ("mm, thp: add new defer+madvise defrag option") +Signed-off-by: David Rientjes +Suggested-by: Andrew Morton +Acked-by: Vlastimil Babka +Cc: Mel Gorman +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/huge_memory.c | 24 ++++++++---------------- + 1 file changed, 8 insertions(+), 16 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -177,16 +177,13 @@ static ssize_t enabled_store(struct kobj + { + ssize_t ret = count; + +- if (!memcmp("always", buf, +- min(sizeof("always")-1, count))) { ++ if (sysfs_streq(buf, "always")) { + clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); +- } else if (!memcmp("madvise", buf, +- min(sizeof("madvise")-1, count))) { ++ } else if (sysfs_streq(buf, "madvise")) { + clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); +- } else if (!memcmp("never", buf, +- min(sizeof("never")-1, count))) { ++ } else if (sysfs_streq(buf, "never")) { + clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); + } else +@@ -250,32 +247,27 @@ static ssize_t defrag_store(struct kobje + struct kobj_attribute *attr, + const char *buf, size_t count) + { +- if (!memcmp("always", buf, +- min(sizeof("always")-1, count))) { ++ if (sysfs_streq(buf, "always")) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); +- } else if (!memcmp("defer+madvise", buf, +- min(sizeof("defer+madvise")-1, count))) { ++ } else if (sysfs_streq(buf, "defer+madvise")) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); +- } else if (!memcmp("defer", buf, +- min(sizeof("defer")-1, count))) { ++ } else if (sysfs_streq(buf, "defer")) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); + 
set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); +- } else if (!memcmp("madvise", buf, +- min(sizeof("madvise")-1, count))) { ++ } else if (sysfs_streq(buf, "madvise")) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); +- } else if (!memcmp("never", buf, +- min(sizeof("never")-1, count))) { ++ } else if (sysfs_streq(buf, "never")) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); diff --git a/queue-5.5/netfilter-nf_flowtable-fix-documentation.patch b/queue-5.5/netfilter-nf_flowtable-fix-documentation.patch new file mode 100644 index 00000000000..36b3c9d8201 --- /dev/null +++ b/queue-5.5/netfilter-nf_flowtable-fix-documentation.patch @@ -0,0 +1,39 @@ +From 78e06cf430934fc3768c342cbebdd1013dcd6fa7 Mon Sep 17 00:00:00 2001 +From: Matteo Croce +Date: Thu, 30 Jan 2020 20:10:19 +0100 +Subject: netfilter: nf_flowtable: fix documentation + +From: Matteo Croce + +commit 78e06cf430934fc3768c342cbebdd1013dcd6fa7 upstream. + +In the flowtable documentation there is a missing semicolon, the command +as is would give this error: + + nftables.conf:5:27-33: Error: syntax error, unexpected devices, expecting newline or semicolon + hook ingress priority 0 devices = { br0, pppoe-data }; + ^^^^^^^ + nftables.conf:4:12-13: Error: invalid hook (null) + flowtable ft { + ^^ + +Fixes: 19b351f16fd9 ("netfilter: add flowtable documentation") +Signed-off-by: Matteo Croce +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/networking/nf_flowtable.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/Documentation/networking/nf_flowtable.txt ++++ b/Documentation/networking/nf_flowtable.txt +@@ -76,7 +76,7 @@ flowtable and add one rule to your forwa + + table inet x { + flowtable f { +- hook ingress priority 0 devices = { eth0, eth1 }; ++ hook ingress priority 0; devices = { eth0, eth1 }; + } + chain y { + type filter hook forward priority 0; policy accept; diff --git a/queue-5.5/netfilter-nft_tunnel-no-need-to-call-htons-when-dumping-ports.patch b/queue-5.5/netfilter-nft_tunnel-no-need-to-call-htons-when-dumping-ports.patch new file mode 100644 index 00000000000..2d1efe13e37 --- /dev/null +++ b/queue-5.5/netfilter-nft_tunnel-no-need-to-call-htons-when-dumping-ports.patch @@ -0,0 +1,35 @@ +From cf3e204a1ca5442190018a317d9ec181b4639bd6 Mon Sep 17 00:00:00 2001 +From: Xin Long +Date: Fri, 13 Dec 2019 16:53:05 +0800 +Subject: netfilter: nft_tunnel: no need to call htons() when dumping ports + +From: Xin Long + +commit cf3e204a1ca5442190018a317d9ec181b4639bd6 upstream. + +info->key.tp_src and tp_dst are __be16, when using nla_put_be16() +to dump them, htons() is not needed, so remove it in this patch. 
+ +Fixes: af308b94a2a4 ("netfilter: nf_tables: add tunnel support") +Signed-off-by: Xin Long +Reviewed-by: Simon Horman +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman + +--- + net/netfilter/nft_tunnel.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/netfilter/nft_tunnel.c ++++ b/net/netfilter/nft_tunnel.c +@@ -505,8 +505,8 @@ static int nft_tunnel_opts_dump(struct s + static int nft_tunnel_ports_dump(struct sk_buff *skb, + struct ip_tunnel_info *info) + { +- if (nla_put_be16(skb, NFTA_TUNNEL_KEY_SPORT, htons(info->key.tp_src)) < 0 || +- nla_put_be16(skb, NFTA_TUNNEL_KEY_DPORT, htons(info->key.tp_dst)) < 0) ++ if (nla_put_be16(skb, NFTA_TUNNEL_KEY_SPORT, info->key.tp_src) < 0 || ++ nla_put_be16(skb, NFTA_TUNNEL_KEY_DPORT, info->key.tp_dst) < 0) + return -1; + + return 0; diff --git a/queue-5.5/padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch b/queue-5.5/padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch new file mode 100644 index 00000000000..7b71e9b09e3 --- /dev/null +++ b/queue-5.5/padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch @@ -0,0 +1,67 @@ +From 38228e8848cd7dd86ccb90406af32de0cad24be3 Mon Sep 17 00:00:00 2001 +From: Daniel Jordan +Date: Tue, 3 Dec 2019 14:31:11 -0500 +Subject: padata: always acquire cpu_hotplug_lock before pinst->lock + +From: Daniel Jordan + +commit 38228e8848cd7dd86ccb90406af32de0cad24be3 upstream. + +lockdep complains when padata's paths to update cpumasks via CPU hotplug +and sysfs are both taken: + + # echo 0 > /sys/devices/system/cpu/cpu1/online + # echo ff > /sys/kernel/pcrypt/pencrypt/parallel_cpumask + + ====================================================== + WARNING: possible circular locking dependency detected + 5.4.0-rc8-padata-cpuhp-v3+ #1 Not tainted + ------------------------------------------------------ + bash/205 is trying to acquire lock: + ffffffff8286bcd0 (cpu_hotplug_lock.rw_sem){++++}, at: padata_set_cpumask+0x2b/0x120 + + but task is already holding lock: + ffff8880001abfa0 (&pinst->lock){+.+.}, at: padata_set_cpumask+0x26/0x120 + + which lock already depends on the new lock. + +padata doesn't take cpu_hotplug_lock and pinst->lock in a consistent +order. Which should be first? CPU hotplug calls into padata with +cpu_hotplug_lock already held, so it should have priority. 
+ +Fixes: 6751fb3c0e0c ("padata: Use get_online_cpus/put_online_cpus") +Signed-off-by: Daniel Jordan +Cc: Eric Biggers +Cc: Herbert Xu +Cc: Steffen Klassert +Cc: linux-crypto@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/padata.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/padata.c ++++ b/kernel/padata.c +@@ -643,8 +643,8 @@ int padata_set_cpumask(struct padata_ins + struct cpumask *serial_mask, *parallel_mask; + int err = -EINVAL; + +- mutex_lock(&pinst->lock); + get_online_cpus(); ++ mutex_lock(&pinst->lock); + + switch (cpumask_type) { + case PADATA_CPU_PARALLEL: +@@ -662,8 +662,8 @@ int padata_set_cpumask(struct padata_ins + err = __padata_set_cpumasks(pinst, parallel_mask, serial_mask); + + out: +- put_online_cpus(); + mutex_unlock(&pinst->lock); ++ put_online_cpus(); + + return err; + } diff --git a/queue-5.5/perf-hists-browser-restore-esc-as-zoom-out-of-dso-thread-etc.patch b/queue-5.5/perf-hists-browser-restore-esc-as-zoom-out-of-dso-thread-etc.patch new file mode 100644 index 00000000000..488d673b954 --- /dev/null +++ b/queue-5.5/perf-hists-browser-restore-esc-as-zoom-out-of-dso-thread-etc.patch @@ -0,0 +1,35 @@ +From 3f7774033e6820d25beee5cf7aefa11d4968b951 Mon Sep 17 00:00:00 2001 +From: Arnaldo Carvalho de Melo +Date: Mon, 16 Dec 2019 13:22:33 -0300 +Subject: perf hists browser: Restore ESC as "Zoom out" of DSO/thread/etc + +From: Arnaldo Carvalho de Melo + +commit 3f7774033e6820d25beee5cf7aefa11d4968b951 upstream. + +We need to set actions->ms.map since 599a2f38a989 ("perf hists browser: +Check sort keys before hot key actions"), as in that patch we bail out +if map is NULL. + +Reviewed-by: Jiri Olsa +Cc: Adrian Hunter +Cc: Namhyung Kim +Fixes: 599a2f38a989 ("perf hists browser: Check sort keys before hot key actions") +Link: https://lkml.kernel.org/n/tip-wp1ssoewy6zihwwexqpohv0j@git.kernel.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + tools/perf/ui/browsers/hists.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/tools/perf/ui/browsers/hists.c ++++ b/tools/perf/ui/browsers/hists.c +@@ -3062,6 +3062,7 @@ static int perf_evsel__hists_browse(stru + + continue; + } ++ actions->ms.map = map; + top = pstack__peek(browser->pstack); + if (top == &browser->hists->dso_filter) { + /* diff --git a/queue-5.5/perf-maps-add-missing-unlock-to-maps__insert-error-case.patch b/queue-5.5/perf-maps-add-missing-unlock-to-maps__insert-error-case.patch new file mode 100644 index 00000000000..5e79f78133f --- /dev/null +++ b/queue-5.5/perf-maps-add-missing-unlock-to-maps__insert-error-case.patch @@ -0,0 +1,41 @@ +From 85fc95d75970ee7dd8e01904e7fb1197c275ba6b Mon Sep 17 00:00:00 2001 +From: Cengiz Can +Date: Mon, 20 Jan 2020 17:15:54 +0300 +Subject: perf maps: Add missing unlock to maps__insert() error case + +From: Cengiz Can + +commit 85fc95d75970ee7dd8e01904e7fb1197c275ba6b upstream. + +`tools/perf/util/map.c` has a function named `maps__insert` that +acquires a write lock if its in multithread context. + +Even though this lock is released when function successfully completes, +there's a branch that is executed when `maps_by_name == NULL` that +returns from this function without releasing the write lock. + +Added an `up_write` to release the lock when this happens. 
+ +Fixes: a7c2b572e217 ("perf map_groups: Auto sort maps by name, if needed") +Signed-off-by: Cengiz Can +Cc: Adrian Hunter +Cc: Jiri Olsa +Cc: Namhyung Kim +Link: http://lore.kernel.org/lkml/20200120141553.23934-1-cengiz@kernel.wtf +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + tools/perf/util/map.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/tools/perf/util/map.c ++++ b/tools/perf/util/map.c +@@ -549,6 +549,7 @@ void maps__insert(struct maps *maps, str + + if (maps_by_name == NULL) { + __maps__free_maps_by_name(maps); ++ up_write(&maps->lock); + return; + } + diff --git a/queue-5.5/perf-ui-gtk-add-missing-zalloc-object.patch b/queue-5.5/perf-ui-gtk-add-missing-zalloc-object.patch new file mode 100644 index 00000000000..a0e8a95ec4b --- /dev/null +++ b/queue-5.5/perf-ui-gtk-add-missing-zalloc-object.patch @@ -0,0 +1,41 @@ +From 604e2139a1026793b8c2172bd92c7e9d039a5cf0 Mon Sep 17 00:00:00 2001 +From: Jiri Olsa +Date: Mon, 13 Jan 2020 11:43:57 +0100 +Subject: perf ui gtk: Add missing zalloc object + +From: Jiri Olsa + +commit 604e2139a1026793b8c2172bd92c7e9d039a5cf0 upstream. + +When we moved zalloc.o to the library we missed gtk library which needs +it compiled in, otherwise the missing __zfree symbol will cause the +library to fail to load. + +Adding the zalloc object to the gtk library build. + +Fixes: 7f7c536f23e6 ("tools lib: Adopt zalloc()/zfree() from tools/perf") +Signed-off-by: Jiri Olsa +Cc: Alexander Shishkin +Cc: Jelle van der Waa +Cc: Michael Petlan +Cc: Namhyung Kim +Cc: Peter Zijlstra +Link: http://lore.kernel.org/lkml/20200113104358.123511-1-jolsa@kernel.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + tools/perf/ui/gtk/Build | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/tools/perf/ui/gtk/Build ++++ b/tools/perf/ui/gtk/Build +@@ -7,3 +7,8 @@ gtk-y += util.o + gtk-y += helpline.o + gtk-y += progress.o + gtk-y += annotate.o ++gtk-y += zalloc.o ++ ++$(OUTPUT)ui/gtk/zalloc.o: ../lib/zalloc.c FORCE ++ $(call rule_mkdir) ++ $(call if_changed_dep,cc_o_c) diff --git a/queue-5.5/pwm-omap-dmtimer-put_device-after-of_find_device_by_node.patch b/queue-5.5/pwm-omap-dmtimer-put_device-after-of_find_device_by_node.patch new file mode 100644 index 00000000000..195e6ff2f2c --- /dev/null +++ b/queue-5.5/pwm-omap-dmtimer-put_device-after-of_find_device_by_node.patch @@ -0,0 +1,96 @@ +From c7cb3a1dd53f63c64fb2b567d0be130b92a44d91 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= +Date: Mon, 11 Nov 2019 10:03:56 +0100 +Subject: pwm: omap-dmtimer: put_device() after of_find_device_by_node() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Uwe Kleine-König + +commit c7cb3a1dd53f63c64fb2b567d0be130b92a44d91 upstream. + +This was found by coccicheck: + + drivers/pwm/pwm-omap-dmtimer.c:304:2-8: ERROR: missing put_device; + call of_find_device_by_node on line 255, but without a corresponding + object release within this function. 
+ +Reported-by: Markus Elfring +Fixes: 6604c6556db9 ("pwm: Add PWM driver for OMAP using dual-mode timers") +Signed-off-by: Uwe Kleine-König +Signed-off-by: Thierry Reding +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/pwm/pwm-omap-dmtimer.c | 21 +++++++++++++++------ + 1 file changed, 15 insertions(+), 6 deletions(-) + +--- a/drivers/pwm/pwm-omap-dmtimer.c ++++ b/drivers/pwm/pwm-omap-dmtimer.c +@@ -256,7 +256,7 @@ static int pwm_omap_dmtimer_probe(struct + if (!timer_pdev) { + dev_err(&pdev->dev, "Unable to find Timer pdev\n"); + ret = -ENODEV; +- goto put; ++ goto err_find_timer_pdev; + } + + timer_pdata = dev_get_platdata(&timer_pdev->dev); +@@ -264,7 +264,7 @@ static int pwm_omap_dmtimer_probe(struct + dev_dbg(&pdev->dev, + "dmtimer pdata structure NULL, deferring probe\n"); + ret = -EPROBE_DEFER; +- goto put; ++ goto err_platdata; + } + + pdata = timer_pdata->timer_ops; +@@ -283,19 +283,19 @@ static int pwm_omap_dmtimer_probe(struct + !pdata->write_counter) { + dev_err(&pdev->dev, "Incomplete dmtimer pdata structure\n"); + ret = -EINVAL; +- goto put; ++ goto err_platdata; + } + + if (!of_get_property(timer, "ti,timer-pwm", NULL)) { + dev_err(&pdev->dev, "Missing ti,timer-pwm capability\n"); + ret = -ENODEV; +- goto put; ++ goto err_timer_property; + } + + dm_timer = pdata->request_by_node(timer); + if (!dm_timer) { + ret = -EPROBE_DEFER; +- goto put; ++ goto err_request_timer; + } + + omap = devm_kzalloc(&pdev->dev, sizeof(*omap), GFP_KERNEL); +@@ -352,7 +352,14 @@ err_pwmchip_add: + err_alloc_omap: + + pdata->free(dm_timer); +-put: ++err_request_timer: ++ ++err_timer_property: ++err_platdata: ++ ++ put_device(&timer_pdev->dev); ++err_find_timer_pdev: ++ + of_node_put(timer); + + return ret; +@@ -372,6 +379,8 @@ static int pwm_omap_dmtimer_remove(struc + + omap->pdata->free(omap->dm_timer); + ++ put_device(&omap->dm_timer_pdev->dev); ++ + mutex_destroy(&omap->mutex); + + return 0; diff --git a/queue-5.5/rcu-allow-only-one-expedited-gp-to-run-concurrently-with-wakeups.patch b/queue-5.5/rcu-allow-only-one-expedited-gp-to-run-concurrently-with-wakeups.patch new file mode 100644 index 00000000000..1e29f36b4b5 --- /dev/null +++ b/queue-5.5/rcu-allow-only-one-expedited-gp-to-run-concurrently-with-wakeups.patch @@ -0,0 +1,86 @@ +From 4bc6b745e5cbefed92c48071e28a5f41246d0470 Mon Sep 17 00:00:00 2001 +From: Neeraj Upadhyay +Date: Tue, 19 Nov 2019 11:50:52 -0800 +Subject: rcu: Allow only one expedited GP to run concurrently with wakeups + +From: Neeraj Upadhyay + +commit 4bc6b745e5cbefed92c48071e28a5f41246d0470 upstream. + +The current expedited RCU grace-period code expects that a task +requesting an expedited grace period cannot awaken until that grace +period has reached the wakeup phase. However, it is possible for a long +preemption to result in the waiting task never sleeping. For example, +consider the following sequence of events: + +1. Task A starts an expedited grace period by invoking + synchronize_rcu_expedited(). It proceeds normally up to the + wait_event() near the end of that function, and is then preempted + (or interrupted or whatever). + +2. The expedited grace period completes, and a kworker task starts + the awaken phase, having incremented the counter and acquired + the rcu_state structure's .exp_wake_mutex. This kworker task + is then preempted or interrupted or whatever. + +3. Task A resumes and enters wait_event(), which notes that the + expedited grace period has completed, and thus doesn't sleep. + +4. 
Task B starts an expedited grace period exactly as did Task A,
+ complete with the preemption (or whatever delay) just before
+ the call to wait_event().
+
+5. The expedited grace period completes, and another kworker
+ task starts the awaken phase, having incremented the counter.
+ However, it blocks when attempting to acquire the rcu_state
+ structure's .exp_wake_mutex because step 2's kworker task has
+ not yet released it.
+
+6. Steps 4 and 5 repeat, resulting in overflow of the rcu_node
+ structure's ->exp_wq[] array.
+
+In theory, this is harmless. Tasks waiting on the various ->exp_wq[]
+array will just be spuriously awakened, but they will just sleep again
+on noting that the rcu_state structure's ->expedited_sequence value has
+not advanced far enough.
+
+In practice, this wastes CPU time and is an accident waiting to happen.
+This commit therefore moves the rcu_exp_gp_seq_end() call that officially
+ends the expedited grace period (along with associated tracing) until
+after the ->exp_wake_mutex has been acquired. This prevents Task A from
+awakening prematurely, thus preventing more than one expedited grace
+period from being in flight during a previous expedited grace period's
+wakeup phase.
+
+Fixes: 3b5f668e715b ("rcu: Overlap wakeups with next expedited grace period")
+Signed-off-by: Neeraj Upadhyay
+[ paulmck: Added updated comment. ]
+Signed-off-by: Paul E. McKenney
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/rcu/tree_exp.h | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/kernel/rcu/tree_exp.h
++++ b/kernel/rcu/tree_exp.h
+@@ -540,14 +540,13 @@ static void rcu_exp_wait_wake(unsigned l
+ struct rcu_node *rnp;
+
+ synchronize_sched_expedited_wait();
+- rcu_exp_gp_seq_end();
+- trace_rcu_exp_grace_period(rcu_state.name, s, TPS("end"));
+
+- /*
+- * Switch over to wakeup mode, allowing the next GP, but -only- the
+- * next GP, to proceed.
+- */
++ // Switch over to wakeup mode, allowing the next GP to proceed.
++ // End the previous grace period only after acquiring the mutex
++ // to ensure that only one GP runs concurrently with wakeups.
+ mutex_lock(&rcu_state.exp_wake_mutex);
++ rcu_exp_gp_seq_end();
++ trace_rcu_exp_grace_period(rcu_state.name, s, TPS("end"));
+
+ rcu_for_each_node_breadth_first(rnp) {
+ if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
diff --git a/queue-5.5/rdma-hns-bugfix-for-posting-a-wqe-with-sge.patch b/queue-5.5/rdma-hns-bugfix-for-posting-a-wqe-with-sge.patch
new file mode 100644
index 00000000000..986b9218b98
--- /dev/null
+++ b/queue-5.5/rdma-hns-bugfix-for-posting-a-wqe-with-sge.patch
@@ -0,0 +1,169 @@
+From 468d020e2f02867b8ec561461a1689cd4365e493 Mon Sep 17 00:00:00 2001
+From: Lijun Ou
+Date: Thu, 9 Jan 2020 20:10:52 +0800
+Subject: RDMA/hns: Bugfix for posting a wqe with sge
+
+From: Lijun Ou
+
+commit 468d020e2f02867b8ec561461a1689cd4365e493 upstream.
+
+Driver should first check whether the sge is valid, then fill the valid
+sge and the calculated total into hardware, otherwise invalid sges will
+cause an error.
+ +Fixes: 52e3b42a2f58 ("RDMA/hns: Filter for zero length of sge in hip08 kernel mode") +Fixes: 7bdee4158b37 ("RDMA/hns: Fill sq wqe context of ud type in hip08") +Link: https://lore.kernel.org/r/1578571852-13704-1-git-send-email-liweihang@huawei.com +Signed-off-by: Lijun Ou +Signed-off-by: Weihang Li +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 41 +++++++++++++++++------------ + 1 file changed, 25 insertions(+), 16 deletions(-) + +--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c ++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +@@ -110,7 +110,7 @@ static void set_atomic_seg(struct hns_ro + } + + static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, +- unsigned int *sge_ind) ++ unsigned int *sge_ind, int valid_num_sge) + { + struct hns_roce_v2_wqe_data_seg *dseg; + struct ib_sge *sg; +@@ -123,7 +123,7 @@ static void set_extend_sge(struct hns_ro + + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) + num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; +- extend_sge_num = wr->num_sge - num_in_wqe; ++ extend_sge_num = valid_num_sge - num_in_wqe; + sg = wr->sg_list + num_in_wqe; + shift = qp->hr_buf.page_shift; + +@@ -159,14 +159,16 @@ static void set_extend_sge(struct hns_ro + static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, + void *wqe, unsigned int *sge_ind, ++ int valid_num_sge, + const struct ib_send_wr **bad_wr) + { + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_v2_wqe_data_seg *dseg = wqe; + struct hns_roce_qp *qp = to_hr_qp(ibqp); ++ int j = 0; + int i; + +- if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) { ++ if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) { + if (le32_to_cpu(rc_sq_wqe->msg_len) > + hr_dev->caps.max_sq_inline) { + *bad_wr = wr; +@@ -190,7 +192,7 @@ static int set_rwqe_data_seg(struct ib_q + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, + 1); + } else { +- if (wr->num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) { ++ if (valid_num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) { + for (i = 0; i < wr->num_sge; i++) { + if (likely(wr->sg_list[i].length)) { + set_data_seg_v2(dseg, wr->sg_list + i); +@@ -203,19 +205,21 @@ static int set_rwqe_data_seg(struct ib_q + V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, + (*sge_ind) & (qp->sge.sge_cnt - 1)); + +- for (i = 0; i < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) { ++ for (i = 0; i < wr->num_sge && ++ j < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) { + if (likely(wr->sg_list[i].length)) { + set_data_seg_v2(dseg, wr->sg_list + i); + dseg++; ++ j++; + } + } + +- set_extend_sge(qp, wr, sge_ind); ++ set_extend_sge(qp, wr, sge_ind, valid_num_sge); + } + + roce_set_field(rc_sq_wqe->byte_16, + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, +- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, wr->num_sge); ++ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); + } + + return 0; +@@ -243,6 +247,7 @@ static int hns_roce_v2_post_send(struct + unsigned int sge_idx; + unsigned int wqe_idx; + unsigned long flags; ++ int valid_num_sge; + void *wqe = NULL; + bool loopback; + int attr_mask; +@@ -292,8 +297,16 @@ static int hns_roce_v2_post_send(struct + qp->sq.wrid[wqe_idx] = wr->wr_id; + owner_bit = + ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); ++ valid_num_sge = 0; + tmp_len = 0; + ++ for (i = 0; i < wr->num_sge; i++) { ++ if (likely(wr->sg_list[i].length)) { ++ tmp_len += wr->sg_list[i].length; ++ valid_num_sge++; ++ } ++ } ++ + /* 
Corresponding to the QP type, wqe process separately */
+ if (ibqp->qp_type == IB_QPT_GSI) {
+ ud_sq_wqe = wqe;
+@@ -329,9 +342,6 @@ static int hns_roce_v2_post_send(struct
+ V2_UD_SEND_WQE_BYTE_4_OPCODE_S,
+ HNS_ROCE_V2_WQE_OP_SEND);
+
+- for (i = 0; i < wr->num_sge; i++)
+- tmp_len += wr->sg_list[i].length;
+
+ ud_sq_wqe->msg_len =
+ cpu_to_le32(le32_to_cpu(ud_sq_wqe->msg_len) + tmp_len);
+
+@@ -367,7 +377,7 @@ static int hns_roce_v2_post_send(struct
+ roce_set_field(ud_sq_wqe->byte_16,
+ V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
+ V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S,
+- wr->num_sge);
++ valid_num_sge);
+
+ roce_set_field(ud_sq_wqe->byte_20,
+ V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
+@@ -422,12 +432,10 @@ static int hns_roce_v2_post_send(struct
+ memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0],
+ GID_LEN_V2);
+
+- set_extend_sge(qp, wr, &sge_idx);
++ set_extend_sge(qp, wr, &sge_idx, valid_num_sge);
+ } else if (ibqp->qp_type == IB_QPT_RC) {
+ rc_sq_wqe = wqe;
+ memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
+- for (i = 0; i < wr->num_sge; i++)
+- tmp_len += wr->sg_list[i].length;
+
+ rc_sq_wqe->msg_len =
+ cpu_to_le32(le32_to_cpu(rc_sq_wqe->msg_len) + tmp_len);
+@@ -548,10 +556,11 @@ static int hns_roce_v2_post_send(struct
+ roce_set_field(rc_sq_wqe->byte_16,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
+- wr->num_sge);
++ valid_num_sge);
+ } else if (wr->opcode != IB_WR_REG_MR) {
+ ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
+- wqe, &sge_idx, bad_wr);
++ wqe, &sge_idx,
++ valid_num_sge, bad_wr);
+ if (ret)
+ goto out;
+ }
diff --git a/queue-5.5/rdma-hns-simplify-the-calculation-and-usage-of-wqe-idx-for-post-verbs.patch b/queue-5.5/rdma-hns-simplify-the-calculation-and-usage-of-wqe-idx-for-post-verbs.patch
new file mode 100644
index 00000000000..a026e3bdf4c
--- /dev/null
+++ b/queue-5.5/rdma-hns-simplify-the-calculation-and-usage-of-wqe-idx-for-post-verbs.patch
@@ -0,0 +1,338 @@
+From 4768820243d71d49f1044b3f911ac3d52bdb79af Mon Sep 17 00:00:00 2001
+From: Yixian Liu
+Date: Tue, 10 Dec 2019 20:45:02 +0800
+Subject: RDMA/hns: Simplify the calculation and usage of wqe idx for post verbs
+
+From: Yixian Liu
+
+commit 4768820243d71d49f1044b3f911ac3d52bdb79af upstream.
+
+Currently, the wqe idx is calculated repeatedly everywhere it is used. This
+patch defines wqe_idx and calculates it only once, then just uses it as
+needed.
+ +Fixes: 2d40788825ac ("RDMA/hns: Add support for processing send wr and receive wr") +Link: https://lore.kernel.org/r/1575981902-5274-1-git-send-email-liweihang@hisilicon.com +Signed-off-by: Yixian Liu +Signed-off-by: Weihang Li +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/hw/hns/hns_roce_device.h | 3 - + drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 37 ++++++++++-------------- + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 43 +++++++++++----------------- + 3 files changed, 35 insertions(+), 48 deletions(-) + +--- a/drivers/infiniband/hw/hns/hns_roce_device.h ++++ b/drivers/infiniband/hw/hns/hns_roce_device.h +@@ -423,7 +423,7 @@ struct hns_roce_mr_table { + struct hns_roce_wq { + u64 *wrid; /* Work request ID */ + spinlock_t lock; +- int wqe_cnt; /* WQE num */ ++ u32 wqe_cnt; /* WQE num */ + int max_gs; + int offset; + int wqe_shift; /* WQE size */ +@@ -647,7 +647,6 @@ struct hns_roce_qp { + u8 sdb_en; + u32 doorbell_qpn; + u32 sq_signal_bits; +- u32 sq_next_wqe; + struct hns_roce_wq sq; + + struct ib_umem *umem; +--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c ++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +@@ -74,8 +74,8 @@ static int hns_roce_v1_post_send(struct + unsigned long flags = 0; + void *wqe = NULL; + __le32 doorbell[2]; ++ u32 wqe_idx = 0; + int nreq = 0; +- u32 ind = 0; + int ret = 0; + u8 *smac; + int loopback; +@@ -88,7 +88,7 @@ static int hns_roce_v1_post_send(struct + } + + spin_lock_irqsave(&qp->sq.lock, flags); +- ind = qp->sq_next_wqe; ++ + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { + ret = -ENOMEM; +@@ -96,6 +96,8 @@ static int hns_roce_v1_post_send(struct + goto out; + } + ++ wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); ++ + if (unlikely(wr->num_sge > qp->sq.max_gs)) { + dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n", + wr->num_sge, qp->sq.max_gs); +@@ -104,9 +106,8 @@ static int hns_roce_v1_post_send(struct + goto out; + } + +- wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); +- qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = +- wr->wr_id; ++ wqe = get_send_wqe(qp, wqe_idx); ++ qp->sq.wrid[wqe_idx] = wr->wr_id; + + /* Corresponding to the RC and RD type wqe process separately */ + if (ibqp->qp_type == IB_QPT_GSI) { +@@ -210,7 +211,6 @@ static int hns_roce_v1_post_send(struct + cpu_to_le32((wr->sg_list[1].addr) >> 32); + ud_sq_wqe->l_key1 = + cpu_to_le32(wr->sg_list[1].lkey); +- ind++; + } else if (ibqp->qp_type == IB_QPT_RC) { + u32 tmp_len = 0; + +@@ -308,7 +308,6 @@ static int hns_roce_v1_post_send(struct + ctrl->flag |= cpu_to_le32(wr->num_sge << + HNS_ROCE_WQE_SGE_NUM_BIT); + } +- ind++; + } + } + +@@ -336,7 +335,6 @@ out: + doorbell[1] = sq_db.u32_8; + + hns_roce_write64_k(doorbell, qp->sq.db_reg_l); +- qp->sq_next_wqe = ind; + } + + spin_unlock_irqrestore(&qp->sq.lock, flags); +@@ -348,12 +346,6 @@ static int hns_roce_v1_post_recv(struct + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) + { +- int ret = 0; +- int nreq = 0; +- int ind = 0; +- int i = 0; +- u32 reg_val; +- unsigned long flags = 0; + struct hns_roce_rq_wqe_ctrl *ctrl = NULL; + struct hns_roce_wqe_data_seg *scat = NULL; + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); +@@ -361,9 +353,14 @@ static int hns_roce_v1_post_recv(struct + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_rq_db rq_db; + __le32 doorbell[2] = {0}; ++ unsigned long flags = 0; ++ unsigned int wqe_idx; ++ int ret = 0; ++ int nreq = 0; ++ int i = 0; ++ u32 reg_val; 
+ + spin_lock_irqsave(&hr_qp->rq.lock, flags); +- ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (hns_roce_wq_overflow(&hr_qp->rq, nreq, +@@ -373,6 +370,8 @@ static int hns_roce_v1_post_recv(struct + goto out; + } + ++ wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); ++ + if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { + dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n", + wr->num_sge, hr_qp->rq.max_gs); +@@ -381,7 +380,7 @@ static int hns_roce_v1_post_recv(struct + goto out; + } + +- ctrl = get_recv_wqe(hr_qp, ind); ++ ctrl = get_recv_wqe(hr_qp, wqe_idx); + + roce_set_field(ctrl->rwqe_byte_12, + RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M, +@@ -393,9 +392,7 @@ static int hns_roce_v1_post_recv(struct + for (i = 0; i < wr->num_sge; i++) + set_data_seg(scat + i, wr->sg_list + i); + +- hr_qp->rq.wrid[ind] = wr->wr_id; +- +- ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1); ++ hr_qp->rq.wrid[wqe_idx] = wr->wr_id; + } + + out: +@@ -2701,7 +2698,6 @@ static int hns_roce_v1_m_sqp(struct ib_q + hr_qp->rq.tail = 0; + hr_qp->sq.head = 0; + hr_qp->sq.tail = 0; +- hr_qp->sq_next_wqe = 0; + } + + kfree(context); +@@ -3315,7 +3311,6 @@ static int hns_roce_v1_m_qp(struct ib_qp + hr_qp->rq.tail = 0; + hr_qp->sq.head = 0; + hr_qp->sq.tail = 0; +- hr_qp->sq_next_wqe = 0; + } + out: + kfree(context); +--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c ++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +@@ -239,10 +239,10 @@ static int hns_roce_v2_post_send(struct + struct device *dev = hr_dev->dev; + struct hns_roce_v2_db sq_db; + struct ib_qp_attr attr; +- unsigned int sge_ind; + unsigned int owner_bit; ++ unsigned int sge_idx; ++ unsigned int wqe_idx; + unsigned long flags; +- unsigned int ind; + void *wqe = NULL; + bool loopback; + int attr_mask; +@@ -269,8 +269,7 @@ static int hns_roce_v2_post_send(struct + } + + spin_lock_irqsave(&qp->sq.lock, flags); +- ind = qp->sq_next_wqe; +- sge_ind = qp->next_sge; ++ sge_idx = qp->next_sge; + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { +@@ -279,6 +278,8 @@ static int hns_roce_v2_post_send(struct + goto out; + } + ++ wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); ++ + if (unlikely(wr->num_sge > qp->sq.max_gs)) { + dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n", + wr->num_sge, qp->sq.max_gs); +@@ -287,10 +288,8 @@ static int hns_roce_v2_post_send(struct + goto out; + } + +- wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); +- qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = +- wr->wr_id; +- ++ wqe = get_send_wqe(qp, wqe_idx); ++ qp->sq.wrid[wqe_idx] = wr->wr_id; + owner_bit = + ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); + tmp_len = 0; +@@ -373,7 +372,7 @@ static int hns_roce_v2_post_send(struct + roce_set_field(ud_sq_wqe->byte_20, + V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, + V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, +- sge_ind & (qp->sge.sge_cnt - 1)); ++ sge_idx & (qp->sge.sge_cnt - 1)); + + roce_set_field(ud_sq_wqe->byte_24, + V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, +@@ -423,8 +422,7 @@ static int hns_roce_v2_post_send(struct + memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], + GID_LEN_V2); + +- set_extend_sge(qp, wr, &sge_ind); +- ind++; ++ set_extend_sge(qp, wr, &sge_idx); + } else if (ibqp->qp_type == IB_QPT_RC) { + rc_sq_wqe = wqe; + memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); +@@ -553,12 +551,10 @@ static int hns_roce_v2_post_send(struct + wr->num_sge); + } else if (wr->opcode != IB_WR_REG_MR) { + ret = 
set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, +- wqe, &sge_ind, bad_wr); ++ wqe, &sge_idx, bad_wr); + if (ret) + goto out; + } +- +- ind++; + } else { + dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type); + spin_unlock_irqrestore(&qp->sq.lock, flags); +@@ -588,8 +584,7 @@ out: + + hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l); + +- qp->sq_next_wqe = ind; +- qp->next_sge = sge_ind; ++ qp->next_sge = sge_idx; + + if (qp->state == IB_QPS_ERR) { + attr_mask = IB_QP_STATE; +@@ -623,13 +618,12 @@ static int hns_roce_v2_post_recv(struct + unsigned long flags; + void *wqe = NULL; + int attr_mask; ++ u32 wqe_idx; + int ret = 0; + int nreq; +- int ind; + int i; + + spin_lock_irqsave(&hr_qp->rq.lock, flags); +- ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1); + + if (hr_qp->state == IB_QPS_RESET) { + spin_unlock_irqrestore(&hr_qp->rq.lock, flags); +@@ -645,6 +639,8 @@ static int hns_roce_v2_post_recv(struct + goto out; + } + ++ wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); ++ + if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { + dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n", + wr->num_sge, hr_qp->rq.max_gs); +@@ -653,7 +649,7 @@ static int hns_roce_v2_post_recv(struct + goto out; + } + +- wqe = get_recv_wqe(hr_qp, ind); ++ wqe = get_recv_wqe(hr_qp, wqe_idx); + dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; + for (i = 0; i < wr->num_sge; i++) { + if (!wr->sg_list[i].length) +@@ -669,8 +665,8 @@ static int hns_roce_v2_post_recv(struct + + /* rq support inline data */ + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { +- sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list; +- hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = ++ sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; ++ hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = + (u32)wr->num_sge; + for (i = 0; i < wr->num_sge; i++) { + sge_list[i].addr = +@@ -679,9 +675,7 @@ static int hns_roce_v2_post_recv(struct + } + } + +- hr_qp->rq.wrid[ind] = wr->wr_id; +- +- ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1); ++ hr_qp->rq.wrid[wqe_idx] = wr->wr_id; + } + + out: +@@ -4464,7 +4458,6 @@ static int hns_roce_v2_modify_qp(struct + hr_qp->rq.tail = 0; + hr_qp->sq.head = 0; + hr_qp->sq.tail = 0; +- hr_qp->sq_next_wqe = 0; + hr_qp->next_sge = 0; + if (hr_qp->rq.wqe_cnt) + *hr_qp->rdb.db_record = 0; diff --git a/queue-5.5/series b/queue-5.5/series index 4d52a583ff0..ede27e583c4 100644 --- a/queue-5.5/series +++ b/queue-5.5/series @@ -141,3 +141,36 @@ perf-report-fix-no-libunwind-compiled-warning-break-s390-issue.patch kvm-svm-override-default-mmio-mask-if-memory-encryption-is-enabled.patch kvm-check-for-a-bad-hva-before-dropping-into-the-ghc-slow-path.patch sched-fair-optimize-select_idle_cpu.patch +f2fs-fix-to-add-swap-extent-correctly.patch +rdma-hns-simplify-the-calculation-and-usage-of-wqe-idx-for-post-verbs.patch +rdma-hns-bugfix-for-posting-a-wqe-with-sge.patch +drivers-net-xgene-fix-the-order-of-the-arguments-of-alloc_etherdev_mqs.patch +ima-ima-lsm-policy-rule-loading-logic-bug-fixes.patch +kprobes-set-unoptimized-flag-after-unoptimizing-code.patch +lib-vdso-make-__arch_update_vdso_data-logic-understandable.patch +lib-vdso-update-coarse-timekeeper-unconditionally.patch +pwm-omap-dmtimer-put_device-after-of_find_device_by_node.patch +perf-hists-browser-restore-esc-as-zoom-out-of-dso-thread-etc.patch +perf-ui-gtk-add-missing-zalloc-object.patch +perf-maps-add-missing-unlock-to-maps__insert-error-case.patch +x86-resctrl-check-monitoring-static-key-in-the-mbm-overflow-handler.patch 
+kvm-x86-fix-kvm_bitmap_or_dest_vcpus-to-use-irq-shorthand.patch +kvm-x86-remove-spurious-kvm_mmu_unload-from-vcpu-destruction-path.patch +kvm-x86-remove-spurious-clearing-of-async-pf-msr.patch +rcu-allow-only-one-expedited-gp-to-run-concurrently-with-wakeups.patch +ubifs-fix-ino_t-format-warnings-in-orphan_delete.patch +thermal-db8500-depromote-debug-print.patch +thermal-brcmstb_thermal-do-not-use-dt-coefficients.patch +netfilter-nft_tunnel-no-need-to-call-htons-when-dumping-ports.patch +netfilter-nf_flowtable-fix-documentation.patch +bus-tegra-aconnect-remove-pm_clk-dependency.patch +clk-qcom-rpmh-sort-of-match-table.patch +xfs-clear-kernel-only-flags-in-xfs_ioc_attrmulti_by_handle.patch +padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch +locking-lockdep-fix-lockdep_stats-indentation-problem.patch +mm-debug.c-always-print-flags-in-dump_page.patch +mm-gup-allow-foll_force-for-get_user_pages_fast.patch +mm-huge_memory.c-use-head-to-check-huge-zero-page.patch +mm-thp-fix-defrag-setting-if-newline-is-not-used.patch +kvm-nvmx-vmwrite-checks-vmcs-link-pointer-before-vmcs-field.patch +kvm-nvmx-vmwrite-checks-unsupported-field-before-read-only-field.patch diff --git a/queue-5.5/thermal-brcmstb_thermal-do-not-use-dt-coefficients.patch b/queue-5.5/thermal-brcmstb_thermal-do-not-use-dt-coefficients.patch new file mode 100644 index 00000000000..00e7a0d57f5 --- /dev/null +++ b/queue-5.5/thermal-brcmstb_thermal-do-not-use-dt-coefficients.patch @@ -0,0 +1,100 @@ +From e1ff6fc22f19e2af8adbad618526b80067911d40 Mon Sep 17 00:00:00 2001 +From: Florian Fainelli +Date: Tue, 14 Jan 2020 11:06:02 -0800 +Subject: thermal: brcmstb_thermal: Do not use DT coefficients + +From: Florian Fainelli + +commit e1ff6fc22f19e2af8adbad618526b80067911d40 upstream. + +At the time the brcmstb_thermal driver and its binding were merged, the +DT binding did not make the coefficients properties a mandatory one, +therefore all users of the brcmstb_thermal driver out there have a non +functional implementation with zero coefficients. Even if these +properties were provided, the formula used for computation is incorrect. + +The coefficients are entirely process specific (right now, only 28nm is +supported) and not board or SoC specific, it is therefore appropriate to +hard code them in the driver given the compatibility string we are +probed with which has to be updated whenever a new process is +introduced. + +We remove the existing coefficients definition since subsequent patches +are going to add support for a new process and will introduce new +coefficients as well. 
+ +Fixes: 9e03cf1b2dd5 ("thermal: add brcmstb AVS TMON driver") +Signed-off-by: Florian Fainelli +Reviewed-by: Amit Kucheria +Signed-off-by: Daniel Lezcano +Link: https://lore.kernel.org/r/20200114190607.29339-2-f.fainelli@gmail.com +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/thermal/broadcom/brcmstb_thermal.c | 31 ++++++++--------------------- + 1 file changed, 9 insertions(+), 22 deletions(-) + +--- a/drivers/thermal/broadcom/brcmstb_thermal.c ++++ b/drivers/thermal/broadcom/brcmstb_thermal.c +@@ -49,7 +49,7 @@ + #define AVS_TMON_TP_TEST_ENABLE 0x20 + + /* Default coefficients */ +-#define AVS_TMON_TEMP_SLOPE -487 ++#define AVS_TMON_TEMP_SLOPE 487 + #define AVS_TMON_TEMP_OFFSET 410040 + + /* HW related temperature constants */ +@@ -108,23 +108,12 @@ struct brcmstb_thermal_priv { + struct thermal_zone_device *thermal; + }; + +-static void avs_tmon_get_coeffs(struct thermal_zone_device *tz, int *slope, +- int *offset) +-{ +- *slope = thermal_zone_get_slope(tz); +- *offset = thermal_zone_get_offset(tz); +-} +- + /* Convert a HW code to a temperature reading (millidegree celsius) */ + static inline int avs_tmon_code_to_temp(struct thermal_zone_device *tz, + u32 code) + { +- const int val = code & AVS_TMON_TEMP_MASK; +- int slope, offset; +- +- avs_tmon_get_coeffs(tz, &slope, &offset); +- +- return slope * val + offset; ++ return (AVS_TMON_TEMP_OFFSET - ++ (int)((code & AVS_TMON_TEMP_MAX) * AVS_TMON_TEMP_SLOPE)); + } + + /* +@@ -136,20 +125,18 @@ static inline int avs_tmon_code_to_temp( + static inline u32 avs_tmon_temp_to_code(struct thermal_zone_device *tz, + int temp, bool low) + { +- int slope, offset; +- + if (temp < AVS_TMON_TEMP_MIN) +- return AVS_TMON_TEMP_MAX; /* Maximum code value */ +- +- avs_tmon_get_coeffs(tz, &slope, &offset); ++ return AVS_TMON_TEMP_MAX; /* Maximum code value */ + +- if (temp >= offset) ++ if (temp >= AVS_TMON_TEMP_OFFSET) + return 0; /* Minimum code value */ + + if (low) +- return (u32)(DIV_ROUND_UP(offset - temp, abs(slope))); ++ return (u32)(DIV_ROUND_UP(AVS_TMON_TEMP_OFFSET - temp, ++ AVS_TMON_TEMP_SLOPE)); + else +- return (u32)((offset - temp) / abs(slope)); ++ return (u32)((AVS_TMON_TEMP_OFFSET - temp) / ++ AVS_TMON_TEMP_SLOPE); + } + + static int brcmstb_get_temp(void *data, int *temp) diff --git a/queue-5.5/thermal-db8500-depromote-debug-print.patch b/queue-5.5/thermal-db8500-depromote-debug-print.patch new file mode 100644 index 00000000000..a03a5bea9fc --- /dev/null +++ b/queue-5.5/thermal-db8500-depromote-debug-print.patch @@ -0,0 +1,38 @@ +From c56dcfa3d4d0f49f0c37cd24886aa86db7aa7f30 Mon Sep 17 00:00:00 2001 +From: Linus Walleij +Date: Tue, 19 Nov 2019 08:46:50 +0100 +Subject: thermal: db8500: Depromote debug print + +From: Linus Walleij + +commit c56dcfa3d4d0f49f0c37cd24886aa86db7aa7f30 upstream. + +We are not interested in getting this debug print on our +console all the time. 
+ +Cc: Daniel Lezcano +Cc: Stephan Gerhold +Fixes: 6c375eccded4 ("thermal: db8500: Rewrite to be a pure OF sensor") +Signed-off-by: Linus Walleij +Reviewed-by: Stephan Gerhold +Signed-off-by: Daniel Lezcano +Link: https://lore.kernel.org/r/20191119074650.2664-1-linus.walleij@linaro.org +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/thermal/db8500_thermal.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/thermal/db8500_thermal.c ++++ b/drivers/thermal/db8500_thermal.c +@@ -152,8 +152,8 @@ static irqreturn_t prcmu_high_irq_handle + db8500_thermal_update_config(th, idx, THERMAL_TREND_RAISING, + next_low, next_high); + +- dev_info(&th->tz->device, +- "PRCMU set max %ld, min %ld\n", next_high, next_low); ++ dev_dbg(&th->tz->device, ++ "PRCMU set max %ld, min %ld\n", next_high, next_low); + } else if (idx == num_points - 1) + /* So we roof out 1 degree over the max point */ + th->interpolated_temp = db8500_thermal_points[idx] + 1; diff --git a/queue-5.5/ubifs-fix-ino_t-format-warnings-in-orphan_delete.patch b/queue-5.5/ubifs-fix-ino_t-format-warnings-in-orphan_delete.patch new file mode 100644 index 00000000000..e4adb500324 --- /dev/null +++ b/queue-5.5/ubifs-fix-ino_t-format-warnings-in-orphan_delete.patch @@ -0,0 +1,57 @@ +From 155fc6ba488a8bdfd1d3be3d7ba98c9cec2b2429 Mon Sep 17 00:00:00 2001 +From: Geert Uytterhoeven +Date: Mon, 13 Jan 2020 11:51:56 +0100 +Subject: ubifs: Fix ino_t format warnings in orphan_delete() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Geert Uytterhoeven + +commit 155fc6ba488a8bdfd1d3be3d7ba98c9cec2b2429 upstream. + +On alpha and s390x: + + fs/ubifs/debug.h:158:11: warning: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 4 has type ‘ino_t {aka unsigned int}’ [-Wformat=] + ... + fs/ubifs/orphan.c:132:3: note: in expansion of macro ‘dbg_gen’ + dbg_gen("deleted twice ino %lu", orph->inum); + ... + fs/ubifs/orphan.c:140:3: note: in expansion of macro ‘dbg_gen’ + dbg_gen("delete later ino %lu", orph->inum); + +__kernel_ino_t is "unsigned long" on most architectures, but not on +alpha and s390x, where it is "unsigned int". Hence when printing an +ino_t, it should always be cast to "unsigned long" first. + +Fix this by re-adding the recently removed casts. 
+ +Fixes: 8009ce956c3d2802 ("ubifs: Don't leak orphans on memory during commit") +Signed-off-by: Geert Uytterhoeven +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ubifs/orphan.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/ubifs/orphan.c ++++ b/fs/ubifs/orphan.c +@@ -129,7 +129,7 @@ static void __orphan_drop(struct ubifs_i + static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph) + { + if (orph->del) { +- dbg_gen("deleted twice ino %lu", orph->inum); ++ dbg_gen("deleted twice ino %lu", (unsigned long)orph->inum); + return; + } + +@@ -137,7 +137,7 @@ static void orphan_delete(struct ubifs_i + orph->del = 1; + orph->dnext = c->orph_dnext; + c->orph_dnext = orph; +- dbg_gen("delete later ino %lu", orph->inum); ++ dbg_gen("delete later ino %lu", (unsigned long)orph->inum); + return; + } + diff --git a/queue-5.5/x86-resctrl-check-monitoring-static-key-in-the-mbm-overflow-handler.patch b/queue-5.5/x86-resctrl-check-monitoring-static-key-in-the-mbm-overflow-handler.patch new file mode 100644 index 00000000000..4dc75f2edc1 --- /dev/null +++ b/queue-5.5/x86-resctrl-check-monitoring-static-key-in-the-mbm-overflow-handler.patch @@ -0,0 +1,69 @@ +From 536a0d8e79fb928f2735db37dda95682b6754f9a Mon Sep 17 00:00:00 2001 +From: Xiaochen Shen +Date: Thu, 12 Dec 2019 04:05:05 +0800 +Subject: x86/resctrl: Check monitoring static key in the MBM overflow handler + +From: Xiaochen Shen + +commit 536a0d8e79fb928f2735db37dda95682b6754f9a upstream. + +Currently, there are three static keys in the resctrl file system: +rdt_mon_enable_key and rdt_alloc_enable_key indicate if the monitoring +feature and the allocation feature are enabled, respectively. The +rdt_enable_key is enabled when either the monitoring feature or the +allocation feature is enabled. + +If no monitoring feature is present (either hardware doesn't support a +monitoring feature or the feature is disabled by the kernel command line +option "rdt="), rdt_enable_key is still enabled but rdt_mon_enable_key +is disabled. + +MBM is a monitoring feature. The MBM overflow handler intends to +check if the monitoring feature is not enabled for fast return. + +So check the rdt_mon_enable_key in it instead of the rdt_enable_key as +former is the more accurate check. + + [ bp: Massage commit message. 
] + +Fixes: e33026831bdb ("x86/intel_rdt/mbm: Handle counter overflow") +Signed-off-by: Xiaochen Shen +Signed-off-by: Borislav Petkov +Link: https://lkml.kernel.org/r/1576094705-13660-1-git-send-email-xiaochen.shen@intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/resctrl/internal.h | 1 + + arch/x86/kernel/cpu/resctrl/monitor.c | 4 ++-- + 2 files changed, 3 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/cpu/resctrl/internal.h ++++ b/arch/x86/kernel/cpu/resctrl/internal.h +@@ -57,6 +57,7 @@ static inline struct rdt_fs_context *rdt + } + + DECLARE_STATIC_KEY_FALSE(rdt_enable_key); ++DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); + + /** + * struct mon_evt - Entry in the event list of a resource +--- a/arch/x86/kernel/cpu/resctrl/monitor.c ++++ b/arch/x86/kernel/cpu/resctrl/monitor.c +@@ -514,7 +514,7 @@ void mbm_handle_overflow(struct work_str + + mutex_lock(&rdtgroup_mutex); + +- if (!static_branch_likely(&rdt_enable_key)) ++ if (!static_branch_likely(&rdt_mon_enable_key)) + goto out_unlock; + + d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]); +@@ -543,7 +543,7 @@ void mbm_setup_overflow_handler(struct r + unsigned long delay = msecs_to_jiffies(delay_ms); + int cpu; + +- if (!static_branch_likely(&rdt_enable_key)) ++ if (!static_branch_likely(&rdt_mon_enable_key)) + return; + cpu = cpumask_any(&dom->cpu_mask); + dom->mbm_work_cpu = cpu; diff --git a/queue-5.5/xfs-clear-kernel-only-flags-in-xfs_ioc_attrmulti_by_handle.patch b/queue-5.5/xfs-clear-kernel-only-flags-in-xfs_ioc_attrmulti_by_handle.patch new file mode 100644 index 00000000000..770eea5e7d5 --- /dev/null +++ b/queue-5.5/xfs-clear-kernel-only-flags-in-xfs_ioc_attrmulti_by_handle.patch @@ -0,0 +1,69 @@ +From 953aa9d136f53e226448dbd801a905c28f8071bf Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Tue, 7 Jan 2020 15:25:37 -0800 +Subject: xfs: clear kernel only flags in XFS_IOC_ATTRMULTI_BY_HANDLE + +From: Christoph Hellwig + +commit 953aa9d136f53e226448dbd801a905c28f8071bf upstream. + +Don't allow passing arbitrary flags as they change behavior including +memory allocation that the call stack is not prepared for. + +Fixes: ddbca70cc45c ("xfs: allocate xattr buffer on demand") +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. 
Wong +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/libxfs/xfs_attr.h | 7 +++++-- + fs/xfs/xfs_ioctl.c | 2 ++ + fs/xfs/xfs_ioctl32.c | 2 ++ + 3 files changed, 9 insertions(+), 2 deletions(-) + +--- a/fs/xfs/libxfs/xfs_attr.h ++++ b/fs/xfs/libxfs/xfs_attr.h +@@ -26,7 +26,7 @@ struct xfs_attr_list_context; + *========================================================================*/ + + +-#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */ ++#define ATTR_DONTFOLLOW 0x0001 /* -- ignored, from IRIX -- */ + #define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */ + #define ATTR_TRUST 0x0004 /* -- unused, from IRIX -- */ + #define ATTR_SECURE 0x0008 /* use attrs in security namespace */ +@@ -37,7 +37,10 @@ struct xfs_attr_list_context; + #define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ + + #define ATTR_INCOMPLETE 0x4000 /* [kernel] return INCOMPLETE attr keys */ +-#define ATTR_ALLOC 0x8000 /* allocate xattr buffer on demand */ ++#define ATTR_ALLOC 0x8000 /* [kernel] allocate xattr buffer on demand */ ++ ++#define ATTR_KERNEL_FLAGS \ ++ (ATTR_KERNOTIME | ATTR_KERNOVAL | ATTR_INCOMPLETE | ATTR_ALLOC) + + #define XFS_ATTR_FLAGS \ + { ATTR_DONTFOLLOW, "DONTFOLLOW" }, \ +--- a/fs/xfs/xfs_ioctl.c ++++ b/fs/xfs/xfs_ioctl.c +@@ -462,6 +462,8 @@ xfs_attrmulti_by_handle( + + error = 0; + for (i = 0; i < am_hreq.opcount; i++) { ++ ops[i].am_flags &= ~ATTR_KERNEL_FLAGS; ++ + ops[i].am_error = strncpy_from_user((char *)attr_name, + ops[i].am_attrname, MAXNAMELEN); + if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) +--- a/fs/xfs/xfs_ioctl32.c ++++ b/fs/xfs/xfs_ioctl32.c +@@ -450,6 +450,8 @@ xfs_compat_attrmulti_by_handle( + + error = 0; + for (i = 0; i < am_hreq.opcount; i++) { ++ ops[i].am_flags &= ~ATTR_KERNEL_FLAGS; ++ + ops[i].am_error = strncpy_from_user((char *)attr_name, + compat_ptr(ops[i].am_attrname), + MAXNAMELEN);