From 24f499876f28305d71293b64d12e84f62ad5adc3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 19 Jul 2017 11:43:39 +0200 Subject: [PATCH] 4.11-stable patches added patches: alsa-x86-clear-the-pdata.notify_lpe_audio-pointer-before-teardown.patch crypto-atmel-only-treat-ebusy-as-transient-if-backlog.patch crypto-caam-fix-signals-handling.patch crypto-caam-properly-set-iv-after-en-de-crypt.patch crypto-sha1-ssse3-disable-avx2.patch crypto-talitos-extend-max-key-length-for-sha384-512-hmac-and-aead.patch kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch kvm-vmx-check-value-written-to-ia32_bndcfgs.patch kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch pm-qos-return-einval-for-bogus-strings.patch pm-wakeirq-convert-to-srcu.patch sched-fair-cpumask-export-for_each_cpu_wrap.patch sched-topology-fix-building-of-overlapping-sched-groups.patch sched-topology-fix-overlapping-sched_group_mask.patch sched-topology-optimize-build_group_mask.patch --- ...fy_lpe_audio-pointer-before-teardown.patch | 41 ++++ ...-treat-ebusy-as-transient-if-backlog.patch | 35 ++++ .../crypto-caam-fix-signals-handling.patch | 59 ++++++ ...am-properly-set-iv-after-en-de-crypt.patch | 89 +++++++++ .../crypto-sha1-ssse3-disable-avx2.patch | 33 ++++ ...-length-for-sha384-512-hmac-and-aead.patch | 52 +++++ ...ost-to-access-guest-msr_ia32_bndcfgs.patch | 43 ++++ ...-check-value-written-to-ia32_bndcfgs.patch | 48 +++++ ...o-not-disable-intercepts-for-bndcfgs.patch | 40 ++++ ...t-bndcfgs-requires-guest-mpx-support.patch | 63 ++++++ ...-qos-return-einval-for-bogus-strings.patch | 34 ++++ queue-4.11/pm-wakeirq-convert-to-srcu.patch | 147 ++++++++++++++ ...air-cpumask-export-for_each_cpu_wrap.patch | 185 ++++++++++++++++++ ...building-of-overlapping-sched-groups.patch | 65 ++++++ ...ogy-fix-overlapping-sched_group_mask.patch | 99 ++++++++++ ...d-topology-optimize-build_group_mask.patch | 46 +++++ queue-4.11/series | 16 ++ 17 files changed, 1095 insertions(+) create mode 100644 queue-4.11/alsa-x86-clear-the-pdata.notify_lpe_audio-pointer-before-teardown.patch create mode 100644 queue-4.11/crypto-atmel-only-treat-ebusy-as-transient-if-backlog.patch create mode 100644 queue-4.11/crypto-caam-fix-signals-handling.patch create mode 100644 queue-4.11/crypto-caam-properly-set-iv-after-en-de-crypt.patch create mode 100644 queue-4.11/crypto-sha1-ssse3-disable-avx2.patch create mode 100644 queue-4.11/crypto-talitos-extend-max-key-length-for-sha384-512-hmac-and-aead.patch create mode 100644 queue-4.11/kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch create mode 100644 queue-4.11/kvm-vmx-check-value-written-to-ia32_bndcfgs.patch create mode 100644 queue-4.11/kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch create mode 100644 queue-4.11/kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch create mode 100644 queue-4.11/pm-qos-return-einval-for-bogus-strings.patch create mode 100644 queue-4.11/pm-wakeirq-convert-to-srcu.patch create mode 100644 queue-4.11/sched-fair-cpumask-export-for_each_cpu_wrap.patch create mode 100644 queue-4.11/sched-topology-fix-building-of-overlapping-sched-groups.patch create mode 100644 queue-4.11/sched-topology-fix-overlapping-sched_group_mask.patch create mode 100644 queue-4.11/sched-topology-optimize-build_group_mask.patch diff --git a/queue-4.11/alsa-x86-clear-the-pdata.notify_lpe_audio-pointer-before-teardown.patch b/queue-4.11/alsa-x86-clear-the-pdata.notify_lpe_audio-pointer-before-teardown.patch new file mode 100644 index 
00000000000..d70777fb666 --- /dev/null +++ b/queue-4.11/alsa-x86-clear-the-pdata.notify_lpe_audio-pointer-before-teardown.patch @@ -0,0 +1,41 @@ +From 8d5c30308d7c5a17db96fa5452c0232f633377c2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= +Date: Thu, 27 Apr 2017 19:02:21 +0300 +Subject: ALSA: x86: Clear the pdata.notify_lpe_audio pointer before teardown +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ville Syrjälä + +commit 8d5c30308d7c5a17db96fa5452c0232f633377c2 upstream. + +Clear the notify function pointer in the platform data before we tear +down the driver. Otherwise i915 would end up calling a stale function +pointer and possibly explode. + +Cc: Takashi Iwai +Cc: Pierre-Louis Bossart +Signed-off-by: Ville Syrjälä +Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-3-ville.syrjala@linux.intel.com +Reviewed-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/x86/intel_hdmi_audio.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/sound/x86/intel_hdmi_audio.c ++++ b/sound/x86/intel_hdmi_audio.c +@@ -1665,6 +1665,11 @@ static int __maybe_unused hdmi_lpe_audio + static void hdmi_lpe_audio_free(struct snd_card *card) + { + struct snd_intelhad *ctx = card->private_data; ++ struct intel_hdmi_lpe_audio_pdata *pdata = ctx->dev->platform_data; ++ ++ spin_lock_irq(&pdata->lpe_audio_slock); ++ pdata->notify_audio_lpe = NULL; ++ spin_unlock_irq(&pdata->lpe_audio_slock); + + cancel_work_sync(&ctx->hdmi_audio_wq); + diff --git a/queue-4.11/crypto-atmel-only-treat-ebusy-as-transient-if-backlog.patch b/queue-4.11/crypto-atmel-only-treat-ebusy-as-transient-if-backlog.patch new file mode 100644 index 00000000000..17492f816ef --- /dev/null +++ b/queue-4.11/crypto-atmel-only-treat-ebusy-as-transient-if-backlog.patch @@ -0,0 +1,35 @@ +From 1606043f214f912a52195293614935811a6e3e53 Mon Sep 17 00:00:00 2001 +From: Gilad Ben-Yossef +Date: Wed, 28 Jun 2017 10:22:03 +0300 +Subject: crypto: atmel - only treat EBUSY as transient if backlog + +From: Gilad Ben-Yossef + +commit 1606043f214f912a52195293614935811a6e3e53 upstream. + +The Atmel SHA driver was treating -EBUSY as indication of queueing +to backlog without checking that backlog is enabled for the request. + +Fix it by checking request flags. + +Signed-off-by: Gilad Ben-Yossef +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/atmel-sha.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/crypto/atmel-sha.c ++++ b/drivers/crypto/atmel-sha.c +@@ -1204,7 +1204,9 @@ static int atmel_sha_finup(struct ahash_ + ctx->flags |= SHA_FLAGS_FINUP; + + err1 = atmel_sha_update(req); +- if (err1 == -EINPROGRESS || err1 == -EBUSY) ++ if (err1 == -EINPROGRESS || ++ (err1 == -EBUSY && (ahash_request_flags(req) & ++ CRYPTO_TFM_REQ_MAY_BACKLOG))) + return err1; + + /* diff --git a/queue-4.11/crypto-caam-fix-signals-handling.patch b/queue-4.11/crypto-caam-fix-signals-handling.patch new file mode 100644 index 00000000000..ee89a191be2 --- /dev/null +++ b/queue-4.11/crypto-caam-fix-signals-handling.patch @@ -0,0 +1,59 @@ +From 7459e1d25ffefa2b1be799477fcc1f6c62f6cec7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Horia=20Geant=C4=83?= +Date: Fri, 7 Jul 2017 16:57:06 +0300 +Subject: crypto: caam - fix signals handling +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Horia Geantă + +commit 7459e1d25ffefa2b1be799477fcc1f6c62f6cec7 upstream. 
+
+Driver does not properly handle the case when signals interrupt
+wait_for_completion_interruptible():
+- it does not check the return value
+- the completion structure is allocated on the stack; in case a signal
+interrupts the sleep, it will go out of scope, causing the worker
+thread (caam_jr_dequeue) to fail when it accesses it
+
+wait_for_completion_interruptible() is replaced with uninterruptible
+wait_for_completion().
+We choose to block all signals while waiting for I/O (device executing
+the split key generation job descriptor) since the alternative - in
+order to have a deterministic device state - would be to flush the job
+ring (aborting *all* in-progress jobs).
+
+Fixes: 045e36780f115 ("crypto: caam - ahash hmac support")
+Fixes: 4c1ec1f930154 ("crypto: caam - refactor key_gen, sg")
+Signed-off-by: Horia Geantă
+Signed-off-by: Herbert Xu
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/crypto/caam/caamhash.c | 2 +-
+ drivers/crypto/caam/key_gen.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/crypto/caam/caamhash.c
++++ b/drivers/crypto/caam/caamhash.c
+@@ -396,7 +396,7 @@ static int hash_digest_key(struct caam_h
+ ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result);
+ if (!ret) {
+ /* in progress */
+- wait_for_completion_interruptible(&result.completion);
++ wait_for_completion(&result.completion);
+ ret = result.err;
+ #ifdef DEBUG
+ print_hex_dump(KERN_ERR,
+--- a/drivers/crypto/caam/key_gen.c
++++ b/drivers/crypto/caam/key_gen.c
+@@ -149,7 +149,7 @@ int gen_split_key(struct device *jrdev,
+ ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result);
+ if (!ret) {
+ /* in progress */
+- wait_for_completion_interruptible(&result.completion);
++ wait_for_completion(&result.completion);
+ ret = result.err;
+ #ifdef DEBUG
+ print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ",
diff --git a/queue-4.11/crypto-caam-properly-set-iv-after-en-de-crypt.patch b/queue-4.11/crypto-caam-properly-set-iv-after-en-de-crypt.patch
new file mode 100644
index 00000000000..79e520d9184
--- /dev/null
+++ b/queue-4.11/crypto-caam-properly-set-iv-after-en-de-crypt.patch
@@ -0,0 +1,89 @@
+From 854b06f768794cd664886ec3ba3a5b1c58d42167 Mon Sep 17 00:00:00 2001
+From: David Gstir
+Date: Wed, 28 Jun 2017 15:27:10 +0200
+Subject: crypto: caam - properly set IV after {en,de}crypt
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: David Gstir
+
+commit 854b06f768794cd664886ec3ba3a5b1c58d42167 upstream.
+
+Certain cipher modes like CTS expect the IV (req->info) of
+ablkcipher_request (or equivalently req->iv of skcipher_request) to
+contain the last ciphertext block when the {en,de}crypt operation is done.
+This is currently not the case for the CAAM driver, which in turn breaks
+e.g. cts(cbc(aes)) when the CAAM driver is enabled.
+
+This patch fixes the CAAM driver to properly set the IV after the
+{en,de}crypt operation of ablkcipher finishes.
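
To make the expected semantics concrete: after the operation completes, the IV
buffer (req->info) must hold the last ciphertext block. A minimal,
self-contained C sketch of just that convention, with illustrative names and a
fixed block size (the driver itself achieves this with
scatterwalk_map_and_copy() on the request's scatterlist, as the hunks below
show):

    #include <stdio.h>
    #include <string.h>

    #define BLOCK_SIZE 16  /* stand-in for crypto_ablkcipher_ivsize() */

    /* Copy the final ciphertext block into the IV buffer, mirroring
     * what the fix does after the {en,de}crypt operation completes. */
    static void set_output_iv(unsigned char *iv,
                              const unsigned char *ct, size_t nbytes)
    {
        memcpy(iv, ct + nbytes - BLOCK_SIZE, BLOCK_SIZE);
    }

    int main(void)
    {
        unsigned char ct[32] = { 0 };     /* pretend two-block ciphertext */
        unsigned char iv[BLOCK_SIZE];

        ct[31] = 0xaa;                    /* marker in the final block */
        set_output_iv(iv, ct, sizeof(ct));
        /* prints 0xaa: chaining callers such as CTS see the last block */
        printf("iv[15] = 0x%02x\n", iv[15]);
        return 0;
    }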
+ +This issue was revealed by the changes in the SW CTS mode in commit +0605c41cc53ca ("crypto: cts - Convert to skcipher") + +Signed-off-by: David Gstir +Reviewed-by: Horia Geantă +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/caam/caamalg.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +--- a/drivers/crypto/caam/caamalg.c ++++ b/drivers/crypto/caam/caamalg.c +@@ -881,10 +881,10 @@ static void ablkcipher_encrypt_done(stru + { + struct ablkcipher_request *req = context; + struct ablkcipher_edesc *edesc; +-#ifdef DEBUG + struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req); + int ivsize = crypto_ablkcipher_ivsize(ablkcipher); + ++#ifdef DEBUG + dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); + #endif + +@@ -903,6 +903,14 @@ static void ablkcipher_encrypt_done(stru + #endif + + ablkcipher_unmap(jrdev, edesc, req); ++ ++ /* ++ * The crypto API expects us to set the IV (req->info) to the last ++ * ciphertext block. This is used e.g. by the CTS mode. ++ */ ++ scatterwalk_map_and_copy(req->info, req->dst, req->nbytes - ivsize, ++ ivsize, 0); ++ + kfree(edesc); + + ablkcipher_request_complete(req, err); +@@ -913,10 +921,10 @@ static void ablkcipher_decrypt_done(stru + { + struct ablkcipher_request *req = context; + struct ablkcipher_edesc *edesc; +-#ifdef DEBUG + struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req); + int ivsize = crypto_ablkcipher_ivsize(ablkcipher); + ++#ifdef DEBUG + dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); + #endif + +@@ -934,6 +942,14 @@ static void ablkcipher_decrypt_done(stru + #endif + + ablkcipher_unmap(jrdev, edesc, req); ++ ++ /* ++ * The crypto API expects us to set the IV (req->info) to the last ++ * ciphertext block. ++ */ ++ scatterwalk_map_and_copy(req->info, req->src, req->nbytes - ivsize, ++ ivsize, 0); ++ + kfree(edesc); + + ablkcipher_request_complete(req, err); diff --git a/queue-4.11/crypto-sha1-ssse3-disable-avx2.patch b/queue-4.11/crypto-sha1-ssse3-disable-avx2.patch new file mode 100644 index 00000000000..5cde1880338 --- /dev/null +++ b/queue-4.11/crypto-sha1-ssse3-disable-avx2.patch @@ -0,0 +1,33 @@ +From b82ce24426a4071da9529d726057e4e642948667 Mon Sep 17 00:00:00 2001 +From: Herbert Xu +Date: Tue, 4 Jul 2017 12:21:12 +0800 +Subject: crypto: sha1-ssse3 - Disable avx2 + +From: Herbert Xu + +commit b82ce24426a4071da9529d726057e4e642948667 upstream. + +It has been reported that sha1-avx2 can cause page faults by reading +beyond the end of the input. This patch disables it until it can be +fixed. 
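
For illustration, the disable works by short-circuiting the feature test with a
constant, so the AVX2 path can no longer be selected yet still compiles and is
trivial to revert once the overread is fixed. A stand-alone sketch of the same
pattern (function names hypothetical, not the kernel's):

    #include <stdbool.h>
    #include <stdio.h>

    static bool cpu_has_avx2(void)
    {
        return true; /* stand-in for boot_cpu_has(X86_FEATURE_AVX2) */
    }

    static bool avx2_usable(void)
    {
        /* The leading "false &&" disables the path without deleting it. */
        if (false && cpu_has_avx2())
            return true;
        return false;
    }

    int main(void)
    {
        printf("avx2 path enabled: %d\n", avx2_usable()); /* always 0 */
        return 0;
    }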
+ +Fixes: 7c1da8d0d046 ("crypto: sha - SHA1 transform x86_64 AVX2") +Reported-by: Jan Stancek +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/crypto/sha1_ssse3_glue.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/crypto/sha1_ssse3_glue.c ++++ b/arch/x86/crypto/sha1_ssse3_glue.c +@@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 + + static bool avx2_usable(void) + { +- if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) ++ if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) + && boot_cpu_has(X86_FEATURE_BMI1) + && boot_cpu_has(X86_FEATURE_BMI2)) + return true; diff --git a/queue-4.11/crypto-talitos-extend-max-key-length-for-sha384-512-hmac-and-aead.patch b/queue-4.11/crypto-talitos-extend-max-key-length-for-sha384-512-hmac-and-aead.patch new file mode 100644 index 00000000000..ccf25ae9b64 --- /dev/null +++ b/queue-4.11/crypto-talitos-extend-max-key-length-for-sha384-512-hmac-and-aead.patch @@ -0,0 +1,52 @@ +From 03d2c5114c95797c0aa7d9f463348b171a274fd4 Mon Sep 17 00:00:00 2001 +From: Martin Hicks +Date: Tue, 2 May 2017 09:38:35 -0400 +Subject: crypto: talitos - Extend max key length for SHA384/512-HMAC and AEAD +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Martin Hicks + +commit 03d2c5114c95797c0aa7d9f463348b171a274fd4 upstream. + +An updated patch that also handles the additional key length requirements +for the AEAD algorithms. + +The max keysize is not 96. For SHA384/512 it's 128, and for the AEAD +algorithms it's longer still. Extend the max keysize for the +AEAD size for AES256 + HMAC(SHA512). + +Fixes: 357fb60502ede ("crypto: talitos - add sha224, sha384 and sha512 to existing AEAD algorithms") +Signed-off-by: Martin Hicks +Acked-by: Horia Geantă +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/talitos.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/drivers/crypto/talitos.c ++++ b/drivers/crypto/talitos.c +@@ -816,7 +816,7 @@ static void talitos_unregister_rng(struc + * HMAC_SNOOP_NO_AFEA (HSNA) instead of type IPSEC_ESP + */ + #define TALITOS_CRA_PRIORITY_AEAD_HSNA (TALITOS_CRA_PRIORITY - 1) +-#define TALITOS_MAX_KEY_SIZE 96 ++#define TALITOS_MAX_KEY_SIZE (AES_MAX_KEY_SIZE + SHA512_BLOCK_SIZE) + #define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ + + struct talitos_ctx { +@@ -1495,6 +1495,11 @@ static int ablkcipher_setkey(struct cryp + { + struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); + ++ if (keylen > TALITOS_MAX_KEY_SIZE) { ++ crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); ++ return -EINVAL; ++ } ++ + memcpy(&ctx->key, key, keylen); + ctx->keylen = keylen; + diff --git a/queue-4.11/kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch b/queue-4.11/kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch new file mode 100644 index 00000000000..e35dad659e2 --- /dev/null +++ b/queue-4.11/kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch @@ -0,0 +1,43 @@ +From 691bd4340bef49cf7e5855d06cf24444b5bf2d85 Mon Sep 17 00:00:00 2001 +From: Haozhong Zhang +Date: Tue, 4 Jul 2017 10:27:41 +0800 +Subject: kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS + +From: Haozhong Zhang + +commit 691bd4340bef49cf7e5855d06cf24444b5bf2d85 upstream. + +It's easier for host applications, such as QEMU, if they can always +access guest MSR_IA32_BNDCFGS in VMCS, even though MPX is disabled in +guest cpuid. 
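
The resulting access rule can be read directly off the new condition: a
host-initiated access (e.g. QEMU state save/restore) always passes the CPUID
gate, while a guest access still requires MPX in the guest CPUID. A minimal C
sketch of that predicate, with illustrative names rather than KVM's actual
types:

    #include <stdbool.h>
    #include <stdio.h>

    struct msr_access {
        bool host_initiated; /* set for accesses coming from userspace */
    };

    /* Returns true when the BNDCFGS access should fail (raise #GP). */
    static bool bndcfgs_access_faults(bool kvm_mpx_supported,
                                      bool guest_cpuid_has_mpx,
                                      const struct msr_access *msr)
    {
        return !kvm_mpx_supported ||
               (!msr->host_initiated && !guest_cpuid_has_mpx);
    }

    int main(void)
    {
        struct msr_access host = { .host_initiated = true };
        struct msr_access guest = { .host_initiated = false };

        /* Host access passes even when MPX is hidden from the guest... */
        printf("host faults:  %d\n", bndcfgs_access_faults(true, false, &host));
        /* ...but the guest itself still takes the fault. */
        printf("guest faults: %d\n", bndcfgs_access_faults(true, false, &guest));
        return 0;
    }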
+
+Signed-off-by: Haozhong Zhang
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kvm/vmx.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -3200,7 +3200,8 @@ static int vmx_get_msr(struct kvm_vcpu *
+ msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
+ break;
+ case MSR_IA32_BNDCFGS:
+- if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu))
++ if (!kvm_mpx_supported() ||
++ (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
+ return 1;
+ msr_info->data = vmcs_read64(GUEST_BNDCFGS);
+ break;
+@@ -3282,7 +3283,8 @@ static int vmx_set_msr(struct kvm_vcpu *
+ vmcs_writel(GUEST_SYSENTER_ESP, data);
+ break;
+ case MSR_IA32_BNDCFGS:
+- if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu))
++ if (!kvm_mpx_supported() ||
++ (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
+ return 1;
+ if (is_noncanonical_address(data & PAGE_MASK) ||
+ (data & MSR_IA32_BNDCFGS_RSVD))
diff --git a/queue-4.11/kvm-vmx-check-value-written-to-ia32_bndcfgs.patch b/queue-4.11/kvm-vmx-check-value-written-to-ia32_bndcfgs.patch
new file mode 100644
index 00000000000..7ab1e94d7d0
--- /dev/null
+++ b/queue-4.11/kvm-vmx-check-value-written-to-ia32_bndcfgs.patch
@@ -0,0 +1,48 @@
+From 4531662d1abf6c1f0e5c2b86ddb60e61509786c8 Mon Sep 17 00:00:00 2001
+From: Jim Mattson
+Date: Tue, 23 May 2017 11:52:54 -0700
+Subject: kvm: vmx: Check value written to IA32_BNDCFGS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jim Mattson
+
+commit 4531662d1abf6c1f0e5c2b86ddb60e61509786c8 upstream.
+
+Bits 11:2 must be zero and the linear address in bits 63:12 must be
+canonical. Otherwise, WRMSR(BNDCFGS) should raise #GP.
+
+Fixes: 0dd376e709975779 ("KVM: x86: add MSR_IA32_BNDCFGS to msrs_to_save")
+Signed-off-by: Jim Mattson
+Signed-off-by: Radim Krčmář
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/include/asm/msr-index.h | 2 ++
+ arch/x86/kvm/vmx.c | 3 +++
+ 2 files changed, 5 insertions(+)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -417,6 +417,8 @@
+ #define MSR_IA32_TSC_ADJUST 0x0000003b
+ #define MSR_IA32_BNDCFGS 0x00000d90
+
++#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
++
+ #define MSR_IA32_XSS 0x00000da0
+
+ #define FEATURE_CONTROL_LOCKED (1<<0)
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -3284,6 +3284,9 @@ static int vmx_set_msr(struct kvm_vcpu *
+ case MSR_IA32_BNDCFGS:
+ if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu))
+ return 1;
++ if (is_noncanonical_address(data & PAGE_MASK) ||
++ (data & MSR_IA32_BNDCFGS_RSVD))
++ return 1;
+ vmcs_write64(GUEST_BNDCFGS, data);
+ break;
+ case MSR_IA32_TSC:
diff --git a/queue-4.11/kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch b/queue-4.11/kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch
new file mode 100644
index 00000000000..3a002ebbcc8
--- /dev/null
+++ b/queue-4.11/kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch
@@ -0,0 +1,40 @@
+From a8b6fda38f80e75afa3b125c9e7f2550b579454b Mon Sep 17 00:00:00 2001
+From: Jim Mattson
+Date: Tue, 23 May 2017 11:52:52 -0700
+Subject: kvm: vmx: Do not disable intercepts for BNDCFGS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jim Mattson
+
+commit a8b6fda38f80e75afa3b125c9e7f2550b579454b upstream.
+
+The MSR permission bitmaps are shared by all VMs. However, some VMs
+may not be configured to support MPX, even when the host does. If the
+host supports MPX and the guest does not, we should intercept accesses
+to the BNDCFGS MSR, so that we can synthesize a #GP
+fault. Furthermore, if the host does not support MPX and the
+"ignore_msrs" kvm kernel parameter is set, then we should intercept
+accesses to the BNDCFGS MSR, so that we can skip over the rdmsr/wrmsr
+without raising a #GP fault.
+
+Fixes: da8999d31818fdc8 ("KVM: x86: Intel MPX vmx and msr handle")
+Signed-off-by: Jim Mattson
+Signed-off-by: Radim Krčmář
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kvm/vmx.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -6588,7 +6588,6 @@ static __init int hardware_setup(void)
+ vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+ vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+ vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+- vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+
+ memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
+ vmx_msr_bitmap_legacy, PAGE_SIZE);
diff --git a/queue-4.11/kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch b/queue-4.11/kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch
new file mode 100644
index 00000000000..046311ba96e
--- /dev/null
+++ b/queue-4.11/kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch
@@ -0,0 +1,63 @@
+From 4439af9f911ae0243ffe4e2dfc12bace49605d8b Mon Sep 17 00:00:00 2001
+From: Jim Mattson
+Date: Wed, 24 May 2017 10:49:25 -0700
+Subject: kvm: x86: Guest BNDCFGS requires guest MPX support
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jim Mattson
+
+commit 4439af9f911ae0243ffe4e2dfc12bace49605d8b upstream.
+
+The BNDCFGS MSR should only be exposed to the guest if the guest
+supports MPX. (cf. the TSC_AUX MSR and RDTSCP.)
+ +Fixes: 0dd376e709975779 ("KVM: x86: add MSR_IA32_BNDCFGS to msrs_to_save") +Change-Id: I3ad7c01bda616715137ceac878f3fa7e66b6b387 +Signed-off-by: Jim Mattson +Signed-off-by: Radim Krčmář +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/cpuid.h | 8 ++++++++ + arch/x86/kvm/vmx.c | 4 ++-- + 2 files changed, 10 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/cpuid.h ++++ b/arch/x86/kvm/cpuid.h +@@ -144,6 +144,14 @@ static inline bool guest_cpuid_has_rtm(s + return best && (best->ebx & bit(X86_FEATURE_RTM)); + } + ++static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_cpuid_entry2 *best; ++ ++ best = kvm_find_cpuid_entry(vcpu, 7, 0); ++ return best && (best->ebx & bit(X86_FEATURE_MPX)); ++} ++ + static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) + { + struct kvm_cpuid_entry2 *best; +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -3200,7 +3200,7 @@ static int vmx_get_msr(struct kvm_vcpu * + msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); + break; + case MSR_IA32_BNDCFGS: +- if (!kvm_mpx_supported()) ++ if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu)) + return 1; + msr_info->data = vmcs_read64(GUEST_BNDCFGS); + break; +@@ -3282,7 +3282,7 @@ static int vmx_set_msr(struct kvm_vcpu * + vmcs_writel(GUEST_SYSENTER_ESP, data); + break; + case MSR_IA32_BNDCFGS: +- if (!kvm_mpx_supported()) ++ if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu)) + return 1; + vmcs_write64(GUEST_BNDCFGS, data); + break; diff --git a/queue-4.11/pm-qos-return-einval-for-bogus-strings.patch b/queue-4.11/pm-qos-return-einval-for-bogus-strings.patch new file mode 100644 index 00000000000..31f3fc61e51 --- /dev/null +++ b/queue-4.11/pm-qos-return-einval-for-bogus-strings.patch @@ -0,0 +1,34 @@ +From 2ca30331c156ca9e97643ad05dd8930b8fe78b01 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Mon, 10 Jul 2017 10:21:40 +0300 +Subject: PM / QoS: return -EINVAL for bogus strings + +From: Dan Carpenter + +commit 2ca30331c156ca9e97643ad05dd8930b8fe78b01 upstream. + +In the current code, if the user accidentally writes a bogus command to +this sysfs file, then we set the latency tolerance to an uninitialized +variable. + +Fixes: 2d984ad132a8 (PM / QoS: Introcuce latency tolerance device PM QoS type) +Signed-off-by: Dan Carpenter +Acked-by: Pavel Machek +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/base/power/sysfs.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/base/power/sysfs.c ++++ b/drivers/base/power/sysfs.c +@@ -272,6 +272,8 @@ static ssize_t pm_qos_latency_tolerance_ + value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; + else if (!strcmp(buf, "any") || !strcmp(buf, "any\n")) + value = PM_QOS_LATENCY_ANY; ++ else ++ return -EINVAL; + } + ret = dev_pm_qos_update_user_latency_tolerance(dev, value); + return ret < 0 ? ret : n; diff --git a/queue-4.11/pm-wakeirq-convert-to-srcu.patch b/queue-4.11/pm-wakeirq-convert-to-srcu.patch new file mode 100644 index 00000000000..ca36ac88df4 --- /dev/null +++ b/queue-4.11/pm-wakeirq-convert-to-srcu.patch @@ -0,0 +1,147 @@ +From ea0212f40c6bc0594c8eff79266759e3ecd4bacc Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sun, 25 Jun 2017 19:31:13 +0200 +Subject: PM / wakeirq: Convert to SRCU + +From: Thomas Gleixner + +commit ea0212f40c6bc0594c8eff79266759e3ecd4bacc upstream. + +The wakeirq infrastructure uses RCU to protect the list of wakeirqs. 
That
+breaks the irq bus locking infrastructure, which allows sleeping functions to be called so that interrupt controllers behind slow busses,
+e.g. i2c, can be handled.
+
+The wakeirq functions hold rcu_read_lock and call into irq functions, which
+in case of interrupts using the irq bus locking will trigger a
+might_sleep() splat.
+
+Convert the wakeirq infrastructure to Sleepable RCU and unbreak it.
+
+Fixes: 4990d4fe327b (PM / Wakeirq: Add automated device wake IRQ handling)
+Reported-by: Brian Norris
+Suggested-by: Paul E. McKenney
+Signed-off-by: Thomas Gleixner
+Reviewed-by: Paul E. McKenney
+Tested-by: Tony Lindgren
+Tested-by: Brian Norris
+Signed-off-by: Rafael J. Wysocki
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/base/power/wakeup.c | 32 ++++++++++++++++++--------------
+ 1 file changed, 18 insertions(+), 14 deletions(-)
+
+--- a/drivers/base/power/wakeup.c
++++ b/drivers/base/power/wakeup.c
+@@ -60,6 +60,8 @@ static LIST_HEAD(wakeup_sources);
+
+ static DECLARE_WAIT_QUEUE_HEAD(wakeup_count_wait_queue);
+
++DEFINE_STATIC_SRCU(wakeup_srcu);
++
+ static struct wakeup_source deleted_ws = {
+ .name = "deleted",
+ .lock = __SPIN_LOCK_UNLOCKED(deleted_ws.lock),
+@@ -198,7 +200,7 @@ void wakeup_source_remove(struct wakeup_
+ spin_lock_irqsave(&events_lock, flags);
+ list_del_rcu(&ws->entry);
+ spin_unlock_irqrestore(&events_lock, flags);
+- synchronize_rcu();
++ synchronize_srcu(&wakeup_srcu);
+ }
+ EXPORT_SYMBOL_GPL(wakeup_source_remove);
+
+@@ -332,12 +334,12 @@ void device_wakeup_detach_irq(struct dev
+ void device_wakeup_arm_wake_irqs(void)
+ {
+ struct wakeup_source *ws;
++ int srcuidx;
+
+- rcu_read_lock();
++ srcuidx = srcu_read_lock(&wakeup_srcu);
+ list_for_each_entry_rcu(ws, &wakeup_sources, entry)
+ dev_pm_arm_wake_irq(ws->wakeirq);
+-
+- rcu_read_unlock();
++ srcu_read_unlock(&wakeup_srcu, srcuidx);
+ }
+
+ /**
+@@ -348,12 +350,12 @@ void device_wakeup_arm_wake_irqs(void)
+ void device_wakeup_disarm_wake_irqs(void)
+ {
+ struct wakeup_source *ws;
++ int srcuidx;
+
+- rcu_read_lock();
++ srcuidx = srcu_read_lock(&wakeup_srcu);
+ list_for_each_entry_rcu(ws, &wakeup_sources, entry)
+ dev_pm_disarm_wake_irq(ws->wakeirq);
+-
+- rcu_read_unlock();
++ srcu_read_unlock(&wakeup_srcu, srcuidx);
+ }
+
+ /**
+@@ -805,10 +807,10 @@ EXPORT_SYMBOL_GPL(pm_wakeup_event);
+ void pm_print_active_wakeup_sources(void)
+ {
+ struct wakeup_source *ws;
+- int active = 0;
++ int srcuidx, active = 0;
+ struct wakeup_source *last_activity_ws = NULL;
+
+- rcu_read_lock();
++ srcuidx = srcu_read_lock(&wakeup_srcu);
+ list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
+ if (ws->active) {
+ pr_debug("active wakeup source: %s\n", ws->name);
+@@ -824,7 +826,7 @@ void pm_print_active_wakeup_sources(void
+ if (!active && last_activity_ws)
+ pr_debug("last active wakeup source: %s\n",
+ last_activity_ws->name);
+- rcu_read_unlock();
++ srcu_read_unlock(&wakeup_srcu, srcuidx);
+ }
+ EXPORT_SYMBOL_GPL(pm_print_active_wakeup_sources);
+
+@@ -951,8 +953,9 @@ void pm_wakep_autosleep_enabled(bool set
+ {
+ struct wakeup_source *ws;
+ ktime_t now = ktime_get();
++ int srcuidx;
+
+- rcu_read_lock();
++ srcuidx = srcu_read_lock(&wakeup_srcu);
+ list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
+ spin_lock_irq(&ws->lock);
+ if (ws->autosleep_enabled != set) {
+@@ -966,7 +969,7 @@ void pm_wakep_autosleep_enabled(bool set
+ }
+ spin_unlock_irq(&ws->lock);
+ }
+- rcu_read_unlock();
++ srcu_read_unlock(&wakeup_srcu, srcuidx);
+ }
+ #endif /* CONFIG_PM_AUTOSLEEP */
+
+@@ -1027,15 +1030,16 @@ static int
print_wakeup_source_stats(str + static int wakeup_sources_stats_show(struct seq_file *m, void *unused) + { + struct wakeup_source *ws; ++ int srcuidx; + + seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" + "expire_count\tactive_since\ttotal_time\tmax_time\t" + "last_change\tprevent_suspend_time\n"); + +- rcu_read_lock(); ++ srcuidx = srcu_read_lock(&wakeup_srcu); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) + print_wakeup_source_stats(m, ws); +- rcu_read_unlock(); ++ srcu_read_unlock(&wakeup_srcu, srcuidx); + + print_wakeup_source_stats(m, &deleted_ws); + diff --git a/queue-4.11/sched-fair-cpumask-export-for_each_cpu_wrap.patch b/queue-4.11/sched-fair-cpumask-export-for_each_cpu_wrap.patch new file mode 100644 index 00000000000..6eceb300476 --- /dev/null +++ b/queue-4.11/sched-fair-cpumask-export-for_each_cpu_wrap.patch @@ -0,0 +1,185 @@ +From c6508a39640b9a27fc2bc10cb708152672c82045 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Fri, 14 Apr 2017 14:20:05 +0200 +Subject: sched/fair, cpumask: Export for_each_cpu_wrap() + +From: Peter Zijlstra + +commit c6508a39640b9a27fc2bc10cb708152672c82045 upstream. + +commit c743f0a5c50f2fcbc628526279cfa24f3dabe182 upstream. + +More users for for_each_cpu_wrap() have appeared. Promote the construct +to generic cpumask interface. + +The implementation is slightly modified to reduce arguments. + +Signed-off-by: Peter Zijlstra (Intel) +Cc: Lauro Ramos Venancio +Cc: Linus Torvalds +Cc: Mike Galbraith +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Thomas Gleixner +Cc: lwang@redhat.com +Link: http://lkml.kernel.org/r/20170414122005.o35me2h5nowqkxbv@hirez.programming.kicks-ass.net +Signed-off-by: Ingo Molnar +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/cpumask.h | 17 +++++++++++++++++ + kernel/sched/fair.c | 45 ++++----------------------------------------- + lib/cpumask.c | 32 ++++++++++++++++++++++++++++++++ + 3 files changed, 53 insertions(+), 41 deletions(-) + +--- a/include/linux/cpumask.h ++++ b/include/linux/cpumask.h +@@ -236,6 +236,23 @@ unsigned int cpumask_local_spread(unsign + (cpu) = cpumask_next_zero((cpu), (mask)), \ + (cpu) < nr_cpu_ids;) + ++extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); ++ ++/** ++ * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location ++ * @cpu: the (optionally unsigned) integer iterator ++ * @mask: the cpumask poiter ++ * @start: the start location ++ * ++ * The implementation does not assume any bit in @mask is set (including @start). ++ * ++ * After the loop, cpu is >= nr_cpu_ids. ++ */ ++#define for_each_cpu_wrap(cpu, mask, start) \ ++ for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false); \ ++ (cpu) < nr_cpumask_bits; \ ++ (cpu) = cpumask_next_wrap((cpu), (mask), (start), true)) ++ + /** + * for_each_cpu_and - iterate over every cpu in both masks + * @cpu: the (optionally unsigned) integer iterator +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5615,43 +5615,6 @@ find_idlest_cpu(struct sched_group *grou + return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu; + } + +-/* +- * Implement a for_each_cpu() variant that starts the scan at a given cpu +- * (@start), and wraps around. +- * +- * This is used to scan for idle CPUs; such that not all CPUs looking for an +- * idle CPU find the same CPU. The down-side is that tasks tend to cycle +- * through the LLC domain. 
+- * +- * Especially tbench is found sensitive to this. +- */ +- +-static int cpumask_next_wrap(int n, const struct cpumask *mask, int start, int *wrapped) +-{ +- int next; +- +-again: +- next = find_next_bit(cpumask_bits(mask), nr_cpumask_bits, n+1); +- +- if (*wrapped) { +- if (next >= start) +- return nr_cpumask_bits; +- } else { +- if (next >= nr_cpumask_bits) { +- *wrapped = 1; +- n = -1; +- goto again; +- } +- } +- +- return next; +-} +- +-#define for_each_cpu_wrap(cpu, mask, start, wrap) \ +- for ((wrap) = 0, (cpu) = (start)-1; \ +- (cpu) = cpumask_next_wrap((cpu), (mask), (start), &(wrap)), \ +- (cpu) < nr_cpumask_bits; ) +- + #ifdef CONFIG_SCHED_SMT + + static inline void set_idle_cores(int cpu, int val) +@@ -5711,7 +5674,7 @@ unlock: + static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target) + { + struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); +- int core, cpu, wrap; ++ int core, cpu; + + if (!static_branch_likely(&sched_smt_present)) + return -1; +@@ -5721,7 +5684,7 @@ static int select_idle_core(struct task_ + + cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); + +- for_each_cpu_wrap(core, cpus, target, wrap) { ++ for_each_cpu_wrap(core, cpus, target) { + bool idle = true; + + for_each_cpu(cpu, cpu_smt_mask(core)) { +@@ -5787,7 +5750,7 @@ static int select_idle_cpu(struct task_s + u64 avg_cost, avg_idle = this_rq()->avg_idle; + u64 time, cost; + s64 delta; +- int cpu, wrap; ++ int cpu; + + this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc)); + if (!this_sd) +@@ -5804,7 +5767,7 @@ static int select_idle_cpu(struct task_s + + time = local_clock(); + +- for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) { ++ for_each_cpu_wrap(cpu, sched_domain_span(sd), target) { + if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) + continue; + if (idle_cpu(cpu)) +--- a/lib/cpumask.c ++++ b/lib/cpumask.c +@@ -43,6 +43,38 @@ int cpumask_any_but(const struct cpumask + } + EXPORT_SYMBOL(cpumask_any_but); + ++/** ++ * cpumask_next_wrap - helper to implement for_each_cpu_wrap ++ * @n: the cpu prior to the place to search ++ * @mask: the cpumask pointer ++ * @start: the start point of the iteration ++ * @wrap: assume @n crossing @start terminates the iteration ++ * ++ * Returns >= nr_cpu_ids on completion ++ * ++ * Note: the @wrap argument is required for the start condition when ++ * we cannot assume @start is set in @mask. ++ */ ++int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) ++{ ++ int next; ++ ++again: ++ next = cpumask_next(n, mask); ++ ++ if (wrap && n < start && next >= start) { ++ return nr_cpumask_bits; ++ ++ } else if (next >= nr_cpumask_bits) { ++ wrap = true; ++ n = -1; ++ goto again; ++ } ++ ++ return next; ++} ++EXPORT_SYMBOL(cpumask_next_wrap); ++ + /* These are not inline because of header tangles. */ + #ifdef CONFIG_CPUMASK_OFFSTACK + /** diff --git a/queue-4.11/sched-topology-fix-building-of-overlapping-sched-groups.patch b/queue-4.11/sched-topology-fix-building-of-overlapping-sched-groups.patch new file mode 100644 index 00000000000..c0268000c70 --- /dev/null +++ b/queue-4.11/sched-topology-fix-building-of-overlapping-sched-groups.patch @@ -0,0 +1,65 @@ +From 0372dd2736e02672ac6e189c31f7d8c02ad543cd Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Fri, 14 Apr 2017 17:24:02 +0200 +Subject: sched/topology: Fix building of overlapping sched-groups + +From: Peter Zijlstra + +commit 0372dd2736e02672ac6e189c31f7d8c02ad543cd upstream. 
+ +When building the overlapping groups, we very obviously should start +with the previous domain of _this_ @cpu, not CPU-0. + +This can be readily demonstrated with a topology like: + + node 0 1 2 3 + 0: 10 20 30 20 + 1: 20 10 20 30 + 2: 30 20 10 20 + 3: 20 30 20 10 + +Where (for example) CPU1 ends up generating the following nonsensical groups: + + [] CPU1 attaching sched-domain: + [] domain 0: span 0-2 level NUMA + [] groups: 1 2 0 + [] domain 1: span 0-3 level NUMA + [] groups: 1-3 (cpu_capacity = 3072) 0-1,3 (cpu_capacity = 3072) + +Where the fact that domain 1 doesn't include a group with span 0-2 is +the obvious fail. + +With patch this looks like: + + [] CPU1 attaching sched-domain: + [] domain 0: span 0-2 level NUMA + [] groups: 1 0 2 + [] domain 1: span 0-3 level NUMA + [] groups: 0-2 (cpu_capacity = 3072) 0,2-3 (cpu_capacity = 3072) + +Debugged-by: Lauro Ramos Venancio +Signed-off-by: Peter Zijlstra (Intel) +Cc: Linus Torvalds +Cc: Mike Galbraith +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: linux-kernel@vger.kernel.org +Fixes: e3589f6c81e4 ("sched: Allow for overlapping sched_domain spans") +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/topology.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -525,7 +525,7 @@ build_overlap_sched_groups(struct sched_ + + cpumask_clear(covered); + +- for_each_cpu(i, span) { ++ for_each_cpu_wrap(i, span, cpu) { + struct cpumask *sg_span; + + if (cpumask_test_cpu(i, covered)) diff --git a/queue-4.11/sched-topology-fix-overlapping-sched_group_mask.patch b/queue-4.11/sched-topology-fix-overlapping-sched_group_mask.patch new file mode 100644 index 00000000000..a7bfab770b7 --- /dev/null +++ b/queue-4.11/sched-topology-fix-overlapping-sched_group_mask.patch @@ -0,0 +1,99 @@ +From 73bb059f9b8a00c5e1bf2f7ca83138c05d05e600 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Tue, 25 Apr 2017 14:00:49 +0200 +Subject: sched/topology: Fix overlapping sched_group_mask + +From: Peter Zijlstra + +commit 73bb059f9b8a00c5e1bf2f7ca83138c05d05e600 upstream. + +The point of sched_group_mask is to select those CPUs from +sched_group_cpus that can actually arrive at this balance domain. + +The current code gets it wrong, as can be readily demonstrated with a +topology like: + + node 0 1 2 3 + 0: 10 20 30 20 + 1: 20 10 20 30 + 2: 30 20 10 20 + 3: 20 30 20 10 + +Where (for example) domain 1 on CPU1 ends up with a mask that includes +CPU0: + + [] CPU1 attaching sched-domain: + [] domain 0: span 0-2 level NUMA + [] groups: 1 (mask: 1), 2, 0 + [] domain 1: span 0-3 level NUMA + [] groups: 0-2 (mask: 0-2) (cpu_capacity: 3072), 0,2-3 (cpu_capacity: 3072) + +This causes sched_balance_cpu() to compute the wrong CPU and +consequently should_we_balance() will terminate early resulting in +missed load-balance opportunities. 
+ +The fixed topology looks like: + + [] CPU1 attaching sched-domain: + [] domain 0: span 0-2 level NUMA + [] groups: 1 (mask: 1), 2, 0 + [] domain 1: span 0-3 level NUMA + [] groups: 0-2 (mask: 1) (cpu_capacity: 3072), 0,2-3 (cpu_capacity: 3072) + +(note: this relies on OVERLAP domains to always have children, this is + true because the regular topology domains are still here -- this is + before degenerate trimming) + +Debugged-by: Lauro Ramos Venancio +Signed-off-by: Peter Zijlstra (Intel) +Cc: Linus Torvalds +Cc: Mike Galbraith +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: linux-kernel@vger.kernel.org +Fixes: e3589f6c81e4 ("sched: Allow for overlapping sched_domain spans") +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/topology.c | 18 +++++++++++++++++- + 1 file changed, 17 insertions(+), 1 deletion(-) + +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -480,6 +480,9 @@ enum s_alloc { + * Build an iteration mask that can exclude certain CPUs from the upwards + * domain traversal. + * ++ * Only CPUs that can arrive at this group should be considered to continue ++ * balancing. ++ * + * Asymmetric node setups can result in situations where the domain tree is of + * unequal depth, make sure to skip domains that already cover the entire + * range. +@@ -497,11 +500,24 @@ static void build_group_mask(struct sche + + for_each_cpu(i, sg_span) { + sibling = *per_cpu_ptr(sdd->sd, i); +- if (!cpumask_test_cpu(i, sched_domain_span(sibling))) ++ ++ /* ++ * Can happen in the asymmetric case, where these siblings are ++ * unused. The mask will not be empty because those CPUs that ++ * do have the top domain _should_ span the domain. ++ */ ++ if (!sibling->child) ++ continue; ++ ++ /* If we would not end up here, we can't continue from here */ ++ if (!cpumask_equal(sg_span, sched_domain_span(sibling->child))) + continue; + + cpumask_set_cpu(i, sched_group_mask(sg)); + } ++ ++ /* We must not have empty masks here */ ++ WARN_ON_ONCE(cpumask_empty(sched_group_mask(sg))); + } + + /* diff --git a/queue-4.11/sched-topology-optimize-build_group_mask.patch b/queue-4.11/sched-topology-optimize-build_group_mask.patch new file mode 100644 index 00000000000..22a9ff85d06 --- /dev/null +++ b/queue-4.11/sched-topology-optimize-build_group_mask.patch @@ -0,0 +1,46 @@ +From f32d782e31bf079f600dcec126ed117b0577e85c Mon Sep 17 00:00:00 2001 +From: Lauro Ramos Venancio +Date: Thu, 20 Apr 2017 16:51:40 -0300 +Subject: sched/topology: Optimize build_group_mask() + +From: Lauro Ramos Venancio + +commit f32d782e31bf079f600dcec126ed117b0577e85c upstream. + +The group mask is always used in intersection with the group CPUs. So, +when building the group mask, we don't have to care about CPUs that are +not part of the group. 
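
In other words, because the mask is only ever consulted in intersection with
the group's CPUs, bits outside the group can never influence the result, so
walking the smaller group span instead of the whole domain span is safe. A toy
bitmask model of that equivalence (uint64_t standing in for struct cpumask,
and the reachability table hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    /* Toy model: CPU i contributes to the mask if it is in the span
     * being walked and reach[i] says it can arrive at this group. */
    static uint64_t build_group_mask(uint64_t span, const int *reach)
    {
        uint64_t mask = 0;
        for (int i = 0; i < 64; i++)
            if ((span >> i) & 1 && reach[i])
                mask |= 1ull << i;
        return mask;
    }

    int main(void)
    {
        int reach[64] = { [0] = 1, [1] = 1, [2] = 0, [3] = 1 };
        uint64_t domain_span = 0xf; /* CPUs 0-3 */
        uint64_t group_span  = 0x3; /* CPUs 0-1 */

        /* Walking the domain span, then intersecting with the group... */
        uint64_t wide = build_group_mask(domain_span, reach) & group_span;
        /* ...equals walking only the group span in the first place. */
        uint64_t narrow = build_group_mask(group_span, reach);
        printf("equal: %d\n", wide == narrow); /* 1 */
        return 0;
    }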
+ +Signed-off-by: Lauro Ramos Venancio +Signed-off-by: Peter Zijlstra (Intel) +Cc: Linus Torvalds +Cc: Mike Galbraith +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: lwang@redhat.com +Cc: riel@redhat.com +Link: http://lkml.kernel.org/r/1492717903-5195-2-git-send-email-lvenanci@redhat.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/topology.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -490,12 +490,12 @@ enum s_alloc { + */ + static void build_group_mask(struct sched_domain *sd, struct sched_group *sg) + { +- const struct cpumask *span = sched_domain_span(sd); ++ const struct cpumask *sg_span = sched_group_cpus(sg); + struct sd_data *sdd = sd->private; + struct sched_domain *sibling; + int i; + +- for_each_cpu(i, span) { ++ for_each_cpu(i, sg_span) { + sibling = *per_cpu_ptr(sdd->sd, i); + if (!cpumask_test_cpu(i, sched_domain_span(sibling))) + continue; diff --git a/queue-4.11/series b/queue-4.11/series index 00bf5eafebe..552454e7b1e 100644 --- a/queue-4.11/series +++ b/queue-4.11/series @@ -70,3 +70,19 @@ mnt-in-propgate_umount-handle-visiting-mounts-in-any-order.patch mnt-make-propagate_umount-less-slow-for-overlapping-mount-propagation-trees.patch selftests-capabilities-fix-the-test_execve-test.patch mm-fix-overflow-check-in-expand_upwards.patch +crypto-talitos-extend-max-key-length-for-sha384-512-hmac-and-aead.patch +crypto-atmel-only-treat-ebusy-as-transient-if-backlog.patch +crypto-sha1-ssse3-disable-avx2.patch +crypto-caam-properly-set-iv-after-en-de-crypt.patch +crypto-caam-fix-signals-handling.patch +sched-fair-cpumask-export-for_each_cpu_wrap.patch +sched-topology-fix-building-of-overlapping-sched-groups.patch +sched-topology-optimize-build_group_mask.patch +sched-topology-fix-overlapping-sched_group_mask.patch +pm-wakeirq-convert-to-srcu.patch +alsa-x86-clear-the-pdata.notify_lpe_audio-pointer-before-teardown.patch +pm-qos-return-einval-for-bogus-strings.patch +kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch +kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch +kvm-vmx-check-value-written-to-ia32_bndcfgs.patch +kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch -- 2.47.3