From: Greg Kroah-Hartman Date: Mon, 2 May 2016 00:01:32 +0000 (-0700) Subject: 4.4-stable patches X-Git-Tag: v3.14.68~32 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f188dfcca98f5e5476cb388a8e35198d1658670f;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: asoc-dapm-make-sure-we-have-a-card-when-displaying-component-widgets.patch asoc-rt5640-correct-the-digital-interface-data-select.patch asoc-s3c24xx-use-const-snd_soc_component_driver-pointer.patch asoc-ssm4567-reset-device-before-regcache_sync.patch cgroup-cpuset-replace-cpuset_post_attach_flush-with-cgroup_subsys-post_attach-callback.patch cxl-keep-irq-mappings-on-context-teardown.patch efi-expose-non-blocking-set_variable-wrapper-to-efivars.patch efi-fix-out-of-bounds-read-in-variable_matches.patch ib-mlx5-expose-correct-max_sge_rd-limit.patch ib-security-restrict-use-of-the-write-interface.patch memcg-relocate-charge-moving-from-attach-to-post_attach.patch mm-huge_memory-replace-vm_no_thp-vm_bug_on-with-actual-vma-check.patch mm-hwpoison-fix-wrong-num_poisoned_pages-accounting.patch mm-vmscan-reclaim-highmem-zone-if-buffer_heads-is-over-limit.patch numa-fix-proc-pid-numa_maps-for-thp.patch slub-clean-up-code-for-kmem-cgroup-support-to-kmem_cache_free_bulk.patch v4l2-dv-timings.h-fix-polarity-for-4k-formats.patch vb2-memops-fix-over-allocation-of-frame-vectors.patch videobuf2-v4l2-verify-planes-array-in-buffer-dequeueing.patch workqueue-fix-ghost-pending-flag-while-doing-mq-io.patch x86-apic-handle-zero-vector-gracefully-in-clear_vector_irq.patch --- diff --git a/queue-4.4/asoc-dapm-make-sure-we-have-a-card-when-displaying-component-widgets.patch b/queue-4.4/asoc-dapm-make-sure-we-have-a-card-when-displaying-component-widgets.patch new file mode 100644 index 00000000000..600088826eb --- /dev/null +++ b/queue-4.4/asoc-dapm-make-sure-we-have-a-card-when-displaying-component-widgets.patch @@ -0,0 +1,41 @@ +From 47325078f2a3e543150e7df967e45756b2fff7ec Mon Sep 17 00:00:00 2001 +From: Mark Brown +Date: Fri, 18 Mar 2016 12:04:23 +0000 +Subject: ASoC: dapm: Make sure we have a card when displaying component widgets + +From: Mark Brown + +commit 47325078f2a3e543150e7df967e45756b2fff7ec upstream. + +The dummy component is reused for all cards so we special case and don't +bind it to any of them. This means that code like that displaying the +component widgets that tries to look at the card will crash. In the +future we will fix this by ensuring that the dummy component looks like +other components but that is invasive and so not suitable for a fix. +Instead add a special case check here. + +Reported-by: Harry Pan +Suggested-by: Lars-Peter Clausen +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + sound/soc/soc-dapm.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/sound/soc/soc-dapm.c ++++ b/sound/soc/soc-dapm.c +@@ -2188,6 +2188,13 @@ static ssize_t dapm_widget_show_componen + int count = 0; + char *state = "not set"; + ++ /* card won't be set for the dummy component, as a spot fix ++ * we're checking for that case specifically here but in future ++ * we will ensure that the dummy component looks like others. 
++ */ ++ if (!cmpnt->card) ++ return 0; ++ + list_for_each_entry(w, &cmpnt->card->widgets, list) { + if (w->dapm != dapm) + continue; diff --git a/queue-4.4/asoc-rt5640-correct-the-digital-interface-data-select.patch b/queue-4.4/asoc-rt5640-correct-the-digital-interface-data-select.patch new file mode 100644 index 00000000000..0d8641eb525 --- /dev/null +++ b/queue-4.4/asoc-rt5640-correct-the-digital-interface-data-select.patch @@ -0,0 +1,92 @@ +From 653aa4645244042826f105aab1be3d01b3d493ca Mon Sep 17 00:00:00 2001 +From: Sugar Zhang +Date: Fri, 18 Mar 2016 14:54:22 +0800 +Subject: ASoC: rt5640: Correct the digital interface data select + +From: Sugar Zhang + +commit 653aa4645244042826f105aab1be3d01b3d493ca upstream. + +this patch corrects the interface adc/dac control register definition +according to datasheet. + +Signed-off-by: Sugar Zhang +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + sound/soc/codecs/rt5640.c | 2 +- + sound/soc/codecs/rt5640.h | 36 ++++++++++++++++++------------------ + 2 files changed, 19 insertions(+), 19 deletions(-) + +--- a/sound/soc/codecs/rt5640.c ++++ b/sound/soc/codecs/rt5640.c +@@ -359,7 +359,7 @@ static const DECLARE_TLV_DB_RANGE(bst_tl + + /* Interface data select */ + static const char * const rt5640_data_select[] = { +- "Normal", "left copy to right", "right copy to left", "Swap"}; ++ "Normal", "Swap", "left copy to right", "right copy to left"}; + + static SOC_ENUM_SINGLE_DECL(rt5640_if1_dac_enum, RT5640_DIG_INF_DATA, + RT5640_IF1_DAC_SEL_SFT, rt5640_data_select); +--- a/sound/soc/codecs/rt5640.h ++++ b/sound/soc/codecs/rt5640.h +@@ -442,39 +442,39 @@ + #define RT5640_IF1_DAC_SEL_MASK (0x3 << 14) + #define RT5640_IF1_DAC_SEL_SFT 14 + #define RT5640_IF1_DAC_SEL_NOR (0x0 << 14) +-#define RT5640_IF1_DAC_SEL_L2R (0x1 << 14) +-#define RT5640_IF1_DAC_SEL_R2L (0x2 << 14) +-#define RT5640_IF1_DAC_SEL_SWAP (0x3 << 14) ++#define RT5640_IF1_DAC_SEL_SWAP (0x1 << 14) ++#define RT5640_IF1_DAC_SEL_L2R (0x2 << 14) ++#define RT5640_IF1_DAC_SEL_R2L (0x3 << 14) + #define RT5640_IF1_ADC_SEL_MASK (0x3 << 12) + #define RT5640_IF1_ADC_SEL_SFT 12 + #define RT5640_IF1_ADC_SEL_NOR (0x0 << 12) +-#define RT5640_IF1_ADC_SEL_L2R (0x1 << 12) +-#define RT5640_IF1_ADC_SEL_R2L (0x2 << 12) +-#define RT5640_IF1_ADC_SEL_SWAP (0x3 << 12) ++#define RT5640_IF1_ADC_SEL_SWAP (0x1 << 12) ++#define RT5640_IF1_ADC_SEL_L2R (0x2 << 12) ++#define RT5640_IF1_ADC_SEL_R2L (0x3 << 12) + #define RT5640_IF2_DAC_SEL_MASK (0x3 << 10) + #define RT5640_IF2_DAC_SEL_SFT 10 + #define RT5640_IF2_DAC_SEL_NOR (0x0 << 10) +-#define RT5640_IF2_DAC_SEL_L2R (0x1 << 10) +-#define RT5640_IF2_DAC_SEL_R2L (0x2 << 10) +-#define RT5640_IF2_DAC_SEL_SWAP (0x3 << 10) ++#define RT5640_IF2_DAC_SEL_SWAP (0x1 << 10) ++#define RT5640_IF2_DAC_SEL_L2R (0x2 << 10) ++#define RT5640_IF2_DAC_SEL_R2L (0x3 << 10) + #define RT5640_IF2_ADC_SEL_MASK (0x3 << 8) + #define RT5640_IF2_ADC_SEL_SFT 8 + #define RT5640_IF2_ADC_SEL_NOR (0x0 << 8) +-#define RT5640_IF2_ADC_SEL_L2R (0x1 << 8) +-#define RT5640_IF2_ADC_SEL_R2L (0x2 << 8) +-#define RT5640_IF2_ADC_SEL_SWAP (0x3 << 8) ++#define RT5640_IF2_ADC_SEL_SWAP (0x1 << 8) ++#define RT5640_IF2_ADC_SEL_L2R (0x2 << 8) ++#define RT5640_IF2_ADC_SEL_R2L (0x3 << 8) + #define RT5640_IF3_DAC_SEL_MASK (0x3 << 6) + #define RT5640_IF3_DAC_SEL_SFT 6 + #define RT5640_IF3_DAC_SEL_NOR (0x0 << 6) +-#define RT5640_IF3_DAC_SEL_L2R (0x1 << 6) +-#define RT5640_IF3_DAC_SEL_R2L (0x2 << 6) +-#define RT5640_IF3_DAC_SEL_SWAP (0x3 << 6) ++#define RT5640_IF3_DAC_SEL_SWAP (0x1 << 6) ++#define 
RT5640_IF3_DAC_SEL_L2R (0x2 << 6) ++#define RT5640_IF3_DAC_SEL_R2L (0x3 << 6) + #define RT5640_IF3_ADC_SEL_MASK (0x3 << 4) + #define RT5640_IF3_ADC_SEL_SFT 4 + #define RT5640_IF3_ADC_SEL_NOR (0x0 << 4) +-#define RT5640_IF3_ADC_SEL_L2R (0x1 << 4) +-#define RT5640_IF3_ADC_SEL_R2L (0x2 << 4) +-#define RT5640_IF3_ADC_SEL_SWAP (0x3 << 4) ++#define RT5640_IF3_ADC_SEL_SWAP (0x1 << 4) ++#define RT5640_IF3_ADC_SEL_L2R (0x2 << 4) ++#define RT5640_IF3_ADC_SEL_R2L (0x3 << 4) + + /* REC Left Mixer Control 1 (0x3b) */ + #define RT5640_G_HP_L_RM_L_MASK (0x7 << 13) diff --git a/queue-4.4/asoc-s3c24xx-use-const-snd_soc_component_driver-pointer.patch b/queue-4.4/asoc-s3c24xx-use-const-snd_soc_component_driver-pointer.patch new file mode 100644 index 00000000000..ab79870242c --- /dev/null +++ b/queue-4.4/asoc-s3c24xx-use-const-snd_soc_component_driver-pointer.patch @@ -0,0 +1,53 @@ +From ba4bc32eaa39ba7687f0958ae90eec94da613b46 Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann +Date: Mon, 25 Jan 2016 18:07:33 +0100 +Subject: ASoC: s3c24xx: use const snd_soc_component_driver pointer + +From: Arnd Bergmann + +commit ba4bc32eaa39ba7687f0958ae90eec94da613b46 upstream. + +An older patch to convert the API in the s3c i2s driver +ended up passing a const pointer into a function that takes +a non-const pointer, so we now get a warning: + +sound/soc/samsung/s3c2412-i2s.c: In function 's3c2412_iis_dev_probe': +sound/soc/samsung/s3c2412-i2s.c:172:9: error: passing argument 3 of 's3c_i2sv2_register_component' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers] + +However, the s3c_i2sv2_register_component() function again +passes the pointer into another function taking a const, so +we just need to change its prototype. + +Fixes: eca3b01d0885 ("ASoC: switch over to use snd_soc_register_component() on s3c i2s") +Signed-off-by: Arnd Bergmann +Reviewed-by: Krzysztof Kozlowski +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + sound/soc/samsung/s3c-i2s-v2.c | 2 +- + sound/soc/samsung/s3c-i2s-v2.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/sound/soc/samsung/s3c-i2s-v2.c ++++ b/sound/soc/samsung/s3c-i2s-v2.c +@@ -709,7 +709,7 @@ static int s3c2412_i2s_resume(struct snd + #endif + + int s3c_i2sv2_register_component(struct device *dev, int id, +- struct snd_soc_component_driver *cmp_drv, ++ const struct snd_soc_component_driver *cmp_drv, + struct snd_soc_dai_driver *dai_drv) + { + struct snd_soc_dai_ops *ops = (struct snd_soc_dai_ops *)dai_drv->ops; +--- a/sound/soc/samsung/s3c-i2s-v2.h ++++ b/sound/soc/samsung/s3c-i2s-v2.h +@@ -101,7 +101,7 @@ extern int s3c_i2sv2_probe(struct snd_so + * soc core. + */ + extern int s3c_i2sv2_register_component(struct device *dev, int id, +- struct snd_soc_component_driver *cmp_drv, ++ const struct snd_soc_component_driver *cmp_drv, + struct snd_soc_dai_driver *dai_drv); + + #endif /* __SND_SOC_S3C24XX_S3C_I2SV2_I2S_H */ diff --git a/queue-4.4/asoc-ssm4567-reset-device-before-regcache_sync.patch b/queue-4.4/asoc-ssm4567-reset-device-before-regcache_sync.patch new file mode 100644 index 00000000000..efe2e3089c6 --- /dev/null +++ b/queue-4.4/asoc-ssm4567-reset-device-before-regcache_sync.patch @@ -0,0 +1,45 @@ +From 712a8038cc24dba668afe82f0413714ca87184e0 Mon Sep 17 00:00:00 2001 +From: Lars-Peter Clausen +Date: Wed, 27 Jan 2016 14:26:18 +0100 +Subject: ASoC: ssm4567: Reset device before regcache_sync() + +From: Lars-Peter Clausen + +commit 712a8038cc24dba668afe82f0413714ca87184e0 upstream. 
+ +When the ssm4567 is powered up the driver calles regcache_sync() to restore +the register map content. regcache_sync() assumes that the device is in its +power-on reset state. Make sure that this is the case by explicitly +resetting the ssm4567 register map before calling regcache_sync() otherwise +we might end up with a incorrect register map which leads to undefined +behaviour. + +One such undefined behaviour was observed when returning from system +suspend while a playback stream is active, in that case the ssm4567 was +kept muted after resume. + +Fixes: 1ee44ce03011 ("ASoC: ssm4567: Add driver for Analog Devices SSM4567 amplifier") +Reported-by: Harsha Priya +Tested-by: Fang, Yang A +Signed-off-by: Lars-Peter Clausen +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + sound/soc/codecs/ssm4567.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/sound/soc/codecs/ssm4567.c ++++ b/sound/soc/codecs/ssm4567.c +@@ -352,6 +352,11 @@ static int ssm4567_set_power(struct ssm4 + regcache_cache_only(ssm4567->regmap, !enable); + + if (enable) { ++ ret = regmap_write(ssm4567->regmap, SSM4567_REG_SOFT_RESET, ++ 0x00); ++ if (ret) ++ return ret; ++ + ret = regmap_update_bits(ssm4567->regmap, + SSM4567_REG_POWER_CTRL, + SSM4567_POWER_SPWDN, 0x00); diff --git a/queue-4.4/cgroup-cpuset-replace-cpuset_post_attach_flush-with-cgroup_subsys-post_attach-callback.patch b/queue-4.4/cgroup-cpuset-replace-cpuset_post_attach_flush-with-cgroup_subsys-post_attach-callback.patch new file mode 100644 index 00000000000..4de607b76d4 --- /dev/null +++ b/queue-4.4/cgroup-cpuset-replace-cpuset_post_attach_flush-with-cgroup_subsys-post_attach-callback.patch @@ -0,0 +1,127 @@ +From 5cf1cacb49aee39c3e02ae87068fc3c6430659b0 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Thu, 21 Apr 2016 19:06:48 -0400 +Subject: cgroup, cpuset: replace cpuset_post_attach_flush() with cgroup_subsys->post_attach callback + +From: Tejun Heo + +commit 5cf1cacb49aee39c3e02ae87068fc3c6430659b0 upstream. + +Since e93ad19d0564 ("cpuset: make mm migration asynchronous"), cpuset +kicks off asynchronous NUMA node migration if necessary during task +migration and flushes it from cpuset_post_attach_flush() which is +called at the end of __cgroup_procs_write(). This is to avoid +performing migration with cgroup_threadgroup_rwsem write-locked which +can lead to deadlock through dependency on kworker creation. + +memcg has a similar issue with charge moving, so let's convert it to +an official callback rather than the current one-off cpuset specific +function. This patch adds cgroup_subsys->post_attach callback and +makes cpuset register cpuset_post_attach_flush() as its ->post_attach. + +The conversion is mostly one-to-one except that the new callback is +called under cgroup_mutex. This is to guarantee that no other +migration operations are started before ->post_attach callbacks are +finished. cgroup_mutex is one of the outermost mutex in the system +and has never been and shouldn't be a problem. We can add specialized +synchronization around __cgroup_procs_write() but I don't think +there's any noticeable benefit. 
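For context, the deadlock class being sidestepped is the flush-under-lock inversion. A minimal sketch of the unsafe shape (illustrative only, not code from the tree; cpuset_migrate_mm_wq is the workqueue named in the patch below): every fork, including kthread creation, takes cgroup_threadgroup_rwsem for read, so a writer that waits on work whose pool may first need to spawn a kworker can never make progress:

	percpu_down_write(&cgroup_threadgroup_rwsem);
	/* unsafe: this flush may have to wait for a kworker that does not
	 * exist yet, and creating that kthread blocks on the read side of
	 * the same rwsem we hold for write */
	flush_workqueue(cpuset_migrate_mm_wq);
	percpu_up_write(&cgroup_threadgroup_rwsem);

Running the flush from ->post_attach instead places it after percpu_up_write(), with only cgroup_mutex held, which breaks the cycle.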
+ +Signed-off-by: Tejun Heo +Cc: Li Zefan +Cc: Johannes Weiner +Cc: Michal Hocko +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/cgroup-defs.h | 1 + + include/linux/cpuset.h | 6 ------ + kernel/cgroup.c | 7 +++++-- + kernel/cpuset.c | 4 ++-- + 4 files changed, 8 insertions(+), 10 deletions(-) + +--- a/include/linux/cgroup-defs.h ++++ b/include/linux/cgroup-defs.h +@@ -434,6 +434,7 @@ struct cgroup_subsys { + int (*can_attach)(struct cgroup_taskset *tset); + void (*cancel_attach)(struct cgroup_taskset *tset); + void (*attach)(struct cgroup_taskset *tset); ++ void (*post_attach)(void); + int (*can_fork)(struct task_struct *task, void **priv_p); + void (*cancel_fork)(struct task_struct *task, void *priv); + void (*fork)(struct task_struct *task, void *priv); +--- a/include/linux/cpuset.h ++++ b/include/linux/cpuset.h +@@ -137,8 +137,6 @@ static inline void set_mems_allowed(node + task_unlock(current); + } + +-extern void cpuset_post_attach_flush(void); +- + #else /* !CONFIG_CPUSETS */ + + static inline bool cpusets_enabled(void) { return false; } +@@ -245,10 +243,6 @@ static inline bool read_mems_allowed_ret + return false; + } + +-static inline void cpuset_post_attach_flush(void) +-{ +-} +- + #endif /* !CONFIG_CPUSETS */ + + #endif /* _LINUX_CPUSET_H */ +--- a/kernel/cgroup.c ++++ b/kernel/cgroup.c +@@ -2721,9 +2721,10 @@ static ssize_t __cgroup_procs_write(stru + size_t nbytes, loff_t off, bool threadgroup) + { + struct task_struct *tsk; ++ struct cgroup_subsys *ss; + struct cgroup *cgrp; + pid_t pid; +- int ret; ++ int ssid, ret; + + if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) + return -EINVAL; +@@ -2771,8 +2772,10 @@ out_unlock_rcu: + rcu_read_unlock(); + out_unlock_threadgroup: + percpu_up_write(&cgroup_threadgroup_rwsem); ++ for_each_subsys(ss, ssid) ++ if (ss->post_attach) ++ ss->post_attach(); + cgroup_kn_unlock(of->kn); +- cpuset_post_attach_flush(); + return ret ?: nbytes; + } + +--- a/kernel/cpuset.c ++++ b/kernel/cpuset.c +@@ -57,7 +57,6 @@ + #include + #include + #include +-#include + #include + #include + +@@ -1015,7 +1014,7 @@ static void cpuset_migrate_mm(struct mm_ + } + } + +-void cpuset_post_attach_flush(void) ++static void cpuset_post_attach(void) + { + flush_workqueue(cpuset_migrate_mm_wq); + } +@@ -2083,6 +2082,7 @@ struct cgroup_subsys cpuset_cgrp_subsys + .can_attach = cpuset_can_attach, + .cancel_attach = cpuset_cancel_attach, + .attach = cpuset_attach, ++ .post_attach = cpuset_post_attach, + .bind = cpuset_bind, + .legacy_cftypes = files, + .early_init = 1, diff --git a/queue-4.4/cxl-keep-irq-mappings-on-context-teardown.patch b/queue-4.4/cxl-keep-irq-mappings-on-context-teardown.patch new file mode 100644 index 00000000000..580eb72a5aa --- /dev/null +++ b/queue-4.4/cxl-keep-irq-mappings-on-context-teardown.patch @@ -0,0 +1,44 @@ +From d6776bba44d9752f6cdf640046070e71ee4bba7b Mon Sep 17 00:00:00 2001 +From: Michael Neuling +Date: Fri, 22 Apr 2016 14:57:48 +1000 +Subject: cxl: Keep IRQ mappings on context teardown + +From: Michael Neuling + +commit d6776bba44d9752f6cdf640046070e71ee4bba7b upstream. + +Keep IRQ mappings on context teardown. This won't leak IRQs as if we +allocate the mapping again, the generic code will give the same +mapping used last time. + +Doing this works around a race in the generic code. Masking the +interrupt introduces a race which can crash the kernel or result in +IRQ that is never EOIed. The lost of EOI results in all subsequent +mappings to the same HW IRQ never receiving an interrupt. 
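The no-leak claim above rests on irq domain mappings being idempotent: requesting a mapping that already exists hands back the existing virq rather than allocating another. A hedged illustration of the property relied upon (not part of the patch itself):

	unsigned int virq1 = irq_create_mapping(domain, hwirq);
	unsigned int virq2 = irq_create_mapping(domain, hwirq);
	/* the generic code returns the existing mapping, so skipping
	 * irq_dispose_mapping() at teardown does not grow the table */
	WARN_ON(virq1 != virq2);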
+ +We've seen this race with cxl test cases which are doing heavy context +startup and teardown at the same time as heavy interrupt load. + +A fix to the generic code is being investigated also. + +Signed-off-by: Michael Neuling +Tested-by: Andrew Donnellan +Acked-by: Ian Munsie +Tested-by: Vaibhav Jain +Signed-off-by: Michael Ellerman +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/misc/cxl/irq.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/misc/cxl/irq.c ++++ b/drivers/misc/cxl/irq.c +@@ -288,7 +288,6 @@ unsigned int cxl_map_irq(struct cxl *ada + void cxl_unmap_irq(unsigned int virq, void *cookie) + { + free_irq(virq, cookie); +- irq_dispose_mapping(virq); + } + + static int cxl_register_one_irq(struct cxl *adapter, diff --git a/queue-4.4/efi-expose-non-blocking-set_variable-wrapper-to-efivars.patch b/queue-4.4/efi-expose-non-blocking-set_variable-wrapper-to-efivars.patch new file mode 100644 index 00000000000..c737d66a175 --- /dev/null +++ b/queue-4.4/efi-expose-non-blocking-set_variable-wrapper-to-efivars.patch @@ -0,0 +1,45 @@ +From 9c6672ac9c91f7eb1ec436be1442b8c26d098e55 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Mon, 1 Feb 2016 22:06:55 +0000 +Subject: efi: Expose non-blocking set_variable() wrapper to efivars + +From: Ard Biesheuvel + +commit 9c6672ac9c91f7eb1ec436be1442b8c26d098e55 upstream. + +Commit 6d80dba1c9fe ("efi: Provide a non-blocking SetVariable() +operation") implemented a non-blocking alternative for the UEFI +SetVariable() invocation performed by efivars, since it may +occur in atomic context. However, this version of the function +was never exposed via the efivars struct, so the non-blocking +versions was not actually callable. Fix that. + +Signed-off-by: Ard Biesheuvel +Signed-off-by: Matt Fleming +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: linux-efi@vger.kernel.org +Fixes: 6d80dba1c9fe ("efi: Provide a non-blocking SetVariable() operation") +Link: http://lkml.kernel.org/r/1454364428-494-2-git-send-email-matt@codeblueprint.co.uk +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/firmware/efi/efi.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/firmware/efi/efi.c ++++ b/drivers/firmware/efi/efi.c +@@ -180,6 +180,7 @@ static int generic_ops_register(void) + { + generic_ops.get_variable = efi.get_variable; + generic_ops.set_variable = efi.set_variable; ++ generic_ops.set_variable_nonblocking = efi.set_variable_nonblocking; + generic_ops.get_next_variable = efi.get_next_variable; + generic_ops.query_variable_store = efi_query_variable_store; + diff --git a/queue-4.4/efi-fix-out-of-bounds-read-in-variable_matches.patch b/queue-4.4/efi-fix-out-of-bounds-read-in-variable_matches.patch new file mode 100644 index 00000000000..cc7e533432a --- /dev/null +++ b/queue-4.4/efi-fix-out-of-bounds-read-in-variable_matches.patch @@ -0,0 +1,93 @@ +From 630ba0cc7a6dbafbdee43795617c872b35cde1b4 Mon Sep 17 00:00:00 2001 +From: Laszlo Ersek +Date: Thu, 21 Apr 2016 18:21:11 +0200 +Subject: efi: Fix out-of-bounds read in variable_matches() + +From: Laszlo Ersek + +commit 630ba0cc7a6dbafbdee43795617c872b35cde1b4 upstream. + +The variable_matches() function can currently read "var_name[len]", for +example when: + + - var_name[0] == 'a', + - len == 1 + - match_name points to the NUL-terminated string "ab". + +This function is supposed to accept "var_name" inputs that are not +NUL-terminated (hence the "len" parameter"). 
Document the function, and +access "var_name[*match]" only if "*match" is smaller than "len". + +Reported-by: Chris Wilson +Signed-off-by: Laszlo Ersek +Cc: Peter Jones +Cc: Matthew Garrett +Cc: Jason Andryuk +Cc: Jani Nikula +Link: http://thread.gmane.org/gmane.comp.freedesktop.xorg.drivers.intel/86906 +Signed-off-by: Matt Fleming +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/firmware/efi/vars.c | 39 +++++++++++++++++++++++++++------------ + 1 file changed, 27 insertions(+), 12 deletions(-) + +--- a/drivers/firmware/efi/vars.c ++++ b/drivers/firmware/efi/vars.c +@@ -202,29 +202,44 @@ static const struct variable_validate va + { NULL_GUID, "", NULL }, + }; + ++/* ++ * Check if @var_name matches the pattern given in @match_name. ++ * ++ * @var_name: an array of @len non-NUL characters. ++ * @match_name: a NUL-terminated pattern string, optionally ending in "*". A ++ * final "*" character matches any trailing characters @var_name, ++ * including the case when there are none left in @var_name. ++ * @match: on output, the number of non-wildcard characters in @match_name ++ * that @var_name matches, regardless of the return value. ++ * @return: whether @var_name fully matches @match_name. ++ */ + static bool + variable_matches(const char *var_name, size_t len, const char *match_name, + int *match) + { + for (*match = 0; ; (*match)++) { + char c = match_name[*match]; +- char u = var_name[*match]; + +- /* Wildcard in the matching name means we've matched */ +- if (c == '*') ++ switch (c) { ++ case '*': ++ /* Wildcard in @match_name means we've matched. */ + return true; + +- /* Case sensitive match */ +- if (!c && *match == len) +- return true; +- +- if (c != u) ++ case '\0': ++ /* @match_name has ended. Has @var_name too? */ ++ return (*match == len); ++ ++ default: ++ /* ++ * We've reached a non-wildcard char in @match_name. ++ * Continue only if there's an identical character in ++ * @var_name. ++ */ ++ if (*match < len && c == var_name[*match]) ++ continue; + return false; +- +- if (!c) +- return true; ++ } + } +- return true; + } + + bool diff --git a/queue-4.4/ib-mlx5-expose-correct-max_sge_rd-limit.patch b/queue-4.4/ib-mlx5-expose-correct-max_sge_rd-limit.patch new file mode 100644 index 00000000000..a0ff2af3f67 --- /dev/null +++ b/queue-4.4/ib-mlx5-expose-correct-max_sge_rd-limit.patch @@ -0,0 +1,61 @@ +From 986ef95ecdd3eb6fa29433e68faa94c7624083be Mon Sep 17 00:00:00 2001 +From: Sagi Grimberg +Date: Thu, 31 Mar 2016 19:03:25 +0300 +Subject: IB/mlx5: Expose correct max_sge_rd limit + +From: Sagi Grimberg + +commit 986ef95ecdd3eb6fa29433e68faa94c7624083be upstream. + +mlx5 devices (Connect-IB, ConnectX-4, ConnectX-4-LX) has a limitation +where rdma read work queue entries cannot exceed 512 bytes. +A rdma_read wqe needs to fit in 512 bytes: +- wqe control segment (16 bytes) +- rdma segment (16 bytes) +- scatter elements (16 bytes each) + +So max_sge_rd should be: (512 - 16 - 16) / 16 = 30. 
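The limit is easy to verify as a standalone computation; a small userspace illustration (the constant names here are ours, the kernel-side constant the patch introduces is MLX5_MAX_SGE_RD):

	#include <stdio.h>

	int main(void)
	{
		const int wqe_max = 512;	/* max rdma read wqe, bytes */
		const int ctrl_seg = 16;	/* wqe control segment */
		const int rdma_seg = 16;	/* rdma segment */
		const int data_seg = 16;	/* one scatter element */

		printf("max_sge_rd = %d\n",
		       (wqe_max - ctrl_seg - rdma_seg) / data_seg); /* 30 */
		return 0;
	}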
+ +Reported-by: Christoph Hellwig +Tested-by: Christoph Hellwig +Signed-off-by: Sagi Grimberg +Signed-off-by: Leon Romanovsky +Signed-off-by: Doug Ledford +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/hw/mlx5/main.c | 2 +- + include/linux/mlx5/device.h | 11 +++++++++++ + 2 files changed, 12 insertions(+), 1 deletion(-) + +--- a/drivers/infiniband/hw/mlx5/main.c ++++ b/drivers/infiniband/hw/mlx5/main.c +@@ -273,7 +273,7 @@ static int mlx5_ib_query_device(struct i + sizeof(struct mlx5_wqe_ctrl_seg)) / + sizeof(struct mlx5_wqe_data_seg); + props->max_sge = min(max_rq_sg, max_sq_sg); +- props->max_sge_rd = props->max_sge; ++ props->max_sge_rd = MLX5_MAX_SGE_RD; + props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); + props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; + props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); +--- a/include/linux/mlx5/device.h ++++ b/include/linux/mlx5/device.h +@@ -334,6 +334,17 @@ enum { + MLX5_CAP_OFF_CMDIF_CSUM = 46, + }; + ++enum { ++ /* ++ * Max wqe size for rdma read is 512 bytes, so this ++ * limits our max_sge_rd as the wqe needs to fit: ++ * - ctrl segment (16 bytes) ++ * - rdma segment (16 bytes) ++ * - scatter elements (16 bytes each) ++ */ ++ MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16 ++}; ++ + struct mlx5_inbox_hdr { + __be16 opcode; + u8 rsvd[4]; diff --git a/queue-4.4/ib-security-restrict-use-of-the-write-interface.patch b/queue-4.4/ib-security-restrict-use-of-the-write-interface.patch new file mode 100644 index 00000000000..1a85577931d --- /dev/null +++ b/queue-4.4/ib-security-restrict-use-of-the-write-interface.patch @@ -0,0 +1,176 @@ +From e6bd18f57aad1a2d1ef40e646d03ed0f2515c9e3 Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Sun, 10 Apr 2016 19:13:13 -0600 +Subject: IB/security: Restrict use of the write() interface + +From: Jason Gunthorpe + +commit e6bd18f57aad1a2d1ef40e646d03ed0f2515c9e3 upstream. + +The drivers/infiniband stack uses write() as a replacement for +bi-directional ioctl(). This is not safe. There are ways to +trigger write calls that result in the return structure that +is normally written to user space being shunted off to user +specified kernel memory instead. + +For the immediate repair, detect and deny suspicious accesses to +the write API. + +For long term, update the user space libraries and the kernel API +to something that doesn't present the same security vulnerabilities +(likely a structured ioctl() interface). + +The impacted uAPI interfaces are generally only available if +hardware from drivers/infiniband is installed in the system. 
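Concretely, each affected ->write() handler gains the same guard; a simplified sketch of the pattern (the handler name is hypothetical, the helper is the ib_safe_file_access() added to include/rdma/ib.h further down in this patch):

	static ssize_t example_ib_write(struct file *filp,
					const char __user *buf,
					size_t len, loff_t *pos)
	{
		if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
			return -EACCES;

		/* ... existing command parsing continues unchanged ... */
		return len;
	}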
+ +Reported-by: Jann Horn +Signed-off-by: Linus Torvalds +Signed-off-by: Jason Gunthorpe +[ Expanded check to all known write() entry points ] +Signed-off-by: Doug Ledford +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/core/ucm.c | 4 ++++ + drivers/infiniband/core/ucma.c | 3 +++ + drivers/infiniband/core/uverbs_main.c | 5 +++++ + drivers/infiniband/hw/qib/qib_file_ops.c | 5 +++++ + drivers/staging/rdma/hfi1/TODO | 2 +- + drivers/staging/rdma/hfi1/file_ops.c | 6 ++++++ + include/rdma/ib.h | 16 ++++++++++++++++ + 7 files changed, 40 insertions(+), 1 deletion(-) + +--- a/drivers/infiniband/core/ucm.c ++++ b/drivers/infiniband/core/ucm.c +@@ -48,6 +48,7 @@ + + #include + ++#include + #include + #include + #include +@@ -1103,6 +1104,9 @@ static ssize_t ib_ucm_write(struct file + struct ib_ucm_cmd_hdr hdr; + ssize_t result; + ++ if (WARN_ON_ONCE(!ib_safe_file_access(filp))) ++ return -EACCES; ++ + if (len < sizeof(hdr)) + return -EINVAL; + +--- a/drivers/infiniband/core/ucma.c ++++ b/drivers/infiniband/core/ucma.c +@@ -1574,6 +1574,9 @@ static ssize_t ucma_write(struct file *f + struct rdma_ucm_cmd_hdr hdr; + ssize_t ret; + ++ if (WARN_ON_ONCE(!ib_safe_file_access(filp))) ++ return -EACCES; ++ + if (len < sizeof(hdr)) + return -EINVAL; + +--- a/drivers/infiniband/core/uverbs_main.c ++++ b/drivers/infiniband/core/uverbs_main.c +@@ -48,6 +48,8 @@ + + #include + ++#include ++ + #include "uverbs.h" + + MODULE_AUTHOR("Roland Dreier"); +@@ -682,6 +684,9 @@ static ssize_t ib_uverbs_write(struct fi + int srcu_key; + ssize_t ret; + ++ if (WARN_ON_ONCE(!ib_safe_file_access(filp))) ++ return -EACCES; ++ + if (count < sizeof hdr) + return -EINVAL; + +--- a/drivers/infiniband/hw/qib/qib_file_ops.c ++++ b/drivers/infiniband/hw/qib/qib_file_ops.c +@@ -45,6 +45,8 @@ + #include + #include + ++#include ++ + #include "qib.h" + #include "qib_common.h" + #include "qib_user_sdma.h" +@@ -2067,6 +2069,9 @@ static ssize_t qib_write(struct file *fp + ssize_t ret = 0; + void *dest; + ++ if (WARN_ON_ONCE(!ib_safe_file_access(fp))) ++ return -EACCES; ++ + if (count < sizeof(cmd.type)) { + ret = -EINVAL; + goto bail; +--- a/drivers/staging/rdma/hfi1/TODO ++++ b/drivers/staging/rdma/hfi1/TODO +@@ -3,4 +3,4 @@ July, 2015 + - Remove unneeded file entries in sysfs + - Remove software processing of IB protocol and place in library for use + by qib, ipath (if still present), hfi1, and eventually soft-roce +- ++- Replace incorrect uAPI +--- a/drivers/staging/rdma/hfi1/file_ops.c ++++ b/drivers/staging/rdma/hfi1/file_ops.c +@@ -62,6 +62,8 @@ + #include + #include + ++#include ++ + #include "hfi.h" + #include "pio.h" + #include "device.h" +@@ -214,6 +216,10 @@ static ssize_t hfi1_file_write(struct fi + int uctxt_required = 1; + int must_be_root = 0; + ++ /* FIXME: This interface cannot continue out of staging */ ++ if (WARN_ON_ONCE(!ib_safe_file_access(fp))) ++ return -EACCES; ++ + if (count < sizeof(cmd)) { + ret = -EINVAL; + goto bail; +--- a/include/rdma/ib.h ++++ b/include/rdma/ib.h +@@ -34,6 +34,7 @@ + #define _RDMA_IB_H + + #include ++#include + + struct ib_addr { + union { +@@ -86,4 +87,19 @@ struct sockaddr_ib { + __u64 sib_scope_id; + }; + ++/* ++ * The IB interfaces that use write() as bi-directional ioctl() are ++ * fundamentally unsafe, since there are lots of ways to trigger "write()" ++ * calls from various contexts with elevated privileges. That includes the ++ * traditional suid executable error message writes, but also various kernel ++ * interfaces that can write to file descriptors. 
++ * ++ * This function provides protection for the legacy API by restricting the ++ * calling context. ++ */ ++static inline bool ib_safe_file_access(struct file *filp) ++{ ++ return filp->f_cred == current_cred() && segment_eq(get_fs(), USER_DS); ++} ++ + #endif /* _RDMA_IB_H */ diff --git a/queue-4.4/memcg-relocate-charge-moving-from-attach-to-post_attach.patch b/queue-4.4/memcg-relocate-charge-moving-from-attach-to-post_attach.patch new file mode 100644 index 00000000000..c9be290f8c9 --- /dev/null +++ b/queue-4.4/memcg-relocate-charge-moving-from-attach-to-post_attach.patch @@ -0,0 +1,168 @@ +From 264a0ae164bc0e9144bebcd25ff030d067b1a878 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Thu, 21 Apr 2016 19:09:02 -0400 +Subject: memcg: relocate charge moving from ->attach to ->post_attach + +From: Tejun Heo + +commit 264a0ae164bc0e9144bebcd25ff030d067b1a878 upstream. + +Hello, + +So, this ended up a lot simpler than I originally expected. I tested +it lightly and it seems to work fine. Petr, can you please test these +two patches w/o the lru drain drop patch and see whether the problem +is gone? + +Thanks. +------ 8< ------ +If charge moving is used, memcg performs relabeling of the affected +pages from its ->attach callback which is called under both +cgroup_threadgroup_rwsem and thus can't create new kthreads. This is +fragile as various operations may depend on workqueues making forward +progress which relies on the ability to create new kthreads. + +There's no reason to perform charge moving from ->attach which is deep +in the task migration path. Move it to ->post_attach which is called +after the actual migration is finished and cgroup_threadgroup_rwsem is +dropped. + +* move_charge_struct->mm is added and ->can_attach is now responsible + for pinning and recording the target mm. mem_cgroup_clear_mc() is + updated accordingly. This also simplifies mem_cgroup_move_task(). + +* mem_cgroup_move_task() is now called from ->post_attach instead of + ->attach. + +Signed-off-by: Tejun Heo +Cc: Johannes Weiner +Acked-by: Michal Hocko +Debugged-and-tested-by: Petr Mladek +Reported-by: Cyril Hrubis +Reported-by: Johannes Weiner +Fixes: 1ed1328792ff ("sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem") +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memcontrol.c | 37 +++++++++++++++++++------------------ + 1 file changed, 19 insertions(+), 18 deletions(-) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -196,6 +196,7 @@ static void mem_cgroup_oom_notify(struct + /* "mc" and its members are protected by cgroup_mutex */ + static struct move_charge_struct { + spinlock_t lock; /* for from, to */ ++ struct mm_struct *mm; + struct mem_cgroup *from; + struct mem_cgroup *to; + unsigned long flags; +@@ -4800,6 +4801,8 @@ static void __mem_cgroup_clear_mc(void) + + static void mem_cgroup_clear_mc(void) + { ++ struct mm_struct *mm = mc.mm; ++ + /* + * we must clear moving_task before waking up waiters at the end of + * task migration. 
+@@ -4809,7 +4812,10 @@ static void mem_cgroup_clear_mc(void) + spin_lock(&mc.lock); + mc.from = NULL; + mc.to = NULL; ++ mc.mm = NULL; + spin_unlock(&mc.lock); ++ ++ mmput(mm); + } + + static int mem_cgroup_can_attach(struct cgroup_taskset *tset) +@@ -4866,6 +4872,7 @@ static int mem_cgroup_can_attach(struct + VM_BUG_ON(mc.moved_swap); + + spin_lock(&mc.lock); ++ mc.mm = mm; + mc.from = from; + mc.to = memcg; + mc.flags = move_flags; +@@ -4875,8 +4882,9 @@ static int mem_cgroup_can_attach(struct + ret = mem_cgroup_precharge_mc(mm); + if (ret) + mem_cgroup_clear_mc(); ++ } else { ++ mmput(mm); + } +- mmput(mm); + return ret; + } + +@@ -4985,11 +4993,11 @@ put: /* get_mctgt_type() gets the page + return ret; + } + +-static void mem_cgroup_move_charge(struct mm_struct *mm) ++static void mem_cgroup_move_charge(void) + { + struct mm_walk mem_cgroup_move_charge_walk = { + .pmd_entry = mem_cgroup_move_charge_pte_range, +- .mm = mm, ++ .mm = mc.mm, + }; + + lru_add_drain_all(); +@@ -5001,7 +5009,7 @@ static void mem_cgroup_move_charge(struc + atomic_inc(&mc.from->moving_account); + synchronize_rcu(); + retry: +- if (unlikely(!down_read_trylock(&mm->mmap_sem))) { ++ if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) { + /* + * Someone who are holding the mmap_sem might be waiting in + * waitq. So we cancel all extra charges, wake up all waiters, +@@ -5018,23 +5026,16 @@ retry: + * additional charge, the page walk just aborts. + */ + walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk); +- up_read(&mm->mmap_sem); ++ up_read(&mc.mm->mmap_sem); + atomic_dec(&mc.from->moving_account); + } + +-static void mem_cgroup_move_task(struct cgroup_taskset *tset) ++static void mem_cgroup_move_task(void) + { +- struct cgroup_subsys_state *css; +- struct task_struct *p = cgroup_taskset_first(tset, &css); +- struct mm_struct *mm = get_task_mm(p); +- +- if (mm) { +- if (mc.to) +- mem_cgroup_move_charge(mm); +- mmput(mm); +- } +- if (mc.to) ++ if (mc.to) { ++ mem_cgroup_move_charge(); + mem_cgroup_clear_mc(); ++ } + } + #else /* !CONFIG_MMU */ + static int mem_cgroup_can_attach(struct cgroup_taskset *tset) +@@ -5044,7 +5045,7 @@ static int mem_cgroup_can_attach(struct + static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset) + { + } +-static void mem_cgroup_move_task(struct cgroup_taskset *tset) ++static void mem_cgroup_move_task(void) + { + } + #endif +@@ -5258,7 +5259,7 @@ struct cgroup_subsys memory_cgrp_subsys + .css_reset = mem_cgroup_css_reset, + .can_attach = mem_cgroup_can_attach, + .cancel_attach = mem_cgroup_cancel_attach, +- .attach = mem_cgroup_move_task, ++ .post_attach = mem_cgroup_move_task, + .bind = mem_cgroup_bind, + .dfl_cftypes = memory_files, + .legacy_cftypes = mem_cgroup_legacy_files, diff --git a/queue-4.4/mm-huge_memory-replace-vm_no_thp-vm_bug_on-with-actual-vma-check.patch b/queue-4.4/mm-huge_memory-replace-vm_no_thp-vm_bug_on-with-actual-vma-check.patch new file mode 100644 index 00000000000..afcb24c2f48 --- /dev/null +++ b/queue-4.4/mm-huge_memory-replace-vm_no_thp-vm_bug_on-with-actual-vma-check.patch @@ -0,0 +1,56 @@ +From 3486b85a29c1741db99d0c522211c82d2b7a56d0 Mon Sep 17 00:00:00 2001 +From: Konstantin Khlebnikov +Date: Thu, 28 Apr 2016 16:18:32 -0700 +Subject: mm/huge_memory: replace VM_NO_THP VM_BUG_ON with actual VMA check + +From: Konstantin Khlebnikov + +commit 3486b85a29c1741db99d0c522211c82d2b7a56d0 upstream. 
+ +Khugepaged detects own VMAs by checking vm_file and vm_ops but this way +it cannot distinguish private /dev/zero mappings from other special +mappings like /dev/hpet which has no vm_ops and popultes PTEs in mmap. + +This fixes false-positive VM_BUG_ON and prevents installing THP where +they are not expected. + +Link: http://lkml.kernel.org/r/CACT4Y+ZmuZMV5CjSFOeXviwQdABAgT7T+StKfTqan9YDtgEi5g@mail.gmail.com +Fixes: 78f11a255749 ("mm: thp: fix /dev/zero MAP_PRIVATE and vm_flags cleanups") +Signed-off-by: Konstantin Khlebnikov +Reported-by: Dmitry Vyukov +Acked-by: Vlastimil Babka +Acked-by: Kirill A. Shutemov +Cc: Dmitry Vyukov +Cc: Andrea Arcangeli +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/huge_memory.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2134,10 +2134,9 @@ int khugepaged_enter_vma_merge(struct vm + * page fault if needed. + */ + return 0; +- if (vma->vm_ops) ++ if (vma->vm_ops || (vm_flags & VM_NO_THP)) + /* khugepaged not yet working on file or special mappings */ + return 0; +- VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma); + hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; + hend = vma->vm_end & HPAGE_PMD_MASK; + if (hstart < hend) +@@ -2498,8 +2497,7 @@ static bool hugepage_vma_check(struct vm + return false; + if (is_vma_temporary_stack(vma)) + return false; +- VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma); +- return true; ++ return !(vma->vm_flags & VM_NO_THP); + } + + static void collapse_huge_page(struct mm_struct *mm, diff --git a/queue-4.4/mm-hwpoison-fix-wrong-num_poisoned_pages-accounting.patch b/queue-4.4/mm-hwpoison-fix-wrong-num_poisoned_pages-accounting.patch new file mode 100644 index 00000000000..7663a933268 --- /dev/null +++ b/queue-4.4/mm-hwpoison-fix-wrong-num_poisoned_pages-accounting.patch @@ -0,0 +1,45 @@ +From d7e69488bd04de165667f6bc741c1c0ec6042ab9 Mon Sep 17 00:00:00 2001 +From: Minchan Kim +Date: Thu, 28 Apr 2016 16:18:44 -0700 +Subject: mm/hwpoison: fix wrong num_poisoned_pages accounting + +From: Minchan Kim + +commit d7e69488bd04de165667f6bc741c1c0ec6042ab9 upstream. + +Currently, migration code increses num_poisoned_pages on *failed* +migration page as well as successfully migrated one at the trial of +memory-failure. It will make the stat wrong. As well, it marks the +page as PG_HWPoison even if the migration trial failed. It would mean +we cannot recover the corrupted page using memory-failure facility. + +This patches fixes it. + +Signed-off-by: Minchan Kim +Reported-by: Vlastimil Babka +Acked-by: Naoya Horiguchi +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/migrate.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -963,7 +963,13 @@ out: + dec_zone_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); + /* Soft-offlined page shouldn't go through lru cache list */ +- if (reason == MR_MEMORY_FAILURE) { ++ if (reason == MR_MEMORY_FAILURE && rc == MIGRATEPAGE_SUCCESS) { ++ /* ++ * With this release, we free successfully migrated ++ * page and set PG_HWPoison on just freed page ++ * intentionally. Although it's rather weird, it's how ++ * HWPoison flag works at the moment. 
++ */ + put_page(page); + if (!test_set_page_hwpoison(page)) + num_poisoned_pages_inc(); diff --git a/queue-4.4/mm-vmscan-reclaim-highmem-zone-if-buffer_heads-is-over-limit.patch b/queue-4.4/mm-vmscan-reclaim-highmem-zone-if-buffer_heads-is-over-limit.patch new file mode 100644 index 00000000000..e08fe1fca68 --- /dev/null +++ b/queue-4.4/mm-vmscan-reclaim-highmem-zone-if-buffer_heads-is-over-limit.patch @@ -0,0 +1,36 @@ +From 7bf52fb891b64b8d61caf0b82060adb9db761aec Mon Sep 17 00:00:00 2001 +From: Minchan Kim +Date: Thu, 28 Apr 2016 16:18:38 -0700 +Subject: mm: vmscan: reclaim highmem zone if buffer_heads is over limit + +From: Minchan Kim + +commit 7bf52fb891b64b8d61caf0b82060adb9db761aec upstream. + +We have been reclaimed highmem zone if buffer_heads is over limit but +commit 6b4f7799c6a5 ("mm: vmscan: invoke slab shrinkers from +shrink_zone()") changed the behavior so it doesn't reclaim highmem zone +although buffer_heads is over the limit. This patch restores the logic. + +Fixes: 6b4f7799c6a5 ("mm: vmscan: invoke slab shrinkers from shrink_zone()") +Signed-off-by: Minchan Kim +Cc: Johannes Weiner +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/vmscan.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -2534,7 +2534,7 @@ static bool shrink_zones(struct zonelist + sc->gfp_mask |= __GFP_HIGHMEM; + + for_each_zone_zonelist_nodemask(zone, z, zonelist, +- requested_highidx, sc->nodemask) { ++ gfp_zone(sc->gfp_mask), sc->nodemask) { + enum zone_type classzone_idx; + + if (!populated_zone(zone)) diff --git a/queue-4.4/numa-fix-proc-pid-numa_maps-for-thp.patch b/queue-4.4/numa-fix-proc-pid-numa_maps-for-thp.patch new file mode 100644 index 00000000000..8eb81f59901 --- /dev/null +++ b/queue-4.4/numa-fix-proc-pid-numa_maps-for-thp.patch @@ -0,0 +1,170 @@ +From 28093f9f34cedeaea0f481c58446d9dac6dd620f Mon Sep 17 00:00:00 2001 +From: Gerald Schaefer +Date: Thu, 28 Apr 2016 16:18:35 -0700 +Subject: numa: fix /proc//numa_maps for THP + +From: Gerald Schaefer + +commit 28093f9f34cedeaea0f481c58446d9dac6dd620f upstream. + +In gather_pte_stats() a THP pmd is cast into a pte, which is wrong +because the layouts may differ depending on the architecture. On s390 +this will lead to inaccurate numa_maps accounting in /proc because of +misguided pte_present() and pte_dirty() checks on the fake pte. + +On other architectures pte_present() and pte_dirty() may work by chance, +but there may be an issue with direct-access (dax) mappings w/o +underlying struct pages when HAVE_PTE_SPECIAL is set and THP is +available. In vm_normal_page() the fake pte will be checked with +pte_special() and because there is no "special" bit in a pmd, this will +always return false and the VM_PFNMAP | VM_MIXEDMAP checking will be +skipped. On dax mappings w/o struct pages, an invalid struct page +pointer would then be returned that can crash the kernel. + +This patch fixes the numa_maps THP handling by introducing new "_pmd" +variants of the can_gather_numa_stats() and vm_normal_page() functions. + +Signed-off-by: Gerald Schaefer +Cc: Naoya Horiguchi +Cc: "Kirill A . 
Shutemov" +Cc: Konstantin Khlebnikov +Cc: Michal Hocko +Cc: Vlastimil Babka +Cc: Jerome Marchand +Cc: Johannes Weiner +Cc: Dave Hansen +Cc: Mel Gorman +Cc: Dan Williams +Cc: Martin Schwidefsky +Cc: Heiko Carstens +Cc: Michael Holzheu +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/proc/task_mmu.c | 33 ++++++++++++++++++++++++++++++--- + include/linux/mm.h | 2 ++ + mm/memory.c | 40 ++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 72 insertions(+), 3 deletions(-) + +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -1435,6 +1435,32 @@ static struct page *can_gather_numa_stat + return page; + } + ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++static struct page *can_gather_numa_stats_pmd(pmd_t pmd, ++ struct vm_area_struct *vma, ++ unsigned long addr) ++{ ++ struct page *page; ++ int nid; ++ ++ if (!pmd_present(pmd)) ++ return NULL; ++ ++ page = vm_normal_page_pmd(vma, addr, pmd); ++ if (!page) ++ return NULL; ++ ++ if (PageReserved(page)) ++ return NULL; ++ ++ nid = page_to_nid(page); ++ if (!node_isset(nid, node_states[N_MEMORY])) ++ return NULL; ++ ++ return page; ++} ++#endif ++ + static int gather_pte_stats(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) + { +@@ -1444,13 +1470,13 @@ static int gather_pte_stats(pmd_t *pmd, + pte_t *orig_pte; + pte_t *pte; + ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { +- pte_t huge_pte = *(pte_t *)pmd; + struct page *page; + +- page = can_gather_numa_stats(huge_pte, vma, addr); ++ page = can_gather_numa_stats_pmd(*pmd, vma, addr); + if (page) +- gather_stats(page, md, pte_dirty(huge_pte), ++ gather_stats(page, md, pmd_dirty(*pmd), + HPAGE_PMD_SIZE/PAGE_SIZE); + spin_unlock(ptl); + return 0; +@@ -1458,6 +1484,7 @@ static int gather_pte_stats(pmd_t *pmd, + + if (pmd_trans_unstable(pmd)) + return 0; ++#endif + orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + do { + struct page *page = can_gather_numa_stats(*pte, vma, addr); +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1084,6 +1084,8 @@ struct zap_details { + + struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, + pte_t pte); ++struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, ++ pmd_t pmd); + + int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, + unsigned long size); +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -797,6 +797,46 @@ out: + return pfn_to_page(pfn); + } + ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, ++ pmd_t pmd) ++{ ++ unsigned long pfn = pmd_pfn(pmd); ++ ++ /* ++ * There is no pmd_special() but there may be special pmds, e.g. ++ * in a direct-access (dax) mapping, so let's just replicate the ++ * !HAVE_PTE_SPECIAL case from vm_normal_page() here. ++ */ ++ if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { ++ if (vma->vm_flags & VM_MIXEDMAP) { ++ if (!pfn_valid(pfn)) ++ return NULL; ++ goto out; ++ } else { ++ unsigned long off; ++ off = (addr - vma->vm_start) >> PAGE_SHIFT; ++ if (pfn == vma->vm_pgoff + off) ++ return NULL; ++ if (!is_cow_mapping(vma->vm_flags)) ++ return NULL; ++ } ++ } ++ ++ if (is_zero_pfn(pfn)) ++ return NULL; ++ if (unlikely(pfn > highest_memmap_pfn)) ++ return NULL; ++ ++ /* ++ * NOTE! We still have PageReserved() pages in the page tables. ++ * eg. VDSO mappings can cause them to exist. 
++ */ ++out: ++ return pfn_to_page(pfn); ++} ++#endif ++ + /* + * copy one vm_area from one task to the other. Assumes the page tables + * already present in the new task to be cleared in the whole range diff --git a/queue-4.4/series b/queue-4.4/series index 70d8f9b365f..8312baa340f 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -87,3 +87,24 @@ i2c-cpm-fix-build-break-due-to-incompatible-pointer-types.patch i2c-exynos5-fix-possible-abba-deadlock-by-keeping-i2c-clock-prepared.patch toshiba_acpi-fix-regression-caused-by-hotkey-enabling-value.patch edac-i7core-sb_edac-don-t-return-notify_bad-from-mce_decoder-callback.patch +asoc-s3c24xx-use-const-snd_soc_component_driver-pointer.patch +asoc-ssm4567-reset-device-before-regcache_sync.patch +asoc-dapm-make-sure-we-have-a-card-when-displaying-component-widgets.patch +asoc-rt5640-correct-the-digital-interface-data-select.patch +vb2-memops-fix-over-allocation-of-frame-vectors.patch +videobuf2-v4l2-verify-planes-array-in-buffer-dequeueing.patch +v4l2-dv-timings.h-fix-polarity-for-4k-formats.patch +cxl-keep-irq-mappings-on-context-teardown.patch +ib-mlx5-expose-correct-max_sge_rd-limit.patch +ib-security-restrict-use-of-the-write-interface.patch +efi-fix-out-of-bounds-read-in-variable_matches.patch +efi-expose-non-blocking-set_variable-wrapper-to-efivars.patch +x86-apic-handle-zero-vector-gracefully-in-clear_vector_irq.patch +workqueue-fix-ghost-pending-flag-while-doing-mq-io.patch +slub-clean-up-code-for-kmem-cgroup-support-to-kmem_cache_free_bulk.patch +cgroup-cpuset-replace-cpuset_post_attach_flush-with-cgroup_subsys-post_attach-callback.patch +memcg-relocate-charge-moving-from-attach-to-post_attach.patch +mm-huge_memory-replace-vm_no_thp-vm_bug_on-with-actual-vma-check.patch +numa-fix-proc-pid-numa_maps-for-thp.patch +mm-vmscan-reclaim-highmem-zone-if-buffer_heads-is-over-limit.patch +mm-hwpoison-fix-wrong-num_poisoned_pages-accounting.patch diff --git a/queue-4.4/slub-clean-up-code-for-kmem-cgroup-support-to-kmem_cache_free_bulk.patch b/queue-4.4/slub-clean-up-code-for-kmem-cgroup-support-to-kmem_cache_free_bulk.patch new file mode 100644 index 00000000000..92b489a4657 --- /dev/null +++ b/queue-4.4/slub-clean-up-code-for-kmem-cgroup-support-to-kmem_cache_free_bulk.patch @@ -0,0 +1,115 @@ +From 376bf125ac781d32e202760ed7deb1ae4ed35d31 Mon Sep 17 00:00:00 2001 +From: Jesper Dangaard Brouer +Date: Tue, 15 Mar 2016 14:53:32 -0700 +Subject: slub: clean up code for kmem cgroup support to kmem_cache_free_bulk + +From: Jesper Dangaard Brouer + +commit 376bf125ac781d32e202760ed7deb1ae4ed35d31 upstream. + +This change is primarily an attempt to make it easier to realize the +optimizations the compiler performs in-case CONFIG_MEMCG_KMEM is not +enabled. + +Performance wise, even when CONFIG_MEMCG_KMEM is compiled in, the +overhead is zero. This is because, as long as no process have enabled +kmem cgroups accounting, the assignment is replaced by asm-NOP +operations. This is possible because memcg_kmem_enabled() uses a +static_key_false() construct. + +It also helps readability as it avoid accessing the p[] array like: +p[size - 1] which "expose" that the array is processed backwards inside +helper function build_detached_freelist(). + +Lastly this also makes the code more robust, in error case like passing +NULL pointers in the array. Which were previously handled before commit +033745189b1b ("slub: add missing kmem cgroup support to +kmem_cache_free_bulk"). 
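The zero-overhead claim comes from the static key machinery behind memcg_kmem_enabled(); a generic sketch of that construct (illustrative, not the slub code itself; cache_from_obj() is the internal helper named in the diff):

	static struct static_key accounting_key = STATIC_KEY_INIT_FALSE;

	static inline struct kmem_cache *lookup_cache(struct kmem_cache *s,
						      void *object)
	{
		/* while the key stays disabled, static_key_false() is patched
		 * to a straight fall-through, so the slow path costs nothing */
		if (static_key_false(&accounting_key))
			return cache_from_obj(s, object);
		return s;
	}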
+ +Fixes: 033745189b1b ("slub: add missing kmem cgroup support to kmem_cache_free_bulk") +Signed-off-by: Jesper Dangaard Brouer +Cc: Christoph Lameter +Cc: Pekka Enberg +Cc: David Rientjes +Cc: Joonsoo Kim +Cc: Vladimir Davydov +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/slub.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2819,6 +2819,7 @@ struct detached_freelist { + void *tail; + void *freelist; + int cnt; ++ struct kmem_cache *s; + }; + + /* +@@ -2833,8 +2834,9 @@ struct detached_freelist { + * synchronization primitive. Look ahead in the array is limited due + * to performance reasons. + */ +-static int build_detached_freelist(struct kmem_cache *s, size_t size, +- void **p, struct detached_freelist *df) ++static inline ++int build_detached_freelist(struct kmem_cache *s, size_t size, ++ void **p, struct detached_freelist *df) + { + size_t first_skipped_index = 0; + int lookahead = 3; +@@ -2850,8 +2852,11 @@ static int build_detached_freelist(struc + if (!object) + return 0; + ++ /* Support for memcg, compiler can optimize this out */ ++ df->s = cache_from_obj(s, object); ++ + /* Start new detached freelist */ +- set_freepointer(s, object, NULL); ++ set_freepointer(df->s, object, NULL); + df->page = virt_to_head_page(object); + df->tail = object; + df->freelist = object; +@@ -2866,7 +2871,7 @@ static int build_detached_freelist(struc + /* df->page is always set at this point */ + if (df->page == virt_to_head_page(object)) { + /* Opportunity build freelist */ +- set_freepointer(s, object, df->freelist); ++ set_freepointer(df->s, object, df->freelist); + df->freelist = object; + df->cnt++; + p[size] = NULL; /* mark object processed */ +@@ -2885,25 +2890,20 @@ static int build_detached_freelist(struc + return first_skipped_index; + } + +- + /* Note that interrupts must be enabled when calling this function. */ +-void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) ++void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) + { + if (WARN_ON(!size)) + return; + + do { + struct detached_freelist df; +- struct kmem_cache *s; +- +- /* Support for memcg */ +- s = cache_from_obj(orig_s, p[size - 1]); + + size = build_detached_freelist(s, size, p, &df); + if (unlikely(!df.page)) + continue; + +- slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); ++ slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_); + } while (likely(size)); + } + EXPORT_SYMBOL(kmem_cache_free_bulk); diff --git a/queue-4.4/v4l2-dv-timings.h-fix-polarity-for-4k-formats.patch b/queue-4.4/v4l2-dv-timings.h-fix-polarity-for-4k-formats.patch new file mode 100644 index 00000000000..a3420a8c05b --- /dev/null +++ b/queue-4.4/v4l2-dv-timings.h-fix-polarity-for-4k-formats.patch @@ -0,0 +1,120 @@ +From 3020ca711871fdaf0c15c8bab677a6bc302e28fe Mon Sep 17 00:00:00 2001 +From: Hans Verkuil +Date: Fri, 22 Apr 2016 04:00:50 -0300 +Subject: [media] v4l2-dv-timings.h: fix polarity for 4k formats + +From: Hans Verkuil + +commit 3020ca711871fdaf0c15c8bab677a6bc302e28fe upstream. + +The VSync polarity was negative instead of positive for the 4k CEA formats. +I probably copy-and-pasted these from the DMT 4k format, which does have a +negative VSync polarity. 
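For anyone validating timings against this change, the polarities are plain flag bits in the timing struct; a hedged sketch of the check (struct and flag names are from the v4l2 uAPI, the surrounding code is ours):

	/* after the fix, the CEA 4k formats report both syncs positive */
	const struct v4l2_bt_timings *bt = &timings.bt;
	bool hsync_pos = bt->polarities & V4L2_DV_HSYNC_POS_POL;
	bool vsync_pos = bt->polarities & V4L2_DV_VSYNC_POS_POL;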
+ +Signed-off-by: Hans Verkuil +Reported-by: Martin Bugge +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Greg Kroah-Hartman + +--- + include/uapi/linux/v4l2-dv-timings.h | 30 ++++++++++++++++++++---------- + 1 file changed, 20 insertions(+), 10 deletions(-) + +--- a/include/uapi/linux/v4l2-dv-timings.h ++++ b/include/uapi/linux/v4l2-dv-timings.h +@@ -183,7 +183,8 @@ + + #define V4L2_DV_BT_CEA_3840X2160P24 { \ + .type = V4L2_DV_BT_656_1120, \ +- V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ ++ V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ ++ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ + 297000000, 1276, 88, 296, 8, 10, 72, 0, 0, 0, \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ +@@ -191,14 +192,16 @@ + + #define V4L2_DV_BT_CEA_3840X2160P25 { \ + .type = V4L2_DV_BT_656_1120, \ +- V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ ++ V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ ++ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ + 297000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \ + V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ + } + + #define V4L2_DV_BT_CEA_3840X2160P30 { \ + .type = V4L2_DV_BT_656_1120, \ +- V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ ++ V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ ++ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ + 297000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ +@@ -206,14 +209,16 @@ + + #define V4L2_DV_BT_CEA_3840X2160P50 { \ + .type = V4L2_DV_BT_656_1120, \ +- V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ ++ V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ ++ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ + 594000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \ + V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ + } + + #define V4L2_DV_BT_CEA_3840X2160P60 { \ + .type = V4L2_DV_BT_656_1120, \ +- V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ ++ V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ ++ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ + 594000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ +@@ -221,7 +226,8 @@ + + #define V4L2_DV_BT_CEA_4096X2160P24 { \ + .type = V4L2_DV_BT_656_1120, \ +- V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ ++ V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ ++ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ + 297000000, 1020, 88, 296, 8, 10, 72, 0, 0, 0, \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ +@@ -229,14 +235,16 @@ + + #define V4L2_DV_BT_CEA_4096X2160P25 { \ + .type = V4L2_DV_BT_656_1120, \ +- V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ ++ V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ ++ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ + 297000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \ + V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ + } + + #define V4L2_DV_BT_CEA_4096X2160P30 { \ + .type = V4L2_DV_BT_656_1120, \ +- V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ ++ V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ ++ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ + 297000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ +@@ -244,14 +252,16 @@ + + #define V4L2_DV_BT_CEA_4096X2160P50 { \ + .type = V4L2_DV_BT_656_1120, \ +- V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ ++ V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ ++ V4L2_DV_HSYNC_POS_POL | 
V4L2_DV_VSYNC_POS_POL, \ + 594000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \ + V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ + } + + #define V4L2_DV_BT_CEA_4096X2160P60 { \ + .type = V4L2_DV_BT_656_1120, \ +- V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ ++ V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ ++ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ + 594000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ diff --git a/queue-4.4/vb2-memops-fix-over-allocation-of-frame-vectors.patch b/queue-4.4/vb2-memops-fix-over-allocation-of-frame-vectors.patch new file mode 100644 index 00000000000..ee076d50705 --- /dev/null +++ b/queue-4.4/vb2-memops-fix-over-allocation-of-frame-vectors.patch @@ -0,0 +1,66 @@ +From 89a095668304e8a02502ffd35edacffdbf49aa8c Mon Sep 17 00:00:00 2001 +From: Ricardo Ribalda +Date: Thu, 3 Mar 2016 16:12:48 -0300 +Subject: [media] vb2-memops: Fix over allocation of frame vectors + +From: Ricardo Ribalda Delgado + +commit 89a095668304e8a02502ffd35edacffdbf49aa8c upstream. + +On page unaligned frames, create_framevec forces get_vaddr_frames to +allocate an extra page at the end of the buffer. Under some +circumstances, this leads to -EINVAL on VIDIOC_QBUF. + +E.g: +We have vm_a that vm_area that goes from 0x1000 to 0x3000. And a +frame that goes from 0x1800 to 0x2800, i.e. 2 pages. + +frame_vector_create will be called with the following params: + +get_vaddr_frames(0x1800, 2, write, 1, vec); + +get_vaddr will allocate the first page after checking that the memory +0x1800-0x27ff is valid, but it will not allocate the second page because +the range 0x2800-0x37ff is out of the vm_a range. This results in +create_framevec returning -EFAULT + +Error Trace: +[ 9083.793015] video0: VIDIOC_QBUF: 00:00:00.00000000 index=1, +type=vid-cap, flags=0x00002002, field=any, sequence=0, +memory=userptr, bytesused=0, offset/userptr=0x7ff2b023ca80, length=5765760 +[ 9083.793028] timecode=00:00:00 type=0, flags=0x00000000, +frames=0, userbits=0x00000000 +[ 9083.793117] video0: VIDIOC_QBUF: error -22: 00:00:00.00000000 +index=2, type=vid-cap, flags=0x00000000, field=any, sequence=0, +memory=userptr, bytesused=0, offset/userptr=0x7ff2b07bc500, length=5765760 + +Also use true instead of 1 since that argument is a bool in the +get_vaddr_frames() prototype. 
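The alignment arithmetic in the example can be replayed in isolation (standalone illustration, values taken from the commit message above):

	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define PAGE_SIZE	(1UL << PAGE_SHIFT)
	#define PAGE_MASK	(~(PAGE_SIZE - 1))

	int main(void)
	{
		unsigned long start = 0x1800, len = 0x1000; /* 0x1800..0x27ff */
		unsigned long first = start & PAGE_MASK;    /* 0x1000 */
		unsigned long last = (start + len - 1) & PAGE_MASK; /* 0x2000 */
		unsigned long nr = ((last - first) >> PAGE_SHIFT) + 1;

		printf("nr_pages = %lu\n", nr); /* 2, both inside the vma */
		return 0;
	}

Passing start & PAGE_MASK with the buffer's true page count, as the fix does, keeps the checked range inside the pages the frame actually spans.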
+ +Fixes: 21fb0cb7ec65 ("[media] vb2: Provide helpers for mapping virtual addresses") + +Reported-by: Albert Antony +Signed-off-by: Ricardo Ribalda Delgado +[hans.verkuil@cisco.com: merged the 'bool' change into this patch] +Acked-by: Marek Szyprowski +Reviewed-by: Jan Kara +Signed-off-by: Hans Verkuil +Signed-off-by: Greg Kroah-Hartman + +Signed-off-by: Mauro Carvalho Chehab + +--- + drivers/media/v4l2-core/videobuf2-memops.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/media/v4l2-core/videobuf2-memops.c ++++ b/drivers/media/v4l2-core/videobuf2-memops.c +@@ -49,7 +49,7 @@ struct frame_vector *vb2_create_framevec + vec = frame_vector_create(nr); + if (!vec) + return ERR_PTR(-ENOMEM); +- ret = get_vaddr_frames(start, nr, write, 1, vec); ++ ret = get_vaddr_frames(start & PAGE_MASK, nr, write, true, vec); + if (ret < 0) + goto out_destroy; + /* We accept only complete set of PFNs */ diff --git a/queue-4.4/videobuf2-v4l2-verify-planes-array-in-buffer-dequeueing.patch b/queue-4.4/videobuf2-v4l2-verify-planes-array-in-buffer-dequeueing.patch new file mode 100644 index 00000000000..47115b7ff8c --- /dev/null +++ b/queue-4.4/videobuf2-v4l2-verify-planes-array-in-buffer-dequeueing.patch @@ -0,0 +1,55 @@ +From 2c1f6951a8a82e6de0d82b1158b5e493fc6c54ab Mon Sep 17 00:00:00 2001 +From: Sakari Ailus +Date: Sun, 3 Apr 2016 16:31:03 -0300 +Subject: [media] videobuf2-v4l2: Verify planes array in buffer dequeueing + +From: Sakari Ailus + +commit 2c1f6951a8a82e6de0d82b1158b5e493fc6c54ab upstream. + +When a buffer is being dequeued using VIDIOC_DQBUF IOCTL, the exact buffer +which will be dequeued is not known until the buffer has been removed from +the queue. The number of planes is specific to a buffer, not to the queue. + +This does lead to the situation where multi-plane buffers may be requested +and queued with n planes, but VIDIOC_DQBUF IOCTL may be passed an argument +struct with fewer planes. + +__fill_v4l2_buffer() however uses the number of planes from the dequeued +videobuf2 buffer, overwriting kernel memory (the m.planes array allocated +in video_usercopy() in v4l2-ioctl.c) if the user provided fewer +planes than the dequeued buffer had. Oops! + +Fixes: b0e0e1f83de3 ("[media] media: videobuf2: Prepare to divide videobuf2") + +Signed-off-by: Sakari Ailus +Acked-by: Hans Verkuil +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/media/v4l2-core/videobuf2-v4l2.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/media/v4l2-core/videobuf2-v4l2.c ++++ b/drivers/media/v4l2-core/videobuf2-v4l2.c +@@ -67,6 +67,11 @@ static int __verify_planes_array(struct + return 0; + } + ++static int __verify_planes_array_core(struct vb2_buffer *vb, const void *pb) ++{ ++ return __verify_planes_array(vb, pb); ++} ++ + /** + * __verify_length() - Verify that the bytesused value for each plane fits in + * the plane length and that the data offset doesn't exceed the bytesused value. 
+@@ -432,6 +437,7 @@ static int __fill_vb2_buffer(struct vb2_
+ }
+ 
+ static const struct vb2_buf_ops v4l2_buf_ops = {
++	.verify_planes_array	= __verify_planes_array_core,
+ 	.fill_user_buffer	= __fill_v4l2_buffer,
+ 	.fill_vb2_buffer	= __fill_vb2_buffer,
+ 	.set_timestamp		= __set_timestamp,
diff --git a/queue-4.4/workqueue-fix-ghost-pending-flag-while-doing-mq-io.patch b/queue-4.4/workqueue-fix-ghost-pending-flag-while-doing-mq-io.patch
new file mode 100644
index 00000000000..e24fd084da7
--- /dev/null
+++ b/queue-4.4/workqueue-fix-ghost-pending-flag-while-doing-mq-io.patch
@@ -0,0 +1,163 @@
+From 346c09f80459a3ad97df1816d6d606169a51001a Mon Sep 17 00:00:00 2001
+From: Roman Pen
+Date: Tue, 26 Apr 2016 13:15:35 +0200
+Subject: workqueue: fix ghost PENDING flag while doing MQ IO
+
+From: Roman Pen
+
+commit 346c09f80459a3ad97df1816d6d606169a51001a upstream.
+
+A bug in the workqueue code leads to a stalled IO request in the MQ
+ctx->rq_list, with the following backtrace:
+
+[ 601.347452] INFO: task kworker/u129:5:1636 blocked for more than 120 seconds.
+[ 601.347574] Tainted: G O 4.4.5-1-storage+ #6
+[ 601.347651] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+[ 601.348142] kworker/u129:5 D ffff880803077988 0 1636 2 0x00000000
+[ 601.348519] Workqueue: ibnbd_server_fileio_wq ibnbd_dev_file_submit_io_worker [ibnbd_server]
+[ 601.348999] ffff880803077988 ffff88080466b900 ffff8808033f9c80 ffff880803078000
+[ 601.349662] ffff880807c95000 7fffffffffffffff ffffffff815b0920 ffff880803077ad0
+[ 601.350333] ffff8808030779a0 ffffffff815b01d5 0000000000000000 ffff880803077a38
+[ 601.350965] Call Trace:
+[ 601.351203] [] ? bit_wait+0x60/0x60
+[ 601.351444] [] schedule+0x35/0x80
+[ 601.351709] [] schedule_timeout+0x192/0x230
+[ 601.351958] [] ? blk_flush_plug_list+0xc7/0x220
+[ 601.352208] [] ? ktime_get+0x37/0xa0
+[ 601.352446] [] ? bit_wait+0x60/0x60
+[ 601.352688] [] io_schedule_timeout+0xa4/0x110
+[ 601.352951] [] ? _raw_spin_unlock_irqrestore+0xe/0x10
+[ 601.353196] [] bit_wait_io+0x1b/0x70
+[ 601.353440] [] __wait_on_bit+0x5d/0x90
+[ 601.353689] [] wait_on_page_bit+0xc0/0xd0
+[ 601.353958] [] ? autoremove_wake_function+0x40/0x40
+[ 601.354200] [] __filemap_fdatawait_range+0xe4/0x140
+[ 601.354441] [] filemap_fdatawait_range+0x14/0x30
+[ 601.354688] [] filemap_write_and_wait_range+0x3f/0x70
+[ 601.354932] [] blkdev_fsync+0x1b/0x50
+[ 601.355193] [] vfs_fsync_range+0x49/0xa0
+[ 601.355432] [] blkdev_write_iter+0xca/0x100
+[ 601.355679] [] __vfs_write+0xaa/0xe0
+[ 601.355925] [] vfs_write+0xa9/0x1a0
+[ 601.356164] [] kernel_write+0x38/0x50
+
+The underlying device is a null_blk, with default parameters:
+
+  queue_mode    = MQ
+  submit_queues = 1
+
+Verification that nullb0 has something in flight:
+
+root@pserver8:~# cat /sys/block/nullb0/inflight
+ 0 1
+root@pserver8:~# find /sys/block/nullb0/mq/0/cpu* -name rq_list -print -exec cat {} \;
+...
+/sys/block/nullb0/mq/0/cpu2/rq_list
+CTX pending:
+ ffff8838038e2400
+...
+
+During debugging it became clear that the stalled request is always
+inserted into the rq_list from the following path:
+
+   save_stack_trace_tsk + 34
+   blk_mq_insert_requests + 231
+   blk_mq_flush_plug_list + 281
+   blk_flush_plug_list + 199
+   wait_on_page_bit + 192
+   __filemap_fdatawait_range + 228
+   filemap_fdatawait_range + 20
+   filemap_write_and_wait_range + 63
+   blkdev_fsync + 27
+   vfs_fsync_range + 73
+   blkdev_write_iter + 202
+   __vfs_write + 170
+   vfs_write + 169
+   kernel_write + 56
+
+So blk_flush_plug_list() was called with from_schedule == true.
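+
+For illustration, here is a userspace analogue (using C11 atomics; the
+queue_side()/run_side() names are illustrative stand-ins, not kernel
+functions) of the PENDING-bit reordering analyzed below:
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+static atomic_bool pending;
+static atomic_int event_indicated;
+
+/* Queueing side: publish the event, then try to claim PENDING. */
+static bool queue_side(void)
+{
+	atomic_store(&event_indicated, 1);
+	/* True only if PENDING was clear, i.e. we must schedule the work. */
+	return !atomic_exchange(&pending, true);
+}
+
+/* Execution side: clear PENDING, then run the work function. */
+static void run_side(void)
+{
+	/* The seq_cst store plays the role of the smp_mb() this patch adds:
+	 * it keeps the load of event_indicated below from being satisfied
+	 * before the clear of pending is visible. With a relaxed store the
+	 * queueing side could still see PENDING set while we see the old
+	 * event_indicated -- the "ghost PENDING" stall. */
+	atomic_store_explicit(&pending, false, memory_order_seq_cst);
+
+	if (atomic_load(&event_indicated))
+		atomic_store(&event_indicated, 0);	/* handle the event */
+}
+
+int main(void)
+{
+	if (queue_side())
+		run_side();
+	return 0;
+}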
+
+If from_schedule is true, that means that blk_mq_insert_requests() finally
+offloads execution of __blk_mq_run_hw_queue() to the kblockd workqueue,
+i.e. it calls kblockd_schedule_delayed_work_on().
+
+That means that we race with another CPU, which is about to execute
+__blk_mq_run_hw_queue() work.
+
+Further debugging shows the following traces from different CPUs:
+
+  CPU#0                                  CPU#1
+  ----------------------------------     -------------------------------
+  request A inserted
+  STORE hctx->ctx_map[0] bit marked
+  kblockd_schedule...() returns 1
+
+                                         request B inserted
+                                         STORE hctx->ctx_map[1] bit marked
+                                         kblockd_schedule...() returns 0
+  *** WORK PENDING bit is cleared ***
+  flush_busy_ctxs() is executed, but
+  bit 1, set by CPU#1, is not observed
+
+As a result, request B remained pending forever.
+
+This behaviour can be explained by a speculative LOAD of hctx->ctx_map on
+CPU#0, which is reordered with the clear of the PENDING bit and executed
+_before_ the actual STORE of bit 1 on CPU#1.
+
+The proper fix is an explicit full barrier, smp_mb(), which guarantees
+that the clear of the PENDING bit is executed before any possible
+speculative LOADS or STORES inside the actual work function.
+
+Signed-off-by: Roman Pen
+Cc: Gioh Kim
+Cc: Michael Wang
+Cc: Tejun Heo
+Cc: Jens Axboe
+Cc: linux-block@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Tejun Heo
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/workqueue.c |   29 +++++++++++++++++++++++++++++
+ 1 file changed, 29 insertions(+)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -649,6 +649,35 @@ static void set_work_pool_and_clear_pend
+ 	 */
+ 	smp_wmb();
+ 	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
++	/*
++	 * The following mb guarantees that previous clear of a PENDING bit
++	 * will not be reordered with any speculative LOADS or STORES from
++	 * work->current_func, which is executed afterwards. This possible
++	 * reordering can lead to a missed execution on attempt to queue
++	 * the same @work. E.g. consider this case:
++	 *
++	 *   CPU#0                         CPU#1
++	 *   ----------------------------  --------------------------------
++	 *
++	 * 1  STORE event_indicated
++	 * 2  queue_work_on() {
++	 * 3    test_and_set_bit(PENDING)
++	 * 4 }                             set_..._and_clear_pending() {
++	 * 5                                 set_work_data() # clear bit
++	 * 6                                 smp_mb()
++	 * 7                               work->current_func() {
++	 *                                   LOAD event_indicated
++	 *                                 }
++	 *
++	 * Without an explicit full barrier, the speculative LOAD on line 8
++	 * can be executed before CPU#0 does the STORE on line 1. If that
++	 * happens, CPU#0 observes the PENDING bit still set and a new
++	 * execution of @work is not queued, in the hope that CPU#1 will
++	 * eventually finish the queued @work. Meanwhile CPU#1 does not see
++	 * that event_indicated is set, because the speculative LOAD was
++	 * executed before the actual STORE.
++	 */
++	smp_mb();
+ }
+ 
+ static void clear_work_data(struct work_struct *work)
diff --git a/queue-4.4/x86-apic-handle-zero-vector-gracefully-in-clear_vector_irq.patch b/queue-4.4/x86-apic-handle-zero-vector-gracefully-in-clear_vector_irq.patch
new file mode 100644
index 00000000000..883dd692649
--- /dev/null
+++ b/queue-4.4/x86-apic-handle-zero-vector-gracefully-in-clear_vector_irq.patch
@@ -0,0 +1,44 @@
+From 1bdb8970392a68489b469c3a330a1adb5ef61beb Mon Sep 17 00:00:00 2001
+From: Keith Busch
+Date: Wed, 27 Apr 2016 14:22:32 -0600
+Subject: x86/apic: Handle zero vector gracefully in clear_vector_irq()
+
+From: Keith Busch
+
+commit 1bdb8970392a68489b469c3a330a1adb5ef61beb upstream.
+
+If x86_vector_alloc_irq() fails, x86_vector_free_irqs() is invoked to clean
+up the already allocated vectors. This subsequently calls clear_vector_irq().
+
+The failed irq has no vector assigned, which triggers the BUG_ON(!vector) in
+clear_vector_irq().
+
+We cannot suppress the call to x86_vector_free_irqs() for the failed
+interrupt, because the other data related to this irq must be cleaned up as
+well. So calling clear_vector_irq() with vector == 0 is legitimate.
+
+Remove the BUG_ON and return if the vector is zero.
+
+[ tglx: Massaged changelog ]
+
+Fixes: b5dc8e6c21e7 "x86/irq: Use hierarchical irqdomain to manage CPU interrupt vectors"
+Signed-off-by: Keith Busch
+Signed-off-by: Thomas Gleixner
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kernel/apic/vector.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/apic/vector.c
++++ b/arch/x86/kernel/apic/vector.c
+@@ -254,7 +254,8 @@ static void clear_irq_vector(int irq, st
+ 	struct irq_desc *desc;
+ 	int cpu, vector;
+ 
+-	BUG_ON(!data->cfg.vector);
++	if (!data->cfg.vector)
++		return;
+ 
+ 	vector = data->cfg.vector;
+ 	for_each_cpu_and(cpu, data->domain, cpu_online_mask)