From: Greg Kroah-Hartman Date: Sat, 13 Sep 2014 01:31:08 +0000 (-0700) Subject: 3.10-stable patches X-Git-Tag: v3.10.55~22 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=58460a6126cd6809d49eeac622ce799f62ec8541;p=thirdparty%2Fkernel%2Fstable-queue.git 3.10-stable patches added patches: acpi-cpuidle-fix-deadlock-between-cpuidle_lock-and-cpu_hotplug.lock.patch acpi-run-fixed-event-device-notifications-in-process-context.patch acpica-utilities-fix-memory-leak-in-acpi_ut_copy_iobject_to_iobject.patch asoc-max98090-fix-missing-free_irq.patch asoc-pcm-fix-dpcm_path_put-in-dpcm-runtime-update.patch asoc-pxa-ssp-drop-sndrv_pcm_fmtbit_s24_le.patch asoc-samsung-correct-i2s-dai-suspend-resume-ops.patch asoc-wm_adsp-add-missing-module_license.patch bfa-fix-undefined-bit-shift-on-big-endian-architectures-with-32-bit-dma-address.patch mnt-add-tests-for-unprivileged-remount-cases-that-have-found-to-be-faulty.patch mnt-change-the-default-remount-atime-from-relatime-to-the-existing-value.patch mnt-correct-permission-checks-in-do_remount.patch mnt-move-the-test-for-mnt_lock_readonly-from-change_mount_flags-into-do_remount.patch mnt-only-change-user-settable-mount-flags-in-remount.patch ring-buffer-always-reset-iterator-to-reader-page.patch ring-buffer-up-rb_iter_peek-loop-count-to-3.patch --- diff --git a/queue-3.10/acpi-cpuidle-fix-deadlock-between-cpuidle_lock-and-cpu_hotplug.lock.patch b/queue-3.10/acpi-cpuidle-fix-deadlock-between-cpuidle_lock-and-cpu_hotplug.lock.patch new file mode 100644 index 00000000000..fda2061753a --- /dev/null +++ b/queue-3.10/acpi-cpuidle-fix-deadlock-between-cpuidle_lock-and-cpu_hotplug.lock.patch @@ -0,0 +1,71 @@ +From 6726655dfdd2dc60c035c690d9f10cb69d7ea075 Mon Sep 17 00:00:00 2001 +From: Jiri Kosina +Date: Wed, 3 Sep 2014 15:04:28 +0200 +Subject: ACPI / cpuidle: fix deadlock between cpuidle_lock and cpu_hotplug.lock + +From: Jiri Kosina + +commit 6726655dfdd2dc60c035c690d9f10cb69d7ea075 upstream. 
+ +There is the following AB-BA dependency between cpu_hotplug.lock and +cpuidle_lock: + +1) cpu_hotplug.lock -> cpuidle_lock +enable_nonboot_cpus() + _cpu_up() + cpu_hotplug_begin() + LOCK(cpu_hotplug.lock) + cpu_notify() + ... + acpi_processor_hotplug() + cpuidle_pause_and_lock() + LOCK(cpuidle_lock) + +2) cpuidle_lock -> cpu_hotplug.lock +acpi_os_execute_deferred() workqueue + ... + acpi_processor_cst_has_changed() + cpuidle_pause_and_lock() + LOCK(cpuidle_lock) + get_online_cpus() + LOCK(cpu_hotplug.lock) + +Fix this by reversing the order acpi_processor_cst_has_changed() does +things -- let it first execute the protection against CPU hotplug by +calling get_online_cpus() and obtain the cpuidle lock only after that (and +perform the symmetric change when allowing CPUs hotplug again and +dropping cpuidle lock). + +Spotted by lockdep. + +Signed-off-by: Jiri Kosina +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/acpi/processor_idle.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/acpi/processor_idle.c ++++ b/drivers/acpi/processor_idle.c +@@ -1101,9 +1101,9 @@ int acpi_processor_cst_has_changed(struc + + if (pr->id == 0 && cpuidle_get_driver() == &acpi_idle_driver) { + +- cpuidle_pause_and_lock(); + /* Protect against cpu-hotplug */ + get_online_cpus(); ++ cpuidle_pause_and_lock(); + + /* Disable all cpuidle devices */ + for_each_online_cpu(cpu) { +@@ -1130,8 +1130,8 @@ int acpi_processor_cst_has_changed(struc + cpuidle_enable_device(dev); + } + } +- put_online_cpus(); + cpuidle_resume_and_unlock(); ++ put_online_cpus(); + } + + return 0; diff --git a/queue-3.10/acpi-run-fixed-event-device-notifications-in-process-context.patch b/queue-3.10/acpi-run-fixed-event-device-notifications-in-process-context.patch new file mode 100644 index 00000000000..25685cafbda --- /dev/null +++ b/queue-3.10/acpi-run-fixed-event-device-notifications-in-process-context.patch @@ -0,0 +1,85 @@ +From
236105db632c6279a020f78c83e22eaef746006b Mon Sep 17 00:00:00 2001 +From: Lan Tianyu +Date: Tue, 26 Aug 2014 01:29:24 +0200 +Subject: ACPI: Run fixed event device notifications in process context + +From: Lan Tianyu + +commit 236105db632c6279a020f78c83e22eaef746006b upstream. + +Currently, notify callbacks for fixed button events are run from +interrupt context. That is not necessary and after commit 0bf6368ee8f2 +(ACPI / button: Add ACPI Button event via netlink routine) it causes +netlink routines to be called from interrupt context which is not +correct. + +Also, that is different from non-fixed device events (including +non-fixed button events) whose notify callbacks are all executed from +process context. + +For the above reasons, make fixed button device notify callbacks run +in process context which will avoid the deadlock when using netlink +to report button events to user space. + +Fixes: 0bf6368ee8f2 (ACPI / button: Add ACPI Button event via netlink routine) +Link: https://lkml.org/lkml/2014/8/21/606 +Reported-by: Benjamin Block +Reported-by: Knut Petersen +Signed-off-by: Lan Tianyu +[rjw: Function names, subject and changelog.] +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/acpi/scan.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/drivers/acpi/scan.c ++++ b/drivers/acpi/scan.c +@@ -769,12 +769,17 @@ static void acpi_device_notify(acpi_hand + device->driver->ops.notify(device, event); + } + +-static acpi_status acpi_device_notify_fixed(void *data) ++static void acpi_device_notify_fixed(void *data) + { + struct acpi_device *device = data; + + /* Fixed hardware devices have no handles */ + acpi_device_notify(NULL, ACPI_FIXED_HARDWARE_EVENT, device); ++} ++ ++static acpi_status acpi_device_fixed_event(void *data) ++{ ++ acpi_os_execute(OSL_NOTIFY_HANDLER, acpi_device_notify_fixed, data); + return AE_OK; + } + +@@ -785,12 +790,12 @@ static int acpi_device_install_notify_ha + if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) + status = + acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, +- acpi_device_notify_fixed, ++ acpi_device_fixed_event, + device); + else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) + status = + acpi_install_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, +- acpi_device_notify_fixed, ++ acpi_device_fixed_event, + device); + else + status = acpi_install_notify_handler(device->handle, +@@ -807,10 +812,10 @@ static void acpi_device_remove_notify_ha + { + if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) + acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, +- acpi_device_notify_fixed); ++ acpi_device_fixed_event); + else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) + acpi_remove_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, +- acpi_device_notify_fixed); ++ acpi_device_fixed_event); + else + acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, + acpi_device_notify); diff --git a/queue-3.10/acpica-utilities-fix-memory-leak-in-acpi_ut_copy_iobject_to_iobject.patch b/queue-3.10/acpica-utilities-fix-memory-leak-in-acpi_ut_copy_iobject_to_iobject.patch new file mode 100644 index 
00000000000..d8bfc759aca --- /dev/null +++ b/queue-3.10/acpica-utilities-fix-memory-leak-in-acpi_ut_copy_iobject_to_iobject.patch @@ -0,0 +1,42 @@ +From 8aa5e56eeb61a099ea6519eb30ee399e1bc043ce Mon Sep 17 00:00:00 2001 +From: "David E. Box" +Date: Tue, 8 Jul 2014 10:05:52 +0800 +Subject: ACPICA: Utilities: Fix memory leak in acpi_ut_copy_iobject_to_iobject + +From: "David E. Box" + +commit 8aa5e56eeb61a099ea6519eb30ee399e1bc043ce upstream. + +Adds return status check on copy routines to delete the allocated destination +object if either copy fails. Reported by Colin Ian King on bugs.acpica.org, +Bug 1087. +The last applicable commit: + Commit: 3371c19c294a4cb3649aa4e84606be8a1d999e61 + Subject: ACPICA: Remove ACPI_GET_OBJECT_TYPE macro + +Link: https://bugs.acpica.org/show_bug.cgi?id=1087 +Reported-by: Colin Ian King +Signed-off-by: David E. Box +Signed-off-by: Bob Moore +Signed-off-by: Lv Zheng +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/acpi/acpica/utcopy.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/acpi/acpica/utcopy.c ++++ b/drivers/acpi/acpica/utcopy.c +@@ -998,5 +998,11 @@ acpi_ut_copy_iobject_to_iobject(union ac + status = acpi_ut_copy_simple_object(source_desc, *dest_desc); + } + ++ /* Delete the allocated object if copy failed */ ++ ++ if (ACPI_FAILURE(status)) { ++ acpi_ut_remove_reference(*dest_desc); ++ } ++ + return_ACPI_STATUS(status); + } diff --git a/queue-3.10/asoc-max98090-fix-missing-free_irq.patch b/queue-3.10/asoc-max98090-fix-missing-free_irq.patch new file mode 100644 index 00000000000..d8599ce85e3 --- /dev/null +++ b/queue-3.10/asoc-max98090-fix-missing-free_irq.patch @@ -0,0 +1,34 @@ +From 4adeb0ccf86a5af1825bbfe290dee9e60a5ab870 Mon Sep 17 00:00:00 2001 +From: Jarkko Nikula +Date: Thu, 19 Jun 2014 09:32:05 +0300 +Subject: ASoC: max98090: Fix missing free_irq + +From: Jarkko Nikula + +commit 4adeb0ccf86a5af1825bbfe290dee9e60a5ab870 upstream. 
+ +max98090.c doesn't free the threaded interrupt it requests. This causes +an oops when doing "cat /proc/interrupts" after snd-soc-max98090.ko is +unloaded. + +Fix this by requesting the interrupt by using devm_request_threaded_irq(). + +Signed-off-by: Jarkko Nikula +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + sound/soc/codecs/max98090.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/sound/soc/codecs/max98090.c ++++ b/sound/soc/codecs/max98090.c +@@ -2234,7 +2234,7 @@ static int max98090_probe(struct snd_soc + /* Register for interrupts */ + dev_dbg(codec->dev, "irq = %d\n", max98090->irq); + +- ret = request_threaded_irq(max98090->irq, NULL, ++ ret = devm_request_threaded_irq(codec->dev, max98090->irq, NULL, + max98090_interrupt, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + "max98090_interrupt", codec); + if (ret < 0) { diff --git a/queue-3.10/asoc-pcm-fix-dpcm_path_put-in-dpcm-runtime-update.patch b/queue-3.10/asoc-pcm-fix-dpcm_path_put-in-dpcm-runtime-update.patch new file mode 100644 index 00000000000..6fec0080f32 --- /dev/null +++ b/queue-3.10/asoc-pcm-fix-dpcm_path_put-in-dpcm-runtime-update.patch @@ -0,0 +1,31 @@ +From 7ed9de76ff342cbd717a9cf897044b99272cb8f8 Mon Sep 17 00:00:00 2001 +From: Qiao Zhou +Date: Wed, 4 Jun 2014 19:42:06 +0800 +Subject: ASoC: pcm: fix dpcm_path_put in dpcm runtime update + +From: Qiao Zhou + +commit 7ed9de76ff342cbd717a9cf897044b99272cb8f8 upstream. + +we need to release dapm widget list after dpcm_path_get in +soc_dpcm_runtime_update. otherwise, there will be potential memory +leak. add dpcm_path_put to fix it. 
+ +Signed-off-by: Qiao Zhou +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + sound/soc/soc-pcm.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/sound/soc/soc-pcm.c ++++ b/sound/soc/soc-pcm.c +@@ -1886,6 +1886,7 @@ int soc_dpcm_runtime_update(struct snd_s + dpcm_be_disconnect(fe, SNDRV_PCM_STREAM_PLAYBACK); + } + ++ dpcm_path_put(&list); + capture: + /* skip if FE doesn't have capture capability */ + if (!fe->cpu_dai->driver->capture.channels_min) diff --git a/queue-3.10/asoc-pxa-ssp-drop-sndrv_pcm_fmtbit_s24_le.patch b/queue-3.10/asoc-pxa-ssp-drop-sndrv_pcm_fmtbit_s24_le.patch new file mode 100644 index 00000000000..f4c76e09865 --- /dev/null +++ b/queue-3.10/asoc-pxa-ssp-drop-sndrv_pcm_fmtbit_s24_le.patch @@ -0,0 +1,34 @@ +From 9301503af016eb537ccce76adec0c1bb5c84871e Mon Sep 17 00:00:00 2001 +From: Daniel Mack +Date: Wed, 13 Aug 2014 21:51:06 +0200 +Subject: ASoC: pxa-ssp: drop SNDRV_PCM_FMTBIT_S24_LE + +From: Daniel Mack + +commit 9301503af016eb537ccce76adec0c1bb5c84871e upstream. + +This mode is unsupported, as the DMA controller can't do zero-padding +of samples. 
+ +Signed-off-by: Daniel Mack +Reported-by: Johannes Stezenbach +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + sound/soc/pxa/pxa-ssp.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/sound/soc/pxa/pxa-ssp.c ++++ b/sound/soc/pxa/pxa-ssp.c +@@ -757,9 +757,7 @@ static int pxa_ssp_remove(struct snd_soc + SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_64000 | \ + SNDRV_PCM_RATE_88200 | SNDRV_PCM_RATE_96000) + +-#define PXA_SSP_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ +- SNDRV_PCM_FMTBIT_S24_LE | \ +- SNDRV_PCM_FMTBIT_S32_LE) ++#define PXA_SSP_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE) + + static const struct snd_soc_dai_ops pxa_ssp_dai_ops = { + .startup = pxa_ssp_startup, diff --git a/queue-3.10/asoc-samsung-correct-i2s-dai-suspend-resume-ops.patch b/queue-3.10/asoc-samsung-correct-i2s-dai-suspend-resume-ops.patch new file mode 100644 index 00000000000..547866affb7 --- /dev/null +++ b/queue-3.10/asoc-samsung-correct-i2s-dai-suspend-resume-ops.patch @@ -0,0 +1,55 @@ +From d3d4e5247b013008a39e4d5f69ce4c60ed57f997 Mon Sep 17 00:00:00 2001 +From: Sylwester Nawrocki +Date: Fri, 4 Jul 2014 16:05:45 +0200 +Subject: ASoC: samsung: Correct I2S DAI suspend/resume ops + +From: Sylwester Nawrocki + +commit d3d4e5247b013008a39e4d5f69ce4c60ed57f997 upstream. + +We should save/restore relevant I2S registers regardless of +the dai->active flag, otherwise some settings are being lost +after system suspend/resume cycle. E.g. I2S slave mode set only +during dai initialization is not preserved and the device ends +up in master mode after system resume. 
+ +Signed-off-by: Sylwester Nawrocki +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + sound/soc/samsung/i2s.c | 16 ++++++---------- + 1 file changed, 6 insertions(+), 10 deletions(-) + +--- a/sound/soc/samsung/i2s.c ++++ b/sound/soc/samsung/i2s.c +@@ -853,11 +853,9 @@ static int i2s_suspend(struct snd_soc_da + { + struct i2s_dai *i2s = to_info(dai); + +- if (dai->active) { +- i2s->suspend_i2smod = readl(i2s->addr + I2SMOD); +- i2s->suspend_i2scon = readl(i2s->addr + I2SCON); +- i2s->suspend_i2spsr = readl(i2s->addr + I2SPSR); +- } ++ i2s->suspend_i2smod = readl(i2s->addr + I2SMOD); ++ i2s->suspend_i2scon = readl(i2s->addr + I2SCON); ++ i2s->suspend_i2spsr = readl(i2s->addr + I2SPSR); + + return 0; + } +@@ -866,11 +864,9 @@ static int i2s_resume(struct snd_soc_dai + { + struct i2s_dai *i2s = to_info(dai); + +- if (dai->active) { +- writel(i2s->suspend_i2scon, i2s->addr + I2SCON); +- writel(i2s->suspend_i2smod, i2s->addr + I2SMOD); +- writel(i2s->suspend_i2spsr, i2s->addr + I2SPSR); +- } ++ writel(i2s->suspend_i2scon, i2s->addr + I2SCON); ++ writel(i2s->suspend_i2smod, i2s->addr + I2SMOD); ++ writel(i2s->suspend_i2spsr, i2s->addr + I2SPSR); + + return 0; + } diff --git a/queue-3.10/asoc-wm_adsp-add-missing-module_license.patch b/queue-3.10/asoc-wm_adsp-add-missing-module_license.patch new file mode 100644 index 00000000000..195a0af9ef8 --- /dev/null +++ b/queue-3.10/asoc-wm_adsp-add-missing-module_license.patch @@ -0,0 +1,30 @@ +From 0a37c6efec4a2fdc2563c5a8faa472b814deee80 Mon Sep 17 00:00:00 2001 +From: Praveen Diwakar +Date: Fri, 4 Jul 2014 11:17:41 +0530 +Subject: ASoC: wm_adsp: Add missing MODULE_LICENSE + +From: Praveen Diwakar + +commit 0a37c6efec4a2fdc2563c5a8faa472b814deee80 upstream. + +Since MODULE_LICENSE is missing the module load fails, +so add this for module. 
+ +Signed-off-by: Praveen Diwakar +Signed-off-by: Vinod Koul +Reviewed-by: Charles Keepax +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + sound/soc/codecs/wm_adsp.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/sound/soc/codecs/wm_adsp.c ++++ b/sound/soc/codecs/wm_adsp.c +@@ -1284,3 +1284,5 @@ int wm_adsp2_init(struct wm_adsp *adsp, + return 0; + } + EXPORT_SYMBOL_GPL(wm_adsp2_init); ++ ++MODULE_LICENSE("GPL v2"); diff --git a/queue-3.10/bfa-fix-undefined-bit-shift-on-big-endian-architectures-with-32-bit-dma-address.patch b/queue-3.10/bfa-fix-undefined-bit-shift-on-big-endian-architectures-with-32-bit-dma-address.patch new file mode 100644 index 00000000000..c1919e1577d --- /dev/null +++ b/queue-3.10/bfa-fix-undefined-bit-shift-on-big-endian-architectures-with-32-bit-dma-address.patch @@ -0,0 +1,49 @@ +From 03a6c3ff3282ee9fa893089304d951e0be93a144 Mon Sep 17 00:00:00 2001 +From: Ben Hutchings +Date: Sun, 8 Jun 2014 23:33:25 +0100 +Subject: bfa: Fix undefined bit shift on big-endian architectures with 32-bit DMA address + +From: Ben Hutchings + +commit 03a6c3ff3282ee9fa893089304d951e0be93a144 upstream. + +bfa_swap_words() shifts its argument (assumed to be 64-bit) by 32 bits +each way. In two places the argument type is dma_addr_t, which may be +32-bit, in which case the effect of the bit shift is undefined: + +drivers/scsi/bfa/bfa_fcpim.c: In function 'bfa_ioim_send_ioreq': +drivers/scsi/bfa/bfa_fcpim.c:2497:4: warning: left shift count >= width of type [enabled by default] + addr = bfa_sgaddr_le(sg_dma_address(sg)); + ^ +drivers/scsi/bfa/bfa_fcpim.c:2497:4: warning: right shift count >= width of type [enabled by default] +drivers/scsi/bfa/bfa_fcpim.c:2509:4: warning: left shift count >= width of type [enabled by default] + addr = bfa_sgaddr_le(sg_dma_address(sg)); + ^ +drivers/scsi/bfa/bfa_fcpim.c:2509:4: warning: right shift count >= width of type [enabled by default] + +Avoid this by adding casts to u64 in bfa_swap_words(). 
+ +Compile-tested only. + +Signed-off-by: Ben Hutchings +Reviewed-by: Martin K. Petersen +Acked-by: Anil Gurumurthy +Fixes: f16a17507b09 ('[SCSI] bfa: remove all OS wrappers') +Signed-off-by: Christoph Hellwig +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/bfa/bfa_ioc.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/bfa/bfa_ioc.h ++++ b/drivers/scsi/bfa/bfa_ioc.h +@@ -72,7 +72,7 @@ struct bfa_sge_s { + } while (0) + + #define bfa_swap_words(_x) ( \ +- ((_x) << 32) | ((_x) >> 32)) ++ ((u64)(_x) << 32) | ((u64)(_x) >> 32)) + + #ifdef __BIG_ENDIAN + #define bfa_sge_to_be(_x) diff --git a/queue-3.10/mnt-add-tests-for-unprivileged-remount-cases-that-have-found-to-be-faulty.patch b/queue-3.10/mnt-add-tests-for-unprivileged-remount-cases-that-have-found-to-be-faulty.patch new file mode 100644 index 00000000000..890aa07d6b6 --- /dev/null +++ b/queue-3.10/mnt-add-tests-for-unprivileged-remount-cases-that-have-found-to-be-faulty.patch @@ -0,0 +1,309 @@ +From db181ce011e3c033328608299cd6fac06ea50130 Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Tue, 29 Jul 2014 15:50:44 -0700 +Subject: mnt: Add tests for unprivileged remount cases that have found to be faulty + +From: "Eric W. Biederman" + +commit db181ce011e3c033328608299cd6fac06ea50130 upstream. + +Kenton Varda discovered that by remounting a +read-only bind mount read-only in a user namespace the +MNT_LOCK_READONLY bit would be cleared, allowing an unprivileged user +to remount a read-only mount read-write. + +Upon review of the code in remount it was discovered that the code allowed +nosuid, noexec, and nodev to be cleared. It was also discovered that +the code was allowing the per mount atime flags to be changed. + +The first naive patch to fix these issues contained the flaw that using +default atime settings when remounting a filesystem could be disallowed.
+ +To avoid these problems in the future, add tests to ensure unprivileged +remounts are succeeding and failing at the appropriate times. + +Acked-by: Serge E. Hallyn +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Greg Kroah-Hartman + +--- + tools/testing/selftests/Makefile | 1 + tools/testing/selftests/mount/Makefile | 17 + tools/testing/selftests/mount/unprivileged-remount-test.c | 242 ++++++++++++++ + 3 files changed, 260 insertions(+) + +--- a/tools/testing/selftests/Makefile ++++ b/tools/testing/selftests/Makefile +@@ -4,6 +4,7 @@ TARGETS += efivarfs + TARGETS += kcmp + TARGETS += memory-hotplug + TARGETS += mqueue ++TARGETS += mount + TARGETS += net + TARGETS += ptrace + TARGETS += vm +--- /dev/null ++++ b/tools/testing/selftests/mount/Makefile +@@ -0,0 +1,17 @@ ++# Makefile for mount selftests. ++ ++all: unprivileged-remount-test ++ ++unprivileged-remount-test: unprivileged-remount-test.c ++ gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test ++ ++# Allow specific tests to be selected.
++test_unprivileged_remount: unprivileged-remount-test ++ @if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi ++ ++run_tests: all test_unprivileged_remount ++ ++clean: ++ rm -f unprivileged-remount-test ++ ++.PHONY: all test_unprivileged_remount +--- /dev/null ++++ b/tools/testing/selftests/mount/unprivileged-remount-test.c +@@ -0,0 +1,242 @@ ++#define _GNU_SOURCE ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifndef CLONE_NEWNS ++# define CLONE_NEWNS 0x00020000 ++#endif ++#ifndef CLONE_NEWUTS ++# define CLONE_NEWUTS 0x04000000 ++#endif ++#ifndef CLONE_NEWIPC ++# define CLONE_NEWIPC 0x08000000 ++#endif ++#ifndef CLONE_NEWNET ++# define CLONE_NEWNET 0x40000000 ++#endif ++#ifndef CLONE_NEWUSER ++# define CLONE_NEWUSER 0x10000000 ++#endif ++#ifndef CLONE_NEWPID ++# define CLONE_NEWPID 0x20000000 ++#endif ++ ++#ifndef MS_RELATIME ++#define MS_RELATIME (1 << 21) ++#endif ++#ifndef MS_STRICTATIME ++#define MS_STRICTATIME (1 << 24) ++#endif ++ ++static void die(char *fmt, ...) ++{ ++ va_list ap; ++ va_start(ap, fmt); ++ vfprintf(stderr, fmt, ap); ++ va_end(ap); ++ exit(EXIT_FAILURE); ++} ++ ++static void write_file(char *filename, char *fmt, ...) 
++{ ++ char buf[4096]; ++ int fd; ++ ssize_t written; ++ int buf_len; ++ va_list ap; ++ ++ va_start(ap, fmt); ++ buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); ++ va_end(ap); ++ if (buf_len < 0) { ++ die("vsnprintf failed: %s\n", ++ strerror(errno)); ++ } ++ if (buf_len >= sizeof(buf)) { ++ die("vsnprintf output truncated\n"); ++ } ++ ++ fd = open(filename, O_WRONLY); ++ if (fd < 0) { ++ die("open of %s failed: %s\n", ++ filename, strerror(errno)); ++ } ++ written = write(fd, buf, buf_len); ++ if (written != buf_len) { ++ if (written >= 0) { ++ die("short write to %s\n", filename); ++ } else { ++ die("write to %s failed: %s\n", ++ filename, strerror(errno)); ++ } ++ } ++ if (close(fd) != 0) { ++ die("close of %s failed: %s\n", ++ filename, strerror(errno)); ++ } ++} ++ ++static void create_and_enter_userns(void) ++{ ++ uid_t uid; ++ gid_t gid; ++ ++ uid = getuid(); ++ gid = getgid(); ++ ++ if (unshare(CLONE_NEWUSER) !=0) { ++ die("unshare(CLONE_NEWUSER) failed: %s\n", ++ strerror(errno)); ++ } ++ ++ write_file("/proc/self/uid_map", "0 %d 1", uid); ++ write_file("/proc/self/gid_map", "0 %d 1", gid); ++ ++ if (setgroups(0, NULL) != 0) { ++ die("setgroups failed: %s\n", ++ strerror(errno)); ++ } ++ if (setgid(0) != 0) { ++ die ("setgid(0) failed %s\n", ++ strerror(errno)); ++ } ++ if (setuid(0) != 0) { ++ die("setuid(0) failed %s\n", ++ strerror(errno)); ++ } ++} ++ ++static ++bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags) ++{ ++ pid_t child; ++ ++ child = fork(); ++ if (child == -1) { ++ die("fork failed: %s\n", ++ strerror(errno)); ++ } ++ if (child != 0) { /* parent */ ++ pid_t pid; ++ int status; ++ pid = waitpid(child, &status, 0); ++ if (pid == -1) { ++ die("waitpid failed: %s\n", ++ strerror(errno)); ++ } ++ if (pid != child) { ++ die("waited for %d got %d\n", ++ child, pid); ++ } ++ if (!WIFEXITED(status)) { ++ die("child did not terminate cleanly\n"); ++ } ++ return WEXITSTATUS(status) == EXIT_SUCCESS ? 
true : false; ++ } ++ ++ create_and_enter_userns(); ++ if (unshare(CLONE_NEWNS) != 0) { ++ die("unshare(CLONE_NEWNS) failed: %s\n", ++ strerror(errno)); ++ } ++ ++ if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) { ++ die("mount of /tmp failed: %s\n", ++ strerror(errno)); ++ } ++ ++ create_and_enter_userns(); ++ ++ if (unshare(CLONE_NEWNS) != 0) { ++ die("unshare(CLONE_NEWNS) failed: %s\n", ++ strerror(errno)); ++ } ++ ++ if (mount("/tmp", "/tmp", "none", ++ MS_REMOUNT | MS_BIND | remount_flags, NULL) != 0) { ++ /* system("cat /proc/self/mounts"); */ ++ die("remount of /tmp failed: %s\n", ++ strerror(errno)); ++ } ++ ++ if (mount("/tmp", "/tmp", "none", ++ MS_REMOUNT | MS_BIND | invalid_flags, NULL) == 0) { ++ /* system("cat /proc/self/mounts"); */ ++ die("remount of /tmp with invalid flags " ++ "succeeded unexpectedly\n"); ++ } ++ exit(EXIT_SUCCESS); ++} ++ ++static bool test_unpriv_remount_simple(int mount_flags) ++{ ++ return test_unpriv_remount(mount_flags, mount_flags, 0); ++} ++ ++static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags) ++{ ++ return test_unpriv_remount(mount_flags, mount_flags, invalid_flags); ++} ++ ++int main(int argc, char **argv) ++{ ++ if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) { ++ die("MS_RDONLY malfunctions\n"); ++ } ++ if (!test_unpriv_remount_simple(MS_NODEV)) { ++ die("MS_NODEV malfunctions\n"); ++ } ++ if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) { ++ die("MS_NOSUID malfunctions\n"); ++ } ++ if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) { ++ die("MS_NOEXEC malfunctions\n"); ++ } ++ if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV, ++ MS_NOATIME|MS_NODEV)) ++ { ++ die("MS_RELATIME malfunctions\n"); ++ } ++ if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV, ++ MS_NOATIME|MS_NODEV)) ++ { ++ die("MS_STRICTATIME malfunctions\n"); ++ } ++ if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV, ++ MS_STRICTATIME|MS_NODEV)) ++ { ++ die("MS_RELATIME malfunctions\n"); ++ } ++ if 
(!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV, ++ MS_NOATIME|MS_NODEV)) ++ { ++ die("MS_RELATIME malfunctions\n"); ++ } ++ if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV, ++ MS_NOATIME|MS_NODEV)) ++ { ++ die("MS_RELATIME malfunctions\n"); ++ } ++ if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV, ++ MS_STRICTATIME|MS_NODEV)) ++ { ++ die("MS_RELATIME malfunctions\n"); ++ } ++ if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV, ++ MS_NOATIME|MS_NODEV)) ++ { ++ die("Default atime malfunctions\n"); ++ } ++ return EXIT_SUCCESS; ++} diff --git a/queue-3.10/mnt-change-the-default-remount-atime-from-relatime-to-the-existing-value.patch b/queue-3.10/mnt-change-the-default-remount-atime-from-relatime-to-the-existing-value.patch new file mode 100644 index 00000000000..0d7caa3027d --- /dev/null +++ b/queue-3.10/mnt-change-the-default-remount-atime-from-relatime-to-the-existing-value.patch @@ -0,0 +1,57 @@ +From ffbc6f0ead47fa5a1dc9642b0331cb75c20a640e Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Mon, 28 Jul 2014 17:36:04 -0700 +Subject: mnt: Change the default remount atime from relatime to the existing value + +From: "Eric W. Biederman" + +commit ffbc6f0ead47fa5a1dc9642b0331cb75c20a640e upstream. + +Since March 2009 the kernel has treated the state that if no +MS_..ATIME flags are passed then the kernel defaults to relatime. + +Defaulting to relatime instead of the existing atime state during a +remount is silly, and causes problems in practice for people who don't +specify any MS_...ATIME flags and to get the default filesystem atime +setting. Those users may encounter a permission error because the +default atime setting does not work. + +A default that does not work and causes permission problems is +ridiculous, so preserve the existing value to have a default +atime setting that is always guaranteed to work. 
+ +Using the default atime setting in this way is particularly +interesting for applications built to run in restricted userspace +environments without /proc mounted, as the existing atime mount +options of a filesystem can not be read from /proc/mounts. + +In practice this fixes user space that uses the default atime +setting on remount that are broken by the permission checks +keeping less privileged users from changing more privileged users' +atime settings. + +Acked-by: Serge E. Hallyn +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/namespace.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -2346,6 +2346,14 @@ long do_mount(const char *dev_name, cons + if (flags & MS_RDONLY) + mnt_flags |= MNT_READONLY; + ++ /* The default atime for remount is preservation */ ++ if ((flags & MS_REMOUNT) && ++ ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME | ++ MS_STRICTATIME)) == 0)) { ++ mnt_flags &= ~MNT_ATIME_MASK; ++ mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK; ++ } ++ + flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | + MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | + MS_STRICTATIME); diff --git a/queue-3.10/mnt-correct-permission-checks-in-do_remount.patch b/queue-3.10/mnt-correct-permission-checks-in-do_remount.patch new file mode 100644 index 00000000000..f6a5fa554d1 --- /dev/null +++ b/queue-3.10/mnt-correct-permission-checks-in-do_remount.patch @@ -0,0 +1,127 @@ +From 9566d6742852c527bf5af38af5cbb878dad75705 Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Mon, 28 Jul 2014 17:26:07 -0700 +Subject: mnt: Correct permission checks in do_remount + +From: "Eric W. Biederman" + +commit 9566d6742852c527bf5af38af5cbb878dad75705 upstream. + +While investigating the issue where in "mount --bind -oremount,ro ..."
+would result in later "mount --bind -oremount,rw" succeeding even if +the mount started off locked I realized that there are several +additional mount flags that should be locked and are not. + +In particular MNT_NOSUID, MNT_NODEV, MNT_NOEXEC, and the atime +flags in addition to MNT_READONLY should all be locked. These +flags are all per superblock, can all be changed with MS_BIND, +and should not be changeable if set by a more privileged user. + +The following additions to the current logic are added in this patch. +- nosuid may not be clearable by a less privileged user. +- nodev may not be clearable by a less privileged user. +- noexec may not be clearable by a less privileged user. +- atime flags may not be changeable by a less privileged user. + +The logic with atime is that always setting atime on access is a +global policy and backup software and auditing software could break if +atime bits are not updated (when they are configured to be updated), +and serious performance degradation could result (DOS attack) if atime +updates happen when they have been explicitly disabled. Therefore an +unprivileged user should not be able to mess with the atime bits set +by a more privileged user. + +The additional restrictions are implemented with the addition of +MNT_LOCK_NOSUID, MNT_LOCK_NODEV, MNT_LOCK_NOEXEC, and MNT_LOCK_ATIME +mnt flags. + +Taken together these changes and the fixes for MNT_LOCK_READONLY +should make it safe for an unprivileged user to create a user +namespace and to call "mount --bind -o remount,... ..." without +the danger of mount flags being changed maliciously. + +Acked-by: Serge E. Hallyn +Signed-off-by: "Eric W.
Biederman" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/namespace.c | 36 +++++++++++++++++++++++++++++++++--- + include/linux/mount.h | 5 +++++ + 2 files changed, 38 insertions(+), 3 deletions(-) + +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -828,8 +828,21 @@ static struct mount *clone_mnt(struct mo + + mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; + /* Don't allow unprivileged users to change mount flags */ +- if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) +- mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; ++ if (flag & CL_UNPRIVILEGED) { ++ mnt->mnt.mnt_flags |= MNT_LOCK_ATIME; ++ ++ if (mnt->mnt.mnt_flags & MNT_READONLY) ++ mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; ++ ++ if (mnt->mnt.mnt_flags & MNT_NODEV) ++ mnt->mnt.mnt_flags |= MNT_LOCK_NODEV; ++ ++ if (mnt->mnt.mnt_flags & MNT_NOSUID) ++ mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID; ++ ++ if (mnt->mnt.mnt_flags & MNT_NOEXEC) ++ mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC; ++ } + + atomic_inc(&sb->s_active); + mnt->mnt.mnt_sb = sb; +@@ -1799,6 +1812,23 @@ static int do_remount(struct path *path, + !(mnt_flags & MNT_READONLY)) { + return -EPERM; + } ++ if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && ++ !(mnt_flags & MNT_NODEV)) { ++ return -EPERM; ++ } ++ if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && ++ !(mnt_flags & MNT_NOSUID)) { ++ return -EPERM; ++ } ++ if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) && ++ !(mnt_flags & MNT_NOEXEC)) { ++ return -EPERM; ++ } ++ if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) && ++ ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) { ++ return -EPERM; ++ } ++ + err = security_sb_remount(sb, data); + if (err) + return err; +@@ -1998,7 +2028,7 @@ static int do_new_mount(struct path *pat + */ + if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) { + flags |= MS_NODEV; +- mnt_flags |= MNT_NODEV; ++ mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; + } + } + +--- a/include/linux/mount.h ++++ b/include/linux/mount.h +@@ -46,9 +46,14 @@ struct mnt_namespace; + | 
MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ + | MNT_READONLY) + ++#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) + + #define MNT_INTERNAL 0x4000 + ++#define MNT_LOCK_ATIME 0x040000 ++#define MNT_LOCK_NOEXEC 0x080000 ++#define MNT_LOCK_NOSUID 0x100000 ++#define MNT_LOCK_NODEV 0x200000 + #define MNT_LOCK_READONLY 0x400000 + + struct vfsmount { diff --git a/queue-3.10/mnt-move-the-test-for-mnt_lock_readonly-from-change_mount_flags-into-do_remount.patch b/queue-3.10/mnt-move-the-test-for-mnt_lock_readonly-from-change_mount_flags-into-do_remount.patch new file mode 100644 index 00000000000..37baed13085 --- /dev/null +++ b/queue-3.10/mnt-move-the-test-for-mnt_lock_readonly-from-change_mount_flags-into-do_remount.patch @@ -0,0 +1,54 @@ +From 07b645589dcda8b7a5249e096fece2a67556f0f4 Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Mon, 28 Jul 2014 17:10:56 -0700 +Subject: mnt: Move the test for MNT_LOCK_READONLY from change_mount_flags into do_remount + +From: "Eric W. Biederman" + +commit 07b645589dcda8b7a5249e096fece2a67556f0f4 upstream. + +There are no races as locked mount flags are guaranteed to never change. + +Moving the test into do_remount makes it more visible, and ensures all +filesystem remounts pass the MNT_LOCK_READONLY permission check. This +second case is not an issue today as filesystem remounts are guarded +by capable(CAP_SYS_ADMIN) and thus will always fail in less privileged +mount namespaces, but it could become an issue in the future. + +Acked-by: Serge E. Hallyn +Signed-off-by: "Eric W. 
Biederman" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/namespace.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -1764,9 +1764,6 @@ static int change_mount_flags(struct vfs + if (readonly_request == __mnt_is_readonly(mnt)) + return 0; + +- if (mnt->mnt_flags & MNT_LOCK_READONLY) +- return -EPERM; +- + if (readonly_request) + error = mnt_make_readonly(real_mount(mnt)); + else +@@ -1792,6 +1789,16 @@ static int do_remount(struct path *path, + if (path->dentry != path->mnt->mnt_root) + return -EINVAL; + ++ /* Don't allow changing of locked mnt flags. ++ * ++ * No locks need to be held here while testing the various ++ * MNT_LOCK flags because those flags can never be cleared ++ * once they are set. ++ */ ++ if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) && ++ !(mnt_flags & MNT_READONLY)) { ++ return -EPERM; ++ } + err = security_sb_remount(sb, data); + if (err) + return err; diff --git a/queue-3.10/mnt-only-change-user-settable-mount-flags-in-remount.patch b/queue-3.10/mnt-only-change-user-settable-mount-flags-in-remount.patch new file mode 100644 index 00000000000..db8545b1e26 --- /dev/null +++ b/queue-3.10/mnt-only-change-user-settable-mount-flags-in-remount.patch @@ -0,0 +1,53 @@ +From a6138db815df5ee542d848318e5dae681590fccd Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Mon, 28 Jul 2014 16:26:53 -0700 +Subject: mnt: Only change user settable mount flags in remount + +From: "Eric W. Biederman" + +commit a6138db815df5ee542d848318e5dae681590fccd upstream. + +Kenton Varda discovered that by remounting a +read-only bind mount read-only in a user namespace the +MNT_LOCK_READONLY bit would be cleared, allowing an unprivileged user +to remount a read-only mount read-write. + +Correct this by replacing the mask of mount flags to preserve +with a mask of mount flags that may be changed, and preserve +all others. 
This ensures that any future bugs with this mask and +remount will fail in an easy to detect way where new mount flags +simply won't change. + +Acked-by: Serge E. Hallyn +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/namespace.c | 2 +- + include/linux/mount.h | 4 +++- + 2 files changed, 4 insertions(+), 2 deletions(-) + +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -1805,7 +1805,7 @@ static int do_remount(struct path *path, + err = do_remount_sb(sb, flags, data, 0); + if (!err) { + br_write_lock(&vfsmount_lock); +- mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; ++ mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; + mnt->mnt.mnt_flags = mnt_flags; + br_write_unlock(&vfsmount_lock); + } +--- a/include/linux/mount.h ++++ b/include/linux/mount.h +@@ -42,7 +42,9 @@ struct mnt_namespace; + * flag, consider how it interacts with shared mounts. + */ + #define MNT_SHARED_MASK (MNT_UNBINDABLE) +-#define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE) ++#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ ++ | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ ++ | MNT_READONLY) + + + #define MNT_INTERNAL 0x4000 diff --git a/queue-3.10/ring-buffer-always-reset-iterator-to-reader-page.patch b/queue-3.10/ring-buffer-always-reset-iterator-to-reader-page.patch new file mode 100644 index 00000000000..969fabfb0de --- /dev/null +++ b/queue-3.10/ring-buffer-always-reset-iterator-to-reader-page.patch @@ -0,0 +1,131 @@ +From 651e22f2701b4113989237c3048d17337dd2185c Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Red Hat)" +Date: Wed, 6 Aug 2014 14:11:33 -0400 +Subject: ring-buffer: Always reset iterator to reader page + +From: "Steven Rostedt (Red Hat)" + +commit 651e22f2701b4113989237c3048d17337dd2185c upstream. + +When performing a consuming read, the ring buffer swaps out a +page from the ring buffer with an empty page and this page that +was swapped out becomes the new reader page. 
The reader page +is owned by the reader and since it was swapped out of the ring +buffer, writers do not have access to it (there's an exception +to that rule, but it's out of scope for this commit). + +When reading the "trace" file, it is a non consuming read, which +means that the data in the ring buffer will not be modified. +When the trace file is opened, a ring buffer iterator is allocated +and writes to the ring buffer are disabled, such that the iterator +will not have issues iterating over the data. + +Although the ring buffer disabled writes, it does not disable other +reads, or even consuming reads. If a consuming read happens, then +the iterator is reset and starts reading from the beginning again. + +My tests would sometimes trigger this bug on my i386 box: + +WARNING: CPU: 0 PID: 5175 at kernel/trace/trace.c:1527 __trace_find_cmdline+0x66/0xaa() +Modules linked in: +CPU: 0 PID: 5175 Comm: grep Not tainted 3.16.0-rc3-test+ #8 +Hardware name: /DG965MQ, BIOS MQ96510J.86A.0372.2006.0605.1717 06/05/2006 + 00000000 00000000 f09c9e1c c18796b3 c1b5d74c f09c9e4c c103a0e3 c1b5154b + f09c9e78 00001437 c1b5d74c 000005f7 c10bd85a c10bd85a c1cac57c f09c9eb0 + ed0e0000 f09c9e64 c103a185 00000009 f09c9e5c c1b5154b f09c9e78 f09c9e80 +Call Trace: + [] dump_stack+0x4b/0x75 + [] warn_slowpath_common+0x7e/0x95 + [] ? __trace_find_cmdline+0x66/0xaa + [] ? __trace_find_cmdline+0x66/0xaa + [] warn_slowpath_fmt+0x33/0x35 + [] __trace_find_cmdline+0x66/0xaa + [] trace_find_cmdline+0x40/0x64 + [] trace_print_context+0x27/0xec + [] ? trace_seq_printf+0x37/0x5b + [] print_trace_line+0x319/0x39b + [] ? ring_buffer_read+0x47/0x50 + [] s_show+0x192/0x1ab + [] ? s_next+0x5a/0x7c + [] seq_read+0x267/0x34c + [] vfs_read+0x8c/0xef + [] ? 
seq_lseek+0x154/0x154 + [] SyS_read+0x54/0x7f + [] syscall_call+0x7/0xb +---[ end trace 3f507febd6b4cc83 ]--- +>>>> ##### CPU 1 buffer started #### + +Which was the __trace_find_cmdline() function complaining about the pid +in the event record being negative. + +After adding more test cases, this would trigger more often. Strangely +enough, it would never trigger on a single test, but instead would trigger +only when running all the tests. I believe that was the case because it +required one of the tests to be shutting down via delayed instances while +a new test started up. + +After spending several days debugging this, I found that it was caused by +the iterator becoming corrupted. Debugging further, I found out why +the iterator became corrupted. It happened with the rb_iter_reset(). + +As consuming reads may not read the full reader page, and only part +of it, there's a "read" field to know where the last read took place. +The iterator must also start at the read position. In the rb_iter_reset() +code, if the reader page was disconnected from the ring buffer, the iterator +would start at the head page within the ring buffer (where writes still +happen). But the mistake there was that it still used the "read" field +to start the iterator on the head page, where it should always start +at zero because readers never read from within the ring buffer where +writes occur. + +I originally wrote a patch to have it set the iter->head to 0 instead +of iter->head_page->read, but then I questioned why it wasn't always +setting the iter to point to the reader page, as the reader page is +still valid. The list_empty(reader_page->list) just means that it was +successful in swapping out. But the reader_page may still have data. + +There was a bug report a long time ago that was not reproducible that +had something about trace_pipe (consuming read) not matching trace +(iterator read). This may explain why that happened. 
+ +Anyway, the correct answer to this bug is to always use the reader page +and not reset the iterator to inside the writable ring buffer. + +Fixes: d769041f8653 "ring_buffer: implement new locking" +Signed-off-by: Steven Rostedt +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/ring_buffer.c | 17 ++++++----------- + 1 file changed, 6 insertions(+), 11 deletions(-) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -3353,21 +3353,16 @@ static void rb_iter_reset(struct ring_bu + struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; + + /* Iterator usage is expected to have record disabled */ +- if (list_empty(&cpu_buffer->reader_page->list)) { +- iter->head_page = rb_set_head_page(cpu_buffer); +- if (unlikely(!iter->head_page)) +- return; +- iter->head = iter->head_page->read; +- } else { +- iter->head_page = cpu_buffer->reader_page; +- iter->head = cpu_buffer->reader_page->read; +- } ++ iter->head_page = cpu_buffer->reader_page; ++ iter->head = cpu_buffer->reader_page->read; ++ ++ iter->cache_reader_page = iter->head_page; ++ iter->cache_read = iter->head; ++ + if (iter->head) + iter->read_stamp = cpu_buffer->read_stamp; + else + iter->read_stamp = iter->head_page->page->time_stamp; +- iter->cache_reader_page = cpu_buffer->reader_page; +- iter->cache_read = cpu_buffer->read; + } + + /** diff --git a/queue-3.10/ring-buffer-up-rb_iter_peek-loop-count-to-3.patch b/queue-3.10/ring-buffer-up-rb_iter_peek-loop-count-to-3.patch new file mode 100644 index 00000000000..050a8075228 --- /dev/null +++ b/queue-3.10/ring-buffer-up-rb_iter_peek-loop-count-to-3.patch @@ -0,0 +1,100 @@ +From 021de3d904b88b1771a3a2cfc5b75023c391e646 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Red Hat)" +Date: Wed, 6 Aug 2014 15:36:31 -0400 +Subject: ring-buffer: Up rb_iter_peek() loop count to 3 + +From: "Steven Rostedt (Red Hat)" + +commit 021de3d904b88b1771a3a2cfc5b75023c391e646 upstream. 
+ +After writing a test to try to trigger the bug that caused the +ring buffer iterator to become corrupted, I hit another bug: + + WARNING: CPU: 1 PID: 5281 at kernel/trace/ring_buffer.c:3766 rb_iter_peek+0x113/0x238() + Modules linked in: ipt_MASQUERADE sunrpc [...] + CPU: 1 PID: 5281 Comm: grep Tainted: G W 3.16.0-rc3-test+ #143 + Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007 + 0000000000000000 ffffffff81809a80 ffffffff81503fb0 0000000000000000 + ffffffff81040ca1 ffff8800796d6010 ffffffff810c138d ffff8800796d6010 + ffff880077438c80 ffff8800796d6010 ffff88007abbe600 0000000000000003 + Call Trace: + [] ? dump_stack+0x4a/0x75 + [] ? warn_slowpath_common+0x7e/0x97 + [] ? rb_iter_peek+0x113/0x238 + [] ? rb_iter_peek+0x113/0x238 + [] ? ring_buffer_iter_peek+0x2d/0x5c + [] ? tracing_iter_reset+0x6e/0x96 + [] ? s_start+0xd7/0x17b + [] ? kmem_cache_alloc_trace+0xda/0xea + [] ? seq_read+0x148/0x361 + [] ? vfs_read+0x93/0xf1 + [] ? SyS_read+0x60/0x8e + [] ? tracesys+0xdd/0xe2 + +Debugging this bug, which triggers when the rb_iter_peek() loops too +many times (more than 2 times), I discovered there's a case that can +cause that function to legitimately loop 3 times! + +rb_iter_peek() is different than rb_buffer_peek() as the rb_buffer_peek() +only deals with the reader page (it's for consuming reads). The +rb_iter_peek() is for traversing the buffer without consuming it, and as +such, it can loop for one more reason. That is, if we hit the end of +the reader page or any page, it will go to the next page and try again. + +That is, we have this: + + 1. iter->head > iter->head_page->page->commit + (rb_inc_iter() which moves the iter to the next page) + try again + + 2. event = rb_iter_head_event() + event->type_len == RINGBUF_TYPE_TIME_EXTEND + rb_advance_iter() + try again + + 3. read the event. + +But we never get to 3, because the count is greater than 2 and we +cause the WARNING and return NULL. 
+ +Up the counter to 3. + +Fixes: 69d1b839f7ee "ring-buffer: Bind time extend and data events together" +Signed-off-by: Steven Rostedt +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/ring_buffer.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -1980,7 +1980,7 @@ rb_add_time_stamp(struct ring_buffer_eve + + /** + * rb_update_event - update event type and data +- * @event: the even to update ++ * @event: the event to update + * @type: the type of event + * @length: the size of the event field in the ring buffer + * +@@ -3755,12 +3755,14 @@ rb_iter_peek(struct ring_buffer_iter *it + return NULL; + + /* +- * We repeat when a time extend is encountered. +- * Since the time extend is always attached to a data event, +- * we should never loop more than once. +- * (We never hit the following condition more than twice). ++ * We repeat when a time extend is encountered or we hit ++ * the end of the page. Since the time extend is always attached ++ * to a data event, we should never loop more than three times. ++ * Once for going to next page, once on time extend, and ++ * finally once to get the event. ++ * (We never hit the following condition more than thrice). 
+ */ +- if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) ++ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) + return NULL; + + if (rb_per_cpu_empty(cpu_buffer)) diff --git a/queue-3.10/series b/queue-3.10/series index 5e6e0263cd8..3dfcb787f21 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -26,3 +26,19 @@ mips-asm-thread_info-add-_tif_seccomp-flag.patch mips-octeon-make-get_system_type-thread-safe.patch mips-fix-accessing-to-per-cpu-data-when-flushing-the-cache.patch openrisc-rework-signal-handling.patch +asoc-pcm-fix-dpcm_path_put-in-dpcm-runtime-update.patch +asoc-wm_adsp-add-missing-module_license.patch +asoc-samsung-correct-i2s-dai-suspend-resume-ops.patch +asoc-max98090-fix-missing-free_irq.patch +asoc-pxa-ssp-drop-sndrv_pcm_fmtbit_s24_le.patch +bfa-fix-undefined-bit-shift-on-big-endian-architectures-with-32-bit-dma-address.patch +acpica-utilities-fix-memory-leak-in-acpi_ut_copy_iobject_to_iobject.patch +acpi-run-fixed-event-device-notifications-in-process-context.patch +acpi-cpuidle-fix-deadlock-between-cpuidle_lock-and-cpu_hotplug.lock.patch +ring-buffer-always-reset-iterator-to-reader-page.patch +ring-buffer-up-rb_iter_peek-loop-count-to-3.patch +mnt-only-change-user-settable-mount-flags-in-remount.patch +mnt-move-the-test-for-mnt_lock_readonly-from-change_mount_flags-into-do_remount.patch +mnt-correct-permission-checks-in-do_remount.patch +mnt-change-the-default-remount-atime-from-relatime-to-the-existing-value.patch +mnt-add-tests-for-unprivileged-remount-cases-that-have-found-to-be-faulty.patch