From c6755fcd93b8a7f7641c65bfb46589331af434cc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 26 Aug 2019 11:11:39 +0200 Subject: [PATCH] 4.9-stable patches added patches: dm-btree-fix-order-of-block-initialization-in-btree_split_beneath.patch dm-space-map-metadata-fix-missing-store-of-apply_bops-return-value.patch dm-table-fix-invalid-memory-accesses-with-too-high-sector-number.patch gpiolib-never-report-open-drain-source-lines-as-input-to-user-space.patch hid-wacom-correct-distance-scale-for-2nd-gen-intuos-devices.patch hid-wacom-correct-misreported-ekr-ring-values.patch revert-dm-bufio-fix-deadlock-with-loop-device.patch userfaultfd_release-always-remove-uffd-flags-and-clear-vm_userfaultfd_ctx.patch x86-apic-handle-missing-global-clockevent-gracefully.patch x86-boot-fix-boot-regression-caused-by-bootparam-sanitizing.patch x86-boot-save-fields-explicitly-zero-out-everything-else.patch x86-retpoline-don-t-clobber-rflags-during-call_nospec-on-i386.patch --- ...nitialization-in-btree_split_beneath.patch | 88 ++++++++++ ...ing-store-of-apply_bops-return-value.patch | 36 ++++ ...accesses-with-too-high-sector-number.patch | 52 ++++++ ...-source-lines-as-input-to-user-space.patch | 50 ++++++ ...nce-scale-for-2nd-gen-intuos-devices.patch | 36 ++++ ...-correct-misreported-ekr-ring-values.patch | 36 ++++ ...-bufio-fix-deadlock-with-loop-device.patch | 61 +++++++ queue-4.9/series | 12 ++ ...d-flags-and-clear-vm_userfaultfd_ctx.patch | 87 ++++++++++ ...missing-global-clockevent-gracefully.patch | 154 ++++++++++++++++++ ...ssion-caused-by-bootparam-sanitizing.patch | 41 +++++ ...-explicitly-zero-out-everything-else.patch | 105 ++++++++++++ ...er-rflags-during-call_nospec-on-i386.patch | 79 +++++++++ 13 files changed, 837 insertions(+) create mode 100644 queue-4.9/dm-btree-fix-order-of-block-initialization-in-btree_split_beneath.patch create mode 100644 queue-4.9/dm-space-map-metadata-fix-missing-store-of-apply_bops-return-value.patch create mode 100644 
queue-4.9/dm-table-fix-invalid-memory-accesses-with-too-high-sector-number.patch create mode 100644 queue-4.9/gpiolib-never-report-open-drain-source-lines-as-input-to-user-space.patch create mode 100644 queue-4.9/hid-wacom-correct-distance-scale-for-2nd-gen-intuos-devices.patch create mode 100644 queue-4.9/hid-wacom-correct-misreported-ekr-ring-values.patch create mode 100644 queue-4.9/revert-dm-bufio-fix-deadlock-with-loop-device.patch create mode 100644 queue-4.9/userfaultfd_release-always-remove-uffd-flags-and-clear-vm_userfaultfd_ctx.patch create mode 100644 queue-4.9/x86-apic-handle-missing-global-clockevent-gracefully.patch create mode 100644 queue-4.9/x86-boot-fix-boot-regression-caused-by-bootparam-sanitizing.patch create mode 100644 queue-4.9/x86-boot-save-fields-explicitly-zero-out-everything-else.patch create mode 100644 queue-4.9/x86-retpoline-don-t-clobber-rflags-during-call_nospec-on-i386.patch diff --git a/queue-4.9/dm-btree-fix-order-of-block-initialization-in-btree_split_beneath.patch b/queue-4.9/dm-btree-fix-order-of-block-initialization-in-btree_split_beneath.patch new file mode 100644 index 00000000000..6ee5cd9a99b --- /dev/null +++ b/queue-4.9/dm-btree-fix-order-of-block-initialization-in-btree_split_beneath.patch @@ -0,0 +1,88 @@ +From e4f9d6013820d1eba1432d51dd1c5795759aa77f Mon Sep 17 00:00:00 2001 +From: ZhangXiaoxu +Date: Sat, 17 Aug 2019 13:32:40 +0800 +Subject: dm btree: fix order of block initialization in btree_split_beneath + +From: ZhangXiaoxu + +commit e4f9d6013820d1eba1432d51dd1c5795759aa77f upstream. + +When btree_split_beneath() splits a node to two new children, it will +allocate two blocks: left and right. If right block's allocation +failed, the left block will be unlocked and marked dirty. If this +happened, the left block's content is zero, because it wasn't +initialized with the btree struct before the attempt to allocate the +right block.
Upon return, when flushing the left block to disk, the +validator will fail when check this block. Then a BUG_ON is raised. + +Fix this by completely initializing the left block before allocating and +initializing the right block. + +Fixes: 4dcb8b57df359 ("dm btree: fix leak of bufio-backed block in btree_split_beneath error path") +Cc: stable@vger.kernel.org +Signed-off-by: ZhangXiaoxu +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/persistent-data/dm-btree.c | 31 ++++++++++++++++--------------- + 1 file changed, 16 insertions(+), 15 deletions(-) + +--- a/drivers/md/persistent-data/dm-btree.c ++++ b/drivers/md/persistent-data/dm-btree.c +@@ -623,39 +623,40 @@ static int btree_split_beneath(struct sh + + new_parent = shadow_current(s); + ++ pn = dm_block_data(new_parent); ++ size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ? ++ sizeof(__le64) : s->info->value_type.size; ++ ++ /* create & init the left block */ + r = new_block(s->info, &left); + if (r < 0) + return r; + ++ ln = dm_block_data(left); ++ nr_left = le32_to_cpu(pn->header.nr_entries) / 2; ++ ++ ln->header.flags = pn->header.flags; ++ ln->header.nr_entries = cpu_to_le32(nr_left); ++ ln->header.max_entries = pn->header.max_entries; ++ ln->header.value_size = pn->header.value_size; ++ memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0])); ++ memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size); ++ ++ /* create & init the right block */ + r = new_block(s->info, &right); + if (r < 0) { + unlock_block(s->info, left); + return r; + } + +- pn = dm_block_data(new_parent); +- ln = dm_block_data(left); + rn = dm_block_data(right); +- +- nr_left = le32_to_cpu(pn->header.nr_entries) / 2; + nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left; + +- ln->header.flags = pn->header.flags; +- ln->header.nr_entries = cpu_to_le32(nr_left); +- ln->header.max_entries = pn->header.max_entries; +- ln->header.value_size = pn->header.value_size; +- + rn->header.flags = 
pn->header.flags; + rn->header.nr_entries = cpu_to_le32(nr_right); + rn->header.max_entries = pn->header.max_entries; + rn->header.value_size = pn->header.value_size; +- +- memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0])); + memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0])); +- +- size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ? +- sizeof(__le64) : s->info->value_type.size; +- memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size); + memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left), + nr_right * size); + diff --git a/queue-4.9/dm-space-map-metadata-fix-missing-store-of-apply_bops-return-value.patch b/queue-4.9/dm-space-map-metadata-fix-missing-store-of-apply_bops-return-value.patch new file mode 100644 index 00000000000..cb153ecb72d --- /dev/null +++ b/queue-4.9/dm-space-map-metadata-fix-missing-store-of-apply_bops-return-value.patch @@ -0,0 +1,36 @@ +From ae148243d3f0816b37477106c05a2ec7d5f32614 Mon Sep 17 00:00:00 2001 +From: ZhangXiaoxu +Date: Mon, 19 Aug 2019 11:31:21 +0800 +Subject: dm space map metadata: fix missing store of apply_bops() return value + +From: ZhangXiaoxu + +commit ae148243d3f0816b37477106c05a2ec7d5f32614 upstream. + +In commit 6096d91af0b6 ("dm space map metadata: fix occasional leak +of a metadata block on resize"), we refactor the commit logic to a new +function 'apply_bops'. But when that logic was replaced in out() the +return value was not stored. This may lead out() returning a wrong +value to the caller. 
+ +Fixes: 6096d91af0b6 ("dm space map metadata: fix occasional leak of a metadata block on resize") +Cc: stable@vger.kernel.org +Signed-off-by: ZhangXiaoxu +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/persistent-data/dm-space-map-metadata.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/md/persistent-data/dm-space-map-metadata.c ++++ b/drivers/md/persistent-data/dm-space-map-metadata.c +@@ -248,7 +248,7 @@ static int out(struct sm_metadata *smm) + } + + if (smm->recursion_count == 1) +- apply_bops(smm); ++ r = apply_bops(smm); + + smm->recursion_count--; + diff --git a/queue-4.9/dm-table-fix-invalid-memory-accesses-with-too-high-sector-number.patch b/queue-4.9/dm-table-fix-invalid-memory-accesses-with-too-high-sector-number.patch new file mode 100644 index 00000000000..5c6e3aedf57 --- /dev/null +++ b/queue-4.9/dm-table-fix-invalid-memory-accesses-with-too-high-sector-number.patch @@ -0,0 +1,52 @@ +From 1cfd5d3399e87167b7f9157ef99daa0e959f395d Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Fri, 23 Aug 2019 09:54:09 -0400 +Subject: dm table: fix invalid memory accesses with too high sector number + +From: Mikulas Patocka + +commit 1cfd5d3399e87167b7f9157ef99daa0e959f395d upstream. + +If the sector number is too high, dm_table_find_target() should return a +pointer to a zeroed dm_target structure (the caller should test it with +dm_target_is_valid). + +However, for some table sizes, the code in dm_table_find_target() that +performs btree lookup will access out of bound memory structures. + +Fix this bug by testing the sector number at the beginning of +dm_table_find_target(). Also, add an "inline" keyword to the function +dm_table_get_size() because this is a hot path. 
+ +Fixes: 512875bd9661 ("dm: table detect io beyond device") +Cc: stable@vger.kernel.org +Reported-by: Zhang Tao +Signed-off-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-table.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/md/dm-table.c ++++ b/drivers/md/dm-table.c +@@ -1263,7 +1263,7 @@ void dm_table_event(struct dm_table *t) + } + EXPORT_SYMBOL(dm_table_event); + +-sector_t dm_table_get_size(struct dm_table *t) ++inline sector_t dm_table_get_size(struct dm_table *t) + { + return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; + } +@@ -1288,6 +1288,9 @@ struct dm_target *dm_table_find_target(s + unsigned int l, n = 0, k = 0; + sector_t *node; + ++ if (unlikely(sector >= dm_table_get_size(t))) ++ return &t->targets[t->num_targets]; ++ + for (l = 0; l < t->depth; l++) { + n = get_child(n, k); + node = get_node(t, l, n); diff --git a/queue-4.9/gpiolib-never-report-open-drain-source-lines-as-input-to-user-space.patch b/queue-4.9/gpiolib-never-report-open-drain-source-lines-as-input-to-user-space.patch new file mode 100644 index 00000000000..f7df7323bd5 --- /dev/null +++ b/queue-4.9/gpiolib-never-report-open-drain-source-lines-as-input-to-user-space.patch @@ -0,0 +1,50 @@ +From 2c60e6b5c9241b24b8b523fefd3e44fb85622cda Mon Sep 17 00:00:00 2001 +From: Bartosz Golaszewski +Date: Tue, 6 Aug 2019 13:41:51 +0200 +Subject: gpiolib: never report open-drain/source lines as 'input' to user-space + +From: Bartosz Golaszewski + +commit 2c60e6b5c9241b24b8b523fefd3e44fb85622cda upstream. + +If the driver doesn't support open-drain/source config options, we +emulate this behavior when setting the direction by calling +gpiod_direction_input() if the default value is 0 (open-source) or +1 (open-drain), thus not actively driving the line in those cases. 
+ +This however clears the FLAG_IS_OUT bit for the GPIO line descriptor +and makes the LINEINFO ioctl() incorrectly report this line's mode as +'input' to user-space. + +This commit modifies the ioctl() to always set the GPIOLINE_FLAG_IS_OUT +bit in the lineinfo structure's flags field. Since it's impossible to +use the input mode and open-drain/source options at the same time, we +can be sure the reported information will be correct. + +Fixes: 521a2ad6f862 ("gpio: add userspace ABI for GPIO line information") +Cc: stable +Signed-off-by: Bartosz Golaszewski +Link: https://lore.kernel.org/r/20190806114151.17652-1-brgl@bgdev.pl +Signed-off-by: Linus Walleij +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpio/gpiolib.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/gpio/gpiolib.c ++++ b/drivers/gpio/gpiolib.c +@@ -953,9 +953,11 @@ static long gpio_ioctl(struct file *filp + if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) + lineinfo.flags |= GPIOLINE_FLAG_ACTIVE_LOW; + if (test_bit(FLAG_OPEN_DRAIN, &desc->flags)) +- lineinfo.flags |= GPIOLINE_FLAG_OPEN_DRAIN; ++ lineinfo.flags |= (GPIOLINE_FLAG_OPEN_DRAIN | ++ GPIOLINE_FLAG_IS_OUT); + if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) +- lineinfo.flags |= GPIOLINE_FLAG_OPEN_SOURCE; ++ lineinfo.flags |= (GPIOLINE_FLAG_OPEN_SOURCE | ++ GPIOLINE_FLAG_IS_OUT); + + if (copy_to_user(ip, &lineinfo, sizeof(lineinfo))) + return -EFAULT; diff --git a/queue-4.9/hid-wacom-correct-distance-scale-for-2nd-gen-intuos-devices.patch b/queue-4.9/hid-wacom-correct-distance-scale-for-2nd-gen-intuos-devices.patch new file mode 100644 index 00000000000..37ee2390d0b --- /dev/null +++ b/queue-4.9/hid-wacom-correct-distance-scale-for-2nd-gen-intuos-devices.patch @@ -0,0 +1,36 @@ +From b72fb1dcd2ea9d29417711cb302cef3006fa8d5a Mon Sep 17 00:00:00 2001 +From: Jason Gerecke +Date: Wed, 7 Aug 2019 14:11:55 -0700 +Subject: HID: wacom: Correct distance scale for 2nd-gen Intuos devices + +From: Jason Gerecke + +commit 
b72fb1dcd2ea9d29417711cb302cef3006fa8d5a upstream. + +Distance values reported by 2nd-gen Intuos tablets are on an inverted +scale (0 == far, 63 == near). We need to change them over to a normal +scale before reporting to userspace or else userspace drivers and +applications can get confused. + +Ref: https://github.com/linuxwacom/input-wacom/issues/98 +Fixes: eda01dab53 ("HID: wacom: Add four new Intuos devices") +Signed-off-by: Jason Gerecke +Cc: # v4.4+ +Signed-off-by: Jiri Kosina +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/hid/wacom_wac.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/hid/wacom_wac.c ++++ b/drivers/hid/wacom_wac.c +@@ -949,6 +949,8 @@ static int wacom_intuos_general(struct w + y >>= 1; + distance >>= 1; + } ++ if (features->type == INTUOSHT2) ++ distance = features->distance_max - distance; + input_report_abs(input, ABS_X, x); + input_report_abs(input, ABS_Y, y); + input_report_abs(input, ABS_DISTANCE, distance); diff --git a/queue-4.9/hid-wacom-correct-misreported-ekr-ring-values.patch b/queue-4.9/hid-wacom-correct-misreported-ekr-ring-values.patch new file mode 100644 index 00000000000..553ce4de93d --- /dev/null +++ b/queue-4.9/hid-wacom-correct-misreported-ekr-ring-values.patch @@ -0,0 +1,36 @@ +From fcf887e7caaa813eea821d11bf2b7619a37df37a Mon Sep 17 00:00:00 2001 +From: Aaron Armstrong Skomra +Date: Fri, 16 Aug 2019 12:00:54 -0700 +Subject: HID: wacom: correct misreported EKR ring values + +From: Aaron Armstrong Skomra + +commit fcf887e7caaa813eea821d11bf2b7619a37df37a upstream. + +The EKR ring claims a range of 0 to 71 but actually reports +values 1 to 72. The ring is used in relative mode so this +change should not affect users. 
+ +Signed-off-by: Aaron Armstrong Skomra +Fixes: 72b236d60218f ("HID: wacom: Add support for Express Key Remote.") +Cc: # v4.3+ +Reviewed-by: Ping Cheng +Reviewed-by: Jason Gerecke +Signed-off-by: Jiri Kosina +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/hid/wacom_wac.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/hid/wacom_wac.c ++++ b/drivers/hid/wacom_wac.c +@@ -819,7 +819,7 @@ static int wacom_remote_irq(struct wacom + input_report_key(input, BTN_BASE2, (data[11] & 0x02)); + + if (data[12] & 0x80) +- input_report_abs(input, ABS_WHEEL, (data[12] & 0x7f)); ++ input_report_abs(input, ABS_WHEEL, (data[12] & 0x7f) - 1); + else + input_report_abs(input, ABS_WHEEL, 0); + diff --git a/queue-4.9/revert-dm-bufio-fix-deadlock-with-loop-device.patch b/queue-4.9/revert-dm-bufio-fix-deadlock-with-loop-device.patch new file mode 100644 index 00000000000..b23cd647c89 --- /dev/null +++ b/queue-4.9/revert-dm-bufio-fix-deadlock-with-loop-device.patch @@ -0,0 +1,61 @@ +From cf3591ef832915892f2499b7e54b51d4c578b28c Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Thu, 8 Aug 2019 05:40:04 -0400 +Subject: Revert "dm bufio: fix deadlock with loop device" + +From: Mikulas Patocka + +commit cf3591ef832915892f2499b7e54b51d4c578b28c upstream. + +Revert the commit bd293d071ffe65e645b4d8104f9d8fe15ea13862. The proper +fix has been made available with commit d0a255e795ab ("loop: set +PF_MEMALLOC_NOIO for the worker thread"). + +Note that the fix offered by commit bd293d071ffe doesn't really prevent +the deadlock from occurring - if we look at the stacktrace reported by +Junxiao Bi, we see that it hangs in bit_wait_io and not on the mutex - +i.e. it has already successfully taken the mutex. Changing the mutex +from mutex_lock to mutex_trylock won't help with deadlocks that happen +afterwards.
+ +PID: 474 TASK: ffff8813e11f4600 CPU: 10 COMMAND: "kswapd0" + #0 [ffff8813dedfb938] __schedule at ffffffff8173f405 + #1 [ffff8813dedfb990] schedule at ffffffff8173fa27 + #2 [ffff8813dedfb9b0] schedule_timeout at ffffffff81742fec + #3 [ffff8813dedfba60] io_schedule_timeout at ffffffff8173f186 + #4 [ffff8813dedfbaa0] bit_wait_io at ffffffff8174034f + #5 [ffff8813dedfbac0] __wait_on_bit at ffffffff8173fec8 + #6 [ffff8813dedfbb10] out_of_line_wait_on_bit at ffffffff8173ff81 + #7 [ffff8813dedfbb90] __make_buffer_clean at ffffffffa038736f [dm_bufio] + #8 [ffff8813dedfbbb0] __try_evict_buffer at ffffffffa0387bb8 [dm_bufio] + #9 [ffff8813dedfbbd0] dm_bufio_shrink_scan at ffffffffa0387cc3 [dm_bufio] + #10 [ffff8813dedfbc40] shrink_slab at ffffffff811a87ce + #11 [ffff8813dedfbd30] shrink_zone at ffffffff811ad778 + #12 [ffff8813dedfbdc0] kswapd at ffffffff811ae92f + #13 [ffff8813dedfbec0] kthread at ffffffff810a8428 + #14 [ffff8813dedfbf50] ret_from_fork at ffffffff81745242 + +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Fixes: bd293d071ffe ("dm bufio: fix deadlock with loop device") +Depends-on: d0a255e795ab ("loop: set PF_MEMALLOC_NOIO for the worker thread") +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-bufio.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/md/dm-bufio.c ++++ b/drivers/md/dm-bufio.c +@@ -1585,7 +1585,9 @@ dm_bufio_shrink_scan(struct shrinker *sh + unsigned long freed; + + c = container_of(shrink, struct dm_bufio_client, shrinker); +- if (!dm_bufio_trylock(c)) ++ if (sc->gfp_mask & __GFP_FS) ++ dm_bufio_lock(c); ++ else if (!dm_bufio_trylock(c)) + return SHRINK_STOP; + + freed = __scan(c, sc->nr_to_scan, sc->gfp_mask); diff --git a/queue-4.9/series b/queue-4.9/series index cd36feaef4c..f255051b5e9 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -25,3 +25,15 @@ x86-lib-cpu-address-missing-prototypes-warning.patch 
drm-vmwgfx-fix-memory-leak-when-too-many-retries-hav.patch perf-pmu-events-fix-missing-cpu_clk_unhalted.core-ev.patch selftests-kvm-adding-config-fragments.patch +hid-wacom-correct-misreported-ekr-ring-values.patch +hid-wacom-correct-distance-scale-for-2nd-gen-intuos-devices.patch +revert-dm-bufio-fix-deadlock-with-loop-device.patch +gpiolib-never-report-open-drain-source-lines-as-input-to-user-space.patch +userfaultfd_release-always-remove-uffd-flags-and-clear-vm_userfaultfd_ctx.patch +x86-retpoline-don-t-clobber-rflags-during-call_nospec-on-i386.patch +x86-apic-handle-missing-global-clockevent-gracefully.patch +x86-boot-save-fields-explicitly-zero-out-everything-else.patch +x86-boot-fix-boot-regression-caused-by-bootparam-sanitizing.patch +dm-btree-fix-order-of-block-initialization-in-btree_split_beneath.patch +dm-space-map-metadata-fix-missing-store-of-apply_bops-return-value.patch +dm-table-fix-invalid-memory-accesses-with-too-high-sector-number.patch diff --git a/queue-4.9/userfaultfd_release-always-remove-uffd-flags-and-clear-vm_userfaultfd_ctx.patch b/queue-4.9/userfaultfd_release-always-remove-uffd-flags-and-clear-vm_userfaultfd_ctx.patch new file mode 100644 index 00000000000..5e4eeb4b05f --- /dev/null +++ b/queue-4.9/userfaultfd_release-always-remove-uffd-flags-and-clear-vm_userfaultfd_ctx.patch @@ -0,0 +1,87 @@ +From 46d0b24c5ee10a15dfb25e20642f5a5ed59c5003 Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Sat, 24 Aug 2019 17:54:56 -0700 +Subject: userfaultfd_release: always remove uffd flags and clear vm_userfaultfd_ctx + +From: Oleg Nesterov + +commit 46d0b24c5ee10a15dfb25e20642f5a5ed59c5003 upstream. + +userfaultfd_release() should clear vm_flags/vm_userfaultfd_ctx even if +mm->core_state != NULL. + +Otherwise a page fault can see userfaultfd_missing() == T and use an +already freed userfaultfd_ctx. 
+ +Link: http://lkml.kernel.org/r/20190820160237.GB4983@redhat.com +Fixes: 04f5866e41fb ("coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping") +Signed-off-by: Oleg Nesterov +Reported-by: Kefeng Wang +Reviewed-by: Andrea Arcangeli +Tested-by: Kefeng Wang +Cc: Peter Xu +Cc: Mike Rapoport +Cc: Jann Horn +Cc: Jason Gunthorpe +Cc: Michal Hocko +Cc: Tetsuo Handa +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/userfaultfd.c | 25 +++++++++++++------------ + 1 file changed, 13 insertions(+), 12 deletions(-) + +--- a/fs/userfaultfd.c ++++ b/fs/userfaultfd.c +@@ -464,6 +464,7 @@ static int userfaultfd_release(struct in + /* len == 0 means wake all */ + struct userfaultfd_wake_range range = { .len = 0, }; + unsigned long new_flags; ++ bool still_valid; + + ACCESS_ONCE(ctx->released) = true; + +@@ -479,8 +480,7 @@ static int userfaultfd_release(struct in + * taking the mmap_sem for writing. + */ + down_write(&mm->mmap_sem); +- if (!mmget_still_valid(mm)) +- goto skip_mm; ++ still_valid = mmget_still_valid(mm); + prev = NULL; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + cond_resched(); +@@ -491,19 +491,20 @@ static int userfaultfd_release(struct in + continue; + } + new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP); +- prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end, +- new_flags, vma->anon_vma, +- vma->vm_file, vma->vm_pgoff, +- vma_policy(vma), +- NULL_VM_UFFD_CTX); +- if (prev) +- vma = prev; +- else +- prev = vma; ++ if (still_valid) { ++ prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end, ++ new_flags, vma->anon_vma, ++ vma->vm_file, vma->vm_pgoff, ++ vma_policy(vma), ++ NULL_VM_UFFD_CTX); ++ if (prev) ++ vma = prev; ++ else ++ prev = vma; ++ } + vma->vm_flags = new_flags; + vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + } +-skip_mm: + up_write(&mm->mmap_sem); + mmput(mm); + wakeup: diff --git 
a/queue-4.9/x86-apic-handle-missing-global-clockevent-gracefully.patch b/queue-4.9/x86-apic-handle-missing-global-clockevent-gracefully.patch new file mode 100644 index 00000000000..5448785de93 --- /dev/null +++ b/queue-4.9/x86-apic-handle-missing-global-clockevent-gracefully.patch @@ -0,0 +1,154 @@ +From f897e60a12f0b9146357780d317879bce2a877dc Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 9 Aug 2019 14:54:07 +0200 +Subject: x86/apic: Handle missing global clockevent gracefully + +From: Thomas Gleixner + +commit f897e60a12f0b9146357780d317879bce2a877dc upstream. + +Some newer machines do not advertise legacy timers. The kernel can handle +that situation if the TSC and the CPU frequency are enumerated by CPUID or +MSRs and the CPU supports TSC deadline timer. If the CPU does not support +TSC deadline timer the local APIC timer frequency has to be known as well. + +Some Ryzen machines do not advertise legacy timers, but there is no +reliable way to determine the bus frequency which feeds the local APIC +timer when the machine allows overclocking of that frequency. + +As there is no legacy timer the local APIC timer calibration crashes due to +a NULL pointer dereference when accessing the not installed global clock +event device. + +Switch the calibration loop to a non interrupt based one, which polls +either TSC (if frequency is known) or jiffies. The latter requires a global +clockevent. As the machines which do not have a global clockevent installed +have a known TSC frequency this is a non issue. For older machines where +TSC frequency is not known, there is no known case where the legacy timers +do not exist as that would have been reported long ago.
+ +Reported-by: Daniel Drake +Reported-by: Jiri Slaby +Signed-off-by: Thomas Gleixner +Tested-by: Daniel Drake +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908091443030.21433@nanos.tec.linutronix.de +Link: http://bugzilla.opensuse.org/show_bug.cgi?id=1142926#c12 +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/apic/apic.c | 68 ++++++++++++++++++++++++++++++++++---------- + 1 file changed, 53 insertions(+), 15 deletions(-) + +--- a/arch/x86/kernel/apic/apic.c ++++ b/arch/x86/kernel/apic/apic.c +@@ -629,7 +629,7 @@ static __initdata unsigned long lapic_ca + static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; + + /* +- * Temporary interrupt handler. ++ * Temporary interrupt handler and polled calibration function. + */ + static void __init lapic_cal_handler(struct clock_event_device *dev) + { +@@ -713,7 +713,8 @@ calibrate_by_pmtimer(long deltapm, long + static int __init calibrate_APIC_clock(void) + { + struct clock_event_device *levt = this_cpu_ptr(&lapic_events); +- void (*real_handler)(struct clock_event_device *dev); ++ u64 tsc_perj = 0, tsc_start = 0; ++ unsigned long jif_start; + unsigned long deltaj; + long delta, deltatsc; + int pm_referenced = 0; +@@ -742,29 +743,65 @@ static int __init calibrate_APIC_clock(v + apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" + "calibrating APIC timer ...\n"); + ++ /* ++ * There are platforms w/o global clockevent devices. Instead of ++ * making the calibration conditional on that, use a polling based ++ * approach everywhere. ++ */ + local_irq_disable(); + +- /* Replace the global interrupt handler */ +- real_handler = global_clock_event->event_handler; +- global_clock_event->event_handler = lapic_cal_handler; +- + /* + * Setup the APIC counter to maximum. 
There is no way the lapic + * can underflow in the 100ms detection time frame + */ + __setup_APIC_LVTT(0xffffffff, 0, 0); + +- /* Let the interrupts run */ ++ /* ++ * Methods to terminate the calibration loop: ++ * 1) Global clockevent if available (jiffies) ++ * 2) TSC if available and frequency is known ++ */ ++ jif_start = READ_ONCE(jiffies); ++ ++ if (tsc_khz) { ++ tsc_start = rdtsc(); ++ tsc_perj = div_u64((u64)tsc_khz * 1000, HZ); ++ } ++ ++ /* ++ * Enable interrupts so the tick can fire, if a global ++ * clockevent device is available ++ */ + local_irq_enable(); + +- while (lapic_cal_loops <= LAPIC_CAL_LOOPS) +- cpu_relax(); ++ while (lapic_cal_loops <= LAPIC_CAL_LOOPS) { ++ /* Wait for a tick to elapse */ ++ while (1) { ++ if (tsc_khz) { ++ u64 tsc_now = rdtsc(); ++ if ((tsc_now - tsc_start) >= tsc_perj) { ++ tsc_start += tsc_perj; ++ break; ++ } ++ } else { ++ unsigned long jif_now = READ_ONCE(jiffies); ++ ++ if (time_after(jif_now, jif_start)) { ++ jif_start = jif_now; ++ break; ++ } ++ } ++ cpu_relax(); ++ } ++ ++ /* Invoke the calibration routine */ ++ local_irq_disable(); ++ lapic_cal_handler(NULL); ++ local_irq_enable(); ++ } + + local_irq_disable(); + +- /* Restore the real event handler */ +- global_clock_event->event_handler = real_handler; +- + /* Build delta t1-t2 as apic timer counts down */ + delta = lapic_cal_t1 - lapic_cal_t2; + apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); +@@ -814,10 +851,11 @@ static int __init calibrate_APIC_clock(v + levt->features &= ~CLOCK_EVT_FEAT_DUMMY; + + /* +- * PM timer calibration failed or not turned on +- * so lets try APIC timer based calibration ++ * PM timer calibration failed or not turned on so lets try APIC ++ * timer based calibration, if a global clockevent device is ++ * available. + */ +- if (!pm_referenced) { ++ if (!pm_referenced && global_clock_event) { + apic_printk(APIC_VERBOSE, "... 
verify APIC timer\n"); + + /* diff --git a/queue-4.9/x86-boot-fix-boot-regression-caused-by-bootparam-sanitizing.patch b/queue-4.9/x86-boot-fix-boot-regression-caused-by-bootparam-sanitizing.patch new file mode 100644 index 00000000000..2429e42b1ad --- /dev/null +++ b/queue-4.9/x86-boot-fix-boot-regression-caused-by-bootparam-sanitizing.patch @@ -0,0 +1,41 @@ +From 7846f58fba964af7cb8cf77d4d13c33254725211 Mon Sep 17 00:00:00 2001 +From: John Hubbard +Date: Wed, 21 Aug 2019 12:25:13 -0700 +Subject: x86/boot: Fix boot regression caused by bootparam sanitizing + +From: John Hubbard + +commit 7846f58fba964af7cb8cf77d4d13c33254725211 upstream. + +commit a90118c445cc ("x86/boot: Save fields explicitly, zero out everything +else") had two errors: + + * It preserved boot_params.acpi_rsdp_addr, and + * It failed to preserve boot_params.hdr + +Therefore, zero out acpi_rsdp_addr, and preserve hdr. + +Fixes: a90118c445cc ("x86/boot: Save fields explicitly, zero out everything else") +Reported-by: Neil MacLeod +Suggested-by: Thomas Gleixner +Signed-off-by: John Hubbard +Signed-off-by: Thomas Gleixner +Tested-by: Neil MacLeod +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20190821192513.20126-1-jhubbard@nvidia.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/bootparam_utils.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/include/asm/bootparam_utils.h ++++ b/arch/x86/include/asm/bootparam_utils.h +@@ -70,6 +70,7 @@ static void sanitize_boot_params(struct + BOOT_PARAM_PRESERVE(eddbuf_entries), + BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries), + BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer), ++ BOOT_PARAM_PRESERVE(hdr), + BOOT_PARAM_PRESERVE(eddbuf), + }; + diff --git a/queue-4.9/x86-boot-save-fields-explicitly-zero-out-everything-else.patch b/queue-4.9/x86-boot-save-fields-explicitly-zero-out-everything-else.patch new file mode 100644 index 00000000000..d262251d9a6 --- /dev/null +++ 
b/queue-4.9/x86-boot-save-fields-explicitly-zero-out-everything-else.patch @@ -0,0 +1,105 @@ +From a90118c445cc7f07781de26a9684d4ec58bfcfd1 Mon Sep 17 00:00:00 2001 +From: John Hubbard +Date: Tue, 30 Jul 2019 22:46:27 -0700 +Subject: x86/boot: Save fields explicitly, zero out everything else + +From: John Hubbard + +commit a90118c445cc7f07781de26a9684d4ec58bfcfd1 upstream. + +Recent gcc compilers (gcc 9.1) generate warnings about an out of bounds +memset, if the memset goes across several fields of a struct. This +generated a couple of warnings on x86_64 builds in sanitize_boot_params(). + +Fix this by explicitly saving the fields in struct boot_params +that are intended to be preserved, and zeroing all the rest. + +[ tglx: Tagged for stable as it breaks the warning free build there as well ] + +Suggested-by: Thomas Gleixner +Suggested-by: H. Peter Anvin +Signed-off-by: John Hubbard +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20190731054627.5627-2-jhubbard@nvidia.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/bootparam_utils.h | 59 +++++++++++++++++++++++++-------- + 1 file changed, 46 insertions(+), 13 deletions(-) + +--- a/arch/x86/include/asm/bootparam_utils.h ++++ b/arch/x86/include/asm/bootparam_utils.h +@@ -17,6 +17,20 @@ + * Note: efi_info is commonly left uninitialized, but that field has a + * private magic, so it is better to leave it unchanged.
+ */ ++ ++#define sizeof_mbr(type, member) ({ sizeof(((type *)0)->member); }) ++ ++#define BOOT_PARAM_PRESERVE(struct_member) \ ++ { \ ++ .start = offsetof(struct boot_params, struct_member), \ ++ .len = sizeof_mbr(struct boot_params, struct_member), \ ++ } ++ ++struct boot_params_to_save { ++ unsigned int start; ++ unsigned int len; ++}; ++ + static void sanitize_boot_params(struct boot_params *boot_params) + { + /* +@@ -35,19 +49,38 @@ static void sanitize_boot_params(struct + */ + if (boot_params->sentinel) { + /* fields in boot_params are left uninitialized, clear them */ +- memset(&boot_params->ext_ramdisk_image, 0, +- (char *)&boot_params->efi_info - +- (char *)&boot_params->ext_ramdisk_image); +- memset(&boot_params->kbd_status, 0, +- (char *)&boot_params->hdr - +- (char *)&boot_params->kbd_status); +- memset(&boot_params->_pad7[0], 0, +- (char *)&boot_params->edd_mbr_sig_buffer[0] - +- (char *)&boot_params->_pad7[0]); +- memset(&boot_params->_pad8[0], 0, +- (char *)&boot_params->eddbuf[0] - +- (char *)&boot_params->_pad8[0]); +- memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9)); ++ static struct boot_params scratch; ++ char *bp_base = (char *)boot_params; ++ char *save_base = (char *)&scratch; ++ int i; ++ ++ const struct boot_params_to_save to_save[] = { ++ BOOT_PARAM_PRESERVE(screen_info), ++ BOOT_PARAM_PRESERVE(apm_bios_info), ++ BOOT_PARAM_PRESERVE(tboot_addr), ++ BOOT_PARAM_PRESERVE(ist_info), ++ BOOT_PARAM_PRESERVE(hd0_info), ++ BOOT_PARAM_PRESERVE(hd1_info), ++ BOOT_PARAM_PRESERVE(sys_desc_table), ++ BOOT_PARAM_PRESERVE(olpc_ofw_header), ++ BOOT_PARAM_PRESERVE(efi_info), ++ BOOT_PARAM_PRESERVE(alt_mem_k), ++ BOOT_PARAM_PRESERVE(scratch), ++ BOOT_PARAM_PRESERVE(e820_entries), ++ BOOT_PARAM_PRESERVE(eddbuf_entries), ++ BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries), ++ BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer), ++ BOOT_PARAM_PRESERVE(eddbuf), ++ }; ++ ++ memset(&scratch, 0, sizeof(scratch)); ++ ++ for (i = 0; i < ARRAY_SIZE(to_save); i++) { ++ 
memcpy(save_base + to_save[i].start, ++ bp_base + to_save[i].start, to_save[i].len); ++ } ++ ++ memcpy(boot_params, save_base, sizeof(*boot_params)); + } + } + diff --git a/queue-4.9/x86-retpoline-don-t-clobber-rflags-during-call_nospec-on-i386.patch b/queue-4.9/x86-retpoline-don-t-clobber-rflags-during-call_nospec-on-i386.patch new file mode 100644 index 00000000000..21957d9456c --- /dev/null +++ b/queue-4.9/x86-retpoline-don-t-clobber-rflags-during-call_nospec-on-i386.patch @@ -0,0 +1,79 @@ +From b63f20a778c88b6a04458ed6ffc69da953d3a109 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 22 Aug 2019 14:11:22 -0700 +Subject: x86/retpoline: Don't clobber RFLAGS during CALL_NOSPEC on i386 + +From: Sean Christopherson + +commit b63f20a778c88b6a04458ed6ffc69da953d3a109 upstream. + +Use 'lea' instead of 'add' when adjusting %rsp in CALL_NOSPEC so as to +avoid clobbering flags. + +KVM's emulator makes indirect calls into a jump table of sorts, where +the destination of the CALL_NOSPEC is a small blob of code that performs +fast emulation by executing the target instruction with fixed operands. + + adcb_al_dl: + 0x000339f8 <+0>: adc %dl,%al + 0x000339fa <+2>: ret + +A major motivation for doing fast emulation is to leverage the CPU to +handle consumption and manipulation of arithmetic flags, i.e. RFLAGS is +both an input and output to the target of CALL_NOSPEC. Clobbering flags +results in all sorts of incorrect emulation, e.g. Jcc instructions often +take the wrong path. Sans the nops... 
+ + asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" + 0x0003595a <+58>: mov 0xc0(%ebx),%eax + 0x00035960 <+64>: mov 0x60(%ebx),%edx + 0x00035963 <+67>: mov 0x90(%ebx),%ecx + 0x00035969 <+73>: push %edi + 0x0003596a <+74>: popf + 0x0003596b <+75>: call *%esi + 0x000359a0 <+128>: pushf + 0x000359a1 <+129>: pop %edi + 0x000359a2 <+130>: mov %eax,0xc0(%ebx) + 0x000359b1 <+145>: mov %edx,0x60(%ebx) + + ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); + 0x000359a8 <+136>: mov -0x10(%ebp),%eax + 0x000359ab <+139>: and $0x8d5,%edi + 0x000359b4 <+148>: and $0xfffff72a,%eax + 0x000359b9 <+153>: or %eax,%edi + 0x000359bd <+157>: mov %edi,0x4(%ebx) + +For the most part this has gone unnoticed as emulation of guest code +that can trigger fast emulation is effectively limited to MMIO when +running on modern hardware, and MMIO is rarely, if ever, accessed by +instructions that affect or consume flags. + +Breakage is almost instantaneous when running with unrestricted guest +disabled, in which case KVM must emulate all instructions when the guest +has invalid state, e.g. when the guest is in Big Real Mode during early +BIOS. + +Fixes: 776b043848fd2 ("x86/retpoline: Add initial retpoline support") +Fixes: 1a29b5b7f347a ("KVM: x86: Make indirect calls in emulator speculation safe") +Signed-off-by: Sean Christopherson +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20190822211122.27579-1-sean.j.christopherson@intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/nospec-branch.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -196,7 +196,7 @@ + " lfence;\n" \ + " jmp 902b;\n" \ + " .align 16\n" \ +- "903: addl $4, %%esp;\n" \ ++ "903: lea 4(%%esp), %%esp;\n" \ + " pushl %[thunk_target];\n" \ + " ret;\n" \ + " .align 16\n" \ -- 2.47.3