From 2f91257a421ea2cf2bd93c5272fcc6b809a3cce2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 24 Apr 2018 15:51:31 +0200 Subject: [PATCH] 4.16-stable patches added patches: alarmtimer-init-nanosleep-alarm-timer-on-stack.patch mac80211_hwsim-fix-use-after-free-bug-in-hwsim_exit_net.patch mm-vmscan-allow-preallocating-memory-for-register_shrinker.patch netfilter-x_tables-limit-allocation-requests-for-blob-rule-heads.patch perf-fix-sample_max_stack-maximum-check.patch perf-return-proper-values-for-user-stack-errors.patch rdma-mlx5-fix-null-dereference-while-accessing-xrc_tgt-qps.patch revert-kvm-x86-fix-smram-accessing-even-if-vm-is-shutdown.patch --- ...-init-nanosleep-alarm-timer-on-stack.patch | 122 +++++++++++++++++ ...use-after-free-bug-in-hwsim_exit_net.patch | 42 ++++++ ...ocating-memory-for-register_shrinker.patch | 127 ++++++++++++++++++ ...ocation-requests-for-blob-rule-heads.patch | 32 +++++ ...f-fix-sample_max_stack-maximum-check.patch | 90 +++++++++++++ ...-proper-values-for-user-stack-errors.patch | 46 +++++++ ...eference-while-accessing-xrc_tgt-qps.patch | 77 +++++++++++ ...ram-accessing-even-if-vm-is-shutdown.patch | 72 ++++++++++ queue-4.16/series | 8 ++ 9 files changed, 616 insertions(+) create mode 100644 queue-4.16/alarmtimer-init-nanosleep-alarm-timer-on-stack.patch create mode 100644 queue-4.16/mac80211_hwsim-fix-use-after-free-bug-in-hwsim_exit_net.patch create mode 100644 queue-4.16/mm-vmscan-allow-preallocating-memory-for-register_shrinker.patch create mode 100644 queue-4.16/netfilter-x_tables-limit-allocation-requests-for-blob-rule-heads.patch create mode 100644 queue-4.16/perf-fix-sample_max_stack-maximum-check.patch create mode 100644 queue-4.16/perf-return-proper-values-for-user-stack-errors.patch create mode 100644 queue-4.16/rdma-mlx5-fix-null-dereference-while-accessing-xrc_tgt-qps.patch create mode 100644 queue-4.16/revert-kvm-x86-fix-smram-accessing-even-if-vm-is-shutdown.patch diff --git 
a/queue-4.16/alarmtimer-init-nanosleep-alarm-timer-on-stack.patch b/queue-4.16/alarmtimer-init-nanosleep-alarm-timer-on-stack.patch new file mode 100644 index 00000000000..47f3725300a --- /dev/null +++ b/queue-4.16/alarmtimer-init-nanosleep-alarm-timer-on-stack.patch @@ -0,0 +1,122 @@ +From bd03143007eb9b03a7f2316c677780561b68ba2a Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Mon, 26 Mar 2018 15:29:57 +0200 +Subject: alarmtimer: Init nanosleep alarm timer on stack + +From: Thomas Gleixner + +commit bd03143007eb9b03a7f2316c677780561b68ba2a upstream. + +syzbot reported the following debugobjects splat: + + ODEBUG: object is on stack, but not annotated + WARNING: CPU: 0 PID: 4185 at lib/debugobjects.c:328 + + RIP: 0010:debug_object_is_on_stack lib/debugobjects.c:327 [inline] + debug_object_init+0x17/0x20 lib/debugobjects.c:391 + debug_hrtimer_init kernel/time/hrtimer.c:410 [inline] + debug_init kernel/time/hrtimer.c:458 [inline] + hrtimer_init+0x8c/0x410 kernel/time/hrtimer.c:1259 + alarm_init kernel/time/alarmtimer.c:339 [inline] + alarm_timer_nsleep+0x164/0x4d0 kernel/time/alarmtimer.c:787 + SYSC_clock_nanosleep kernel/time/posix-timers.c:1226 [inline] + SyS_clock_nanosleep+0x235/0x330 kernel/time/posix-timers.c:1204 + do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x42/0xb7 + +This happens because the hrtimer for the alarm nanosleep is on stack, but +the code does not use the proper debug objects initialization. + +Split out the code for the allocated use cases and invoke +hrtimer_init_on_stack() for the nanosleep related functions. 
+ +Reported-by: syzbot+a3e0726462b2e346a31d@syzkaller.appspotmail.com +Signed-off-by: Thomas Gleixner +Cc: John Stultz +Cc: syzkaller-bugs@googlegroups.com +Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1803261528270.1585@nanos.tec.linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/time/alarmtimer.c | 34 ++++++++++++++++++++++++++-------- + 1 file changed, 26 insertions(+), 8 deletions(-) + +--- a/kernel/time/alarmtimer.c ++++ b/kernel/time/alarmtimer.c +@@ -326,6 +326,17 @@ static int alarmtimer_resume(struct devi + } + #endif + ++static void ++__alarm_init(struct alarm *alarm, enum alarmtimer_type type, ++ enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) ++{ ++ timerqueue_init(&alarm->node); ++ alarm->timer.function = alarmtimer_fired; ++ alarm->function = function; ++ alarm->type = type; ++ alarm->state = ALARMTIMER_STATE_INACTIVE; ++} ++ + /** + * alarm_init - Initialize an alarm structure + * @alarm: ptr to alarm to be initialized +@@ -335,13 +346,9 @@ static int alarmtimer_resume(struct devi + void alarm_init(struct alarm *alarm, enum alarmtimer_type type, + enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) + { +- timerqueue_init(&alarm->node); + hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid, +- HRTIMER_MODE_ABS); +- alarm->timer.function = alarmtimer_fired; +- alarm->function = function; +- alarm->type = type; +- alarm->state = ALARMTIMER_STATE_INACTIVE; ++ HRTIMER_MODE_ABS); ++ __alarm_init(alarm, type, function); + } + EXPORT_SYMBOL_GPL(alarm_init); + +@@ -719,6 +726,8 @@ static int alarmtimer_do_nsleep(struct a + + __set_current_state(TASK_RUNNING); + ++ destroy_hrtimer_on_stack(&alarm->timer); ++ + if (!alarm->data) + return 0; + +@@ -740,6 +749,15 @@ static int alarmtimer_do_nsleep(struct a + return -ERESTART_RESTARTBLOCK; + } + ++static void ++alarm_init_on_stack(struct alarm *alarm, enum alarmtimer_type type, ++ enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) ++{ ++ 
hrtimer_init_on_stack(&alarm->timer, alarm_bases[type].base_clockid, ++ HRTIMER_MODE_ABS); ++ __alarm_init(alarm, type, function); ++} ++ + /** + * alarm_timer_nsleep_restart - restartblock alarmtimer nsleep + * @restart: ptr to restart block +@@ -752,7 +770,7 @@ static long __sched alarm_timer_nsleep_r + ktime_t exp = restart->nanosleep.expires; + struct alarm alarm; + +- alarm_init(&alarm, type, alarmtimer_nsleep_wakeup); ++ alarm_init_on_stack(&alarm, type, alarmtimer_nsleep_wakeup); + + return alarmtimer_do_nsleep(&alarm, exp, type); + } +@@ -784,7 +802,7 @@ static int alarm_timer_nsleep(const cloc + if (!capable(CAP_WAKE_ALARM)) + return -EPERM; + +- alarm_init(&alarm, type, alarmtimer_nsleep_wakeup); ++ alarm_init_on_stack(&alarm, type, alarmtimer_nsleep_wakeup); + + exp = timespec64_to_ktime(*tsreq); + /* Convert (if necessary) to absolute time */ diff --git a/queue-4.16/mac80211_hwsim-fix-use-after-free-bug-in-hwsim_exit_net.patch b/queue-4.16/mac80211_hwsim-fix-use-after-free-bug-in-hwsim_exit_net.patch new file mode 100644 index 00000000000..503c3a3141f --- /dev/null +++ b/queue-4.16/mac80211_hwsim-fix-use-after-free-bug-in-hwsim_exit_net.patch @@ -0,0 +1,42 @@ +From 8cfd36a0b53aeb4ec21d81eb79706697b84dfc3d Mon Sep 17 00:00:00 2001 +From: Benjamin Beichler +Date: Wed, 7 Mar 2018 18:11:07 +0100 +Subject: mac80211_hwsim: fix use-after-free bug in hwsim_exit_net + +From: Benjamin Beichler + +commit 8cfd36a0b53aeb4ec21d81eb79706697b84dfc3d upstream. + +When destroying a net namespace, all hwsim interfaces, which are not +created in default namespace are deleted. But the async deletion of the +interfaces could last longer than the actual destruction of the +namespace, which results in a use-after-free bug. Therefore use +synchronous deletion in this case. 
+ +Fixes: 100cb9ff40e0 ("mac80211_hwsim: Allow managing radios from non-initial namespaces") +Reported-by: syzbot+70ce058e01259de7bb1d@syzkaller.appspotmail.com +Signed-off-by: Benjamin Beichler +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/wireless/mac80211_hwsim.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/net/wireless/mac80211_hwsim.c ++++ b/drivers/net/wireless/mac80211_hwsim.c +@@ -3484,8 +3484,12 @@ static void __net_exit hwsim_exit_net(st + list_del(&data->list); + rhashtable_remove_fast(&hwsim_radios_rht, &data->rht, + hwsim_rht_params); +- INIT_WORK(&data->destroy_work, destroy_radio); +- queue_work(hwsim_wq, &data->destroy_work); ++ hwsim_radios_generation++; ++ spin_unlock_bh(&hwsim_radio_lock); ++ mac80211_hwsim_del_radio(data, ++ wiphy_name(data->hw->wiphy), ++ NULL); ++ spin_lock_bh(&hwsim_radio_lock); + } + spin_unlock_bh(&hwsim_radio_lock); + } diff --git a/queue-4.16/mm-vmscan-allow-preallocating-memory-for-register_shrinker.patch b/queue-4.16/mm-vmscan-allow-preallocating-memory-for-register_shrinker.patch new file mode 100644 index 00000000000..3d4301d7ed7 --- /dev/null +++ b/queue-4.16/mm-vmscan-allow-preallocating-memory-for-register_shrinker.patch @@ -0,0 +1,127 @@ +From 8e04944f0ea8b838399049bdcda920ab36ae3b04 Mon Sep 17 00:00:00 2001 +From: Tetsuo Handa +Date: Wed, 4 Apr 2018 19:53:07 +0900 +Subject: mm,vmscan: Allow preallocating memory for register_shrinker(). + +From: Tetsuo Handa + +commit 8e04944f0ea8b838399049bdcda920ab36ae3b04 upstream. + +syzbot is catching so many bugs triggered by commit 9ee332d99e4d5a97 +("sget(): handle failures of register_shrinker()"). That commit expected +that calling kill_sb() from deactivate_locked_super() without successful +fill_super() is safe, but the reality was different; some callers assign +attributes which are needed for kill_sb() after sget() succeeds. 
+ +For example, [1] is a report where sb->s_mode (which seems to be either +FMODE_READ | FMODE_EXCL | FMODE_WRITE or FMODE_READ | FMODE_EXCL) is not +assigned unless sget() succeeds. But it is not worth complicating sget() +so that the register_shrinker() failure path can safely call +kill_block_super() via kill_sb(). Making alloc_super() fail if memory +allocation for register_shrinker() failed is much simpler. Let's avoid +calling deactivate_locked_super() from sget_userns() by preallocating +memory for the shrinker and making register_shrinker() in sget_userns() +never fail. + +[1] https://syzkaller.appspot.com/bug?id=588996a25a2587be2e3a54e8646728fb9cae44e7 + +Signed-off-by: Tetsuo Handa +Reported-by: syzbot +Cc: Al Viro +Cc: Michal Hocko +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- + fs/super.c | 9 ++++----- + include/linux/shrinker.h | 7 +++++-- + mm/vmscan.c | 21 ++++++++++++++++++++- + 3 files changed, 29 insertions(+), 8 deletions(-) + +--- a/fs/super.c ++++ b/fs/super.c +@@ -166,6 +166,7 @@ static void destroy_unused_super(struct + security_sb_free(s); + put_user_ns(s->s_user_ns); + kfree(s->s_subtype); ++ free_prealloced_shrinker(&s->s_shrink); + /* no delays needed */ + destroy_super_work(&s->destroy_work); + } +@@ -251,6 +252,8 @@ static struct super_block *alloc_super(s + s->s_shrink.count_objects = super_cache_count; + s->s_shrink.batch = 1024; + s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE; ++ if (prealloc_shrinker(&s->s_shrink)) ++ goto fail; + return s; + + fail: +@@ -517,11 +520,7 @@ retry: + hlist_add_head(&s->s_instances, &type->fs_supers); + spin_unlock(&sb_lock); + get_filesystem(type); +- err = register_shrinker(&s->s_shrink); +- if (err) { +- deactivate_locked_super(s); +- s = ERR_PTR(err); +- } ++ register_shrinker_prepared(&s->s_shrink); + return s; + } + +--- a/include/linux/shrinker.h ++++ b/include/linux/shrinker.h +@@ -75,6 +75,9 @@ struct shrinker { + #define SHRINKER_NUMA_AWARE (1 << 0) + #define 
SHRINKER_MEMCG_AWARE (1 << 1) + +-extern int register_shrinker(struct shrinker *); +-extern void unregister_shrinker(struct shrinker *); ++extern int prealloc_shrinker(struct shrinker *shrinker); ++extern void register_shrinker_prepared(struct shrinker *shrinker); ++extern int register_shrinker(struct shrinker *shrinker); ++extern void unregister_shrinker(struct shrinker *shrinker); ++extern void free_prealloced_shrinker(struct shrinker *shrinker); + #endif +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -258,7 +258,7 @@ unsigned long lruvec_lru_size(struct lru + /* + * Add a shrinker callback to be called from the vm. + */ +-int register_shrinker(struct shrinker *shrinker) ++int prealloc_shrinker(struct shrinker *shrinker) + { + size_t size = sizeof(*shrinker->nr_deferred); + +@@ -268,10 +268,29 @@ int register_shrinker(struct shrinker *s + shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); + if (!shrinker->nr_deferred) + return -ENOMEM; ++ return 0; ++} ++ ++void free_prealloced_shrinker(struct shrinker *shrinker) ++{ ++ kfree(shrinker->nr_deferred); ++ shrinker->nr_deferred = NULL; ++} + ++void register_shrinker_prepared(struct shrinker *shrinker) ++{ + down_write(&shrinker_rwsem); + list_add_tail(&shrinker->list, &shrinker_list); + up_write(&shrinker_rwsem); ++} ++ ++int register_shrinker(struct shrinker *shrinker) ++{ ++ int err = prealloc_shrinker(shrinker); ++ ++ if (err) ++ return err; ++ register_shrinker_prepared(shrinker); + return 0; + } + EXPORT_SYMBOL(register_shrinker); diff --git a/queue-4.16/netfilter-x_tables-limit-allocation-requests-for-blob-rule-heads.patch b/queue-4.16/netfilter-x_tables-limit-allocation-requests-for-blob-rule-heads.patch new file mode 100644 index 00000000000..593f0440aa2 --- /dev/null +++ b/queue-4.16/netfilter-x_tables-limit-allocation-requests-for-blob-rule-heads.patch @@ -0,0 +1,32 @@ +From 9d5c12a7c08f67999772065afd50fb222072114e Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Tue, 27 Feb 2018 19:42:32 +0100 +Subject: 
netfilter: x_tables: limit allocation requests for blob rule heads + +From: Florian Westphal + +commit 9d5c12a7c08f67999772065afd50fb222072114e upstream. + +This is a very conservative limit (134217728 rules), but good +enough to not trigger frequent oom from syzkaller. + +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman + +--- + net/netfilter/x_tables.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/netfilter/x_tables.c ++++ b/net/netfilter/x_tables.c +@@ -805,6 +805,9 @@ EXPORT_SYMBOL(xt_check_entry_offsets); + */ + unsigned int *xt_alloc_entry_offsets(unsigned int size) + { ++ if (size > XT_MAX_TABLE_SIZE / sizeof(unsigned int)) ++ return NULL; ++ + return kvmalloc_array(size, sizeof(unsigned int), GFP_KERNEL | __GFP_ZERO); + + } diff --git a/queue-4.16/perf-fix-sample_max_stack-maximum-check.patch b/queue-4.16/perf-fix-sample_max_stack-maximum-check.patch new file mode 100644 index 00000000000..6ebb9cbc78a --- /dev/null +++ b/queue-4.16/perf-fix-sample_max_stack-maximum-check.patch @@ -0,0 +1,90 @@ +From 5af44ca53d019de47efe6dbc4003dd518e5197ed Mon Sep 17 00:00:00 2001 +From: Jiri Olsa +Date: Sun, 15 Apr 2018 11:23:51 +0200 +Subject: perf: Fix sample_max_stack maximum check + +From: Jiri Olsa + +commit 5af44ca53d019de47efe6dbc4003dd518e5197ed upstream. + +The syzbot hit KASAN bug in perf_callchain_store having the entry stored +behind the allocated bounds [1]. + +We miss the sample_max_stack check for the initial event that allocates +callchain buffers. This missing check allows to create an event with +sample_max_stack value bigger than the global sysctl maximum: + + # sysctl -a | grep perf_event_max_stack + kernel.perf_event_max_stack = 127 + + # perf record -vv -C 1 -e cycles/max-stack=256/ kill + ... + perf_event_attr: + size 112 + ... 
+ sample_max_stack 256 + ------------------------------------------------------------ + sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 = 4 + +Note the '-C 1', which forces perf record to create just single event. +Otherwise it opens event for every cpu, then the sample_max_stack check +fails on the second event and all's fine. + +The fix is to run the sample_max_stack check also for the first event +with callchains. + +[1] https://marc.info/?l=linux-kernel&m=152352732920874&w=2 + +Reported-by: syzbot+7c449856228b63ac951e@syzkaller.appspotmail.com +Signed-off-by: Jiri Olsa +Cc: Alexander Shishkin +Cc: Andi Kleen +Cc: H. Peter Anvin +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: syzkaller-bugs@googlegroups.com +Cc: x86@kernel.org +Fixes: 97c79a38cd45 ("perf core: Per event callchain limit") +Link: http://lkml.kernel.org/r/20180415092352.12403-2-jolsa@kernel.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/events/callchain.c | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +--- a/kernel/events/callchain.c ++++ b/kernel/events/callchain.c +@@ -119,19 +119,22 @@ int get_callchain_buffers(int event_max_ + goto exit; + } + ++ /* ++ * If requesting per event more than the global cap, ++ * return a different error to help userspace figure ++ * this out. ++ * ++ * And also do it here so that we have &callchain_mutex held. ++ */ ++ if (event_max_stack > sysctl_perf_event_max_stack) { ++ err = -EOVERFLOW; ++ goto exit; ++ } ++ + if (count > 1) { + /* If the allocation failed, give up */ + if (!callchain_cpus_entries) + err = -ENOMEM; +- /* +- * If requesting per event more than the global cap, +- * return a different error to help userspace figure +- * this out. +- * +- * And also do it here so that we have &callchain_mutex held. 
+- */ +- if (event_max_stack > sysctl_perf_event_max_stack) +- err = -EOVERFLOW; + goto exit; + } + diff --git a/queue-4.16/perf-return-proper-values-for-user-stack-errors.patch b/queue-4.16/perf-return-proper-values-for-user-stack-errors.patch new file mode 100644 index 00000000000..81acd6d1ca4 --- /dev/null +++ b/queue-4.16/perf-return-proper-values-for-user-stack-errors.patch @@ -0,0 +1,46 @@ +From 78b562fbfa2cf0a9fcb23c3154756b690f4905c1 Mon Sep 17 00:00:00 2001 +From: Jiri Olsa +Date: Sun, 15 Apr 2018 11:23:50 +0200 +Subject: perf: Return proper values for user stack errors + +From: Jiri Olsa + +commit 78b562fbfa2cf0a9fcb23c3154756b690f4905c1 upstream. + +Return immediately when we find issue in the user stack checks. The +error value could get overwritten by following check for +PERF_SAMPLE_REGS_INTR. + +Signed-off-by: Jiri Olsa +Cc: Alexander Shishkin +Cc: Andi Kleen +Cc: H. Peter Anvin +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: Stephane Eranian +Cc: Thomas Gleixner +Cc: syzkaller-bugs@googlegroups.com +Cc: x86@kernel.org +Fixes: 60e2364e60e8 ("perf: Add ability to sample machine state on interrupt") +Link: http://lkml.kernel.org/r/20180415092352.12403-1-jolsa@kernel.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/events/core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -9730,9 +9730,9 @@ static int perf_copy_attr(struct perf_ev + * __u16 sample size limit. 
+ */ + if (attr->sample_stack_user >= USHRT_MAX) +- ret = -EINVAL; ++ return -EINVAL; + else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64))) +- ret = -EINVAL; ++ return -EINVAL; + } + + if (attr->sample_type & PERF_SAMPLE_REGS_INTR) diff --git a/queue-4.16/rdma-mlx5-fix-null-dereference-while-accessing-xrc_tgt-qps.patch b/queue-4.16/rdma-mlx5-fix-null-dereference-while-accessing-xrc_tgt-qps.patch new file mode 100644 index 00000000000..efd2aaee303 --- /dev/null +++ b/queue-4.16/rdma-mlx5-fix-null-dereference-while-accessing-xrc_tgt-qps.patch @@ -0,0 +1,77 @@ +From 75a4598209cbe45540baa316c3b51d9db222e96e Mon Sep 17 00:00:00 2001 +From: Leon Romanovsky +Date: Sun, 11 Mar 2018 13:51:32 +0200 +Subject: RDMA/mlx5: Fix NULL dereference while accessing XRC_TGT QPs + +From: Leon Romanovsky + +commit 75a4598209cbe45540baa316c3b51d9db222e96e upstream. + +mlx5 modify_qp() relies on FW that the error will be thrown if wrong +state is supplied. The missing check in FW causes the following crash +while using XRC_TGT QPs. 
+ +[ 14.769632] BUG: unable to handle kernel NULL pointer dereference at (null) +[ 14.771085] IP: mlx5_ib_modify_qp+0xf60/0x13f0 +[ 14.771894] PGD 800000001472e067 P4D 800000001472e067 PUD 14529067 PMD 0 +[ 14.773126] Oops: 0002 [#1] SMP PTI +[ 14.773763] CPU: 0 PID: 365 Comm: ubsan Not tainted 4.16.0-rc1-00038-g8151138c0793 #119 +[ 14.775192] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 +[ 14.777522] RIP: 0010:mlx5_ib_modify_qp+0xf60/0x13f0 +[ 14.778417] RSP: 0018:ffffbf48001c7bd8 EFLAGS: 00010246 +[ 14.779346] RAX: 0000000000000000 RBX: ffff9a8f9447d400 RCX: 0000000000000000 +[ 14.780643] RDX: 0000000000000000 RSI: 000000000000000a RDI: 0000000000000000 +[ 14.781930] RBP: 0000000000000000 R08: 00000000000217b0 R09: ffffffffbc9c1504 +[ 14.783214] R10: fffff4a180519480 R11: ffff9a8f94523600 R12: ffff9a8f9493e240 +[ 14.784507] R13: ffff9a8f9447d738 R14: 000000000000050a R15: 0000000000000000 +[ 14.785800] FS: 00007f545b466700(0000) GS:ffff9a8f9fc00000(0000) knlGS:0000000000000000 +[ 14.787073] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 14.787792] CR2: 0000000000000000 CR3: 00000000144be000 CR4: 00000000000006b0 +[ 14.788689] Call Trace: +[ 14.789007] _ib_modify_qp+0x71/0x120 +[ 14.789475] modify_qp.isra.20+0x207/0x2f0 +[ 14.790010] ib_uverbs_modify_qp+0x90/0xe0 +[ 14.790532] ib_uverbs_write+0x1d2/0x3c0 +[ 14.791049] ? __handle_mm_fault+0x93c/0xe40 +[ 14.791644] __vfs_write+0x36/0x180 +[ 14.792096] ? 
handle_mm_fault+0xc1/0x210 +[ 14.792601] vfs_write+0xad/0x1e0 +[ 14.793018] SyS_write+0x52/0xc0 +[ 14.793422] do_syscall_64+0x75/0x180 +[ 14.793888] entry_SYSCALL_64_after_hwframe+0x21/0x86 +[ 14.794527] RIP: 0033:0x7f545ad76099 +[ 14.794975] RSP: 002b:00007ffd78787468 EFLAGS: 00000287 ORIG_RAX: 0000000000000001 +[ 14.795958] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f545ad76099 +[ 14.797075] RDX: 0000000000000078 RSI: 0000000020009000 RDI: 0000000000000003 +[ 14.798140] RBP: 00007ffd78787470 R08: 00007ffd78787480 R09: 00007ffd78787480 +[ 14.799207] R10: 00007ffd78787480 R11: 0000000000000287 R12: 00005599ada98760 +[ 14.800277] R13: 00007ffd78787560 R14: 0000000000000000 R15: 0000000000000000 +[ 14.801341] Code: 4c 8b 1c 24 48 8b 83 70 02 00 00 48 c7 83 cc 02 00 +00 00 00 00 00 48 c7 83 24 03 00 00 00 00 00 00 c7 83 2c 03 00 00 00 00 +00 00 00 00 00 00 00 48 8b 83 70 02 00 00 c7 40 04 00 00 00 00 4c +[ 14.804012] RIP: mlx5_ib_modify_qp+0xf60/0x13f0 RSP: ffffbf48001c7bd8 +[ 14.804838] CR2: 0000000000000000 +[ 14.805288] ---[ end trace 3f1da0df5c8b7c37 ]--- + +Cc: syzkaller +Reported-by: Maor Gottlieb +Signed-off-by: Leon Romanovsky +Signed-off-by: Doug Ledford +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/hw/mlx5/qp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/infiniband/hw/mlx5/qp.c ++++ b/drivers/infiniband/hw/mlx5/qp.c +@@ -3157,7 +3157,8 @@ static int __mlx5_ib_modify_qp(struct ib + * If we moved a kernel QP to RESET, clean up all old CQ + * entries and reinitialize the QP. + */ +- if (new_state == IB_QPS_RESET && !ibqp->uobject) { ++ if (new_state == IB_QPS_RESET && ++ !ibqp->uobject && ibqp->qp_type != IB_QPT_XRC_TGT) { + mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, + ibqp->srq ? 
to_msrq(ibqp->srq) : NULL); + if (send_cq != recv_cq) diff --git a/queue-4.16/revert-kvm-x86-fix-smram-accessing-even-if-vm-is-shutdown.patch b/queue-4.16/revert-kvm-x86-fix-smram-accessing-even-if-vm-is-shutdown.patch new file mode 100644 index 00000000000..e37a744eda6 --- /dev/null +++ b/queue-4.16/revert-kvm-x86-fix-smram-accessing-even-if-vm-is-shutdown.patch @@ -0,0 +1,72 @@ +From 2c151b25441ae5c2da66472abd165af785c9ecd2 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 29 Mar 2018 14:48:30 -0700 +Subject: Revert "KVM: X86: Fix SMRAM accessing even if VM is shutdown" + +From: Sean Christopherson + +commit 2c151b25441ae5c2da66472abd165af785c9ecd2 upstream. + +The bug that led to commit 95e057e25892eaa48cad1e2d637b80d0f1a4fac5 +was a benign warning (no adverse effects other than the warning +itself) that was detected by syzkaller. Further inspection shows +that the WARN_ON in question, in handle_ept_misconfig(), is +unnecessary and flawed (this was also briefly discussed in the +original patch: https://patchwork.kernel.org/patch/10204649). + + * The WARN_ON is unnecessary as kvm_mmu_page_fault() will WARN + if reserved bits are set in the SPTEs, i.e. it covers the case + where an EPT misconfig occurred because of a KVM bug. + + * The WARN_ON is flawed because it will fire on any system error + code that is hit while handling the fault, e.g. -ENOMEM can be + returned by mmu_topup_memory_caches() while handling a legitimate + MMIO EPT misconfig. + +The original behavior of returning -EFAULT when userspace munmaps +an HVA without first removing the memslot is correct and desirable, +i.e. KVM is letting userspace know it has generated a bad address. +Returning RET_PF_EMULATE masks the WARN_ON in the EPT misconfig path, +but does not fix the underlying bug, i.e. the WARN_ON is bogus. 
+ +Furthermore, returning RET_PF_EMULATE has the unwanted side effect of +causing KVM to attempt to emulate an instruction on any page fault +with an invalid HVA translation, e.g. a not-present EPT violation +on a VM_PFNMAP VMA whose fault handler failed to insert a PFN. + + * There is no guarantee that the fault is directly related to the + instruction, i.e. the fault could have been triggered by a side + effect memory access in the guest, e.g. while vectoring a #DB or + writing a tracing record. This could cause KVM to effectively + mask the fault if KVM doesn't model the behavior leading to the + fault, i.e. emulation could succeed and resume the guest. + + * If emulation does fail, KVM will return EMULATION_FAILED instead + of -EFAULT, which is a red herring as the user will either debug + a bogus emulation attempt or scratch their head wondering why we + were attempting emulation in the first place. + +TL;DR: revert to returning -EFAULT and remove the bogus WARN_ON in +handle_ept_misconfig in a future patch. + +This reverts commit 95e057e25892eaa48cad1e2d637b80d0f1a4fac5. 
+ +Signed-off-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/mmu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -3031,7 +3031,7 @@ static int kvm_handle_bad_page(struct kv + return RET_PF_RETRY; + } + +- return RET_PF_EMULATE; ++ return -EFAULT; + } + + static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, diff --git a/queue-4.16/series b/queue-4.16/series index becfb72d4d2..0af23708fd0 100644 --- a/queue-4.16/series +++ b/queue-4.16/series @@ -13,3 +13,11 @@ drm-i915-bios-filter-out-invalid-ddc-pins-from-vbt-child-devices.patch drm-i915-audio-fix-audio-detection-issue-on-glk.patch drm-i915-do-no-use-kfree-to-free-a-kmem_cache_alloc-return-value.patch drm-i915-fix-lspcon-tmds-output-buffer-enabling-from-low-power-state.patch +alarmtimer-init-nanosleep-alarm-timer-on-stack.patch +mac80211_hwsim-fix-use-after-free-bug-in-hwsim_exit_net.patch +mm-vmscan-allow-preallocating-memory-for-register_shrinker.patch +netfilter-x_tables-limit-allocation-requests-for-blob-rule-heads.patch +perf-fix-sample_max_stack-maximum-check.patch +perf-return-proper-values-for-user-stack-errors.patch +rdma-mlx5-fix-null-dereference-while-accessing-xrc_tgt-qps.patch +revert-kvm-x86-fix-smram-accessing-even-if-vm-is-shutdown.patch -- 2.47.3