--- /dev/null
+From 28a282616f56990547b9dcd5c6fbd2001344664c Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@kernel.org>
+Date: Thu, 15 Aug 2019 06:23:38 -0400
+Subject: ceph: don't try fill file_lock on unsuccessful GETFILELOCK reply
+
+From: Jeff Layton <jlayton@kernel.org>
+
+commit 28a282616f56990547b9dcd5c6fbd2001344664c upstream.
+
+When ceph_mdsc_do_request returns an error, we can't assume that the
+filelock_reply pointer will be set. Only try to fetch fields out of
+the r_reply_info when it returns success.
+
+Cc: stable@vger.kernel.org
+Reported-by: Hector Martin <hector@marcansoft.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/locks.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/ceph/locks.c
++++ b/fs/ceph/locks.c
+@@ -78,8 +78,7 @@ static int ceph_lock_message(u8 lock_typ
+ req->r_wait_for_completion = ceph_lock_wait_for_completion;
+
+ err = ceph_mdsc_do_request(mdsc, inode, req);
+-
+- if (operation == CEPH_MDS_OP_GETFILELOCK) {
++ if (!err && operation == CEPH_MDS_OP_GETFILELOCK) {
+ fl->fl_pid = -le64_to_cpu(req->r_reply_info.filelock_reply->pid);
+ if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
+ fl->fl_type = F_RDLCK;
--- /dev/null
+From e4f9d6013820d1eba1432d51dd1c5795759aa77f Mon Sep 17 00:00:00 2001
+From: ZhangXiaoxu <zhangxiaoxu5@huawei.com>
+Date: Sat, 17 Aug 2019 13:32:40 +0800
+Subject: dm btree: fix order of block initialization in btree_split_beneath
+
+From: ZhangXiaoxu <zhangxiaoxu5@huawei.com>
+
+commit e4f9d6013820d1eba1432d51dd1c5795759aa77f upstream.
+
+When btree_split_beneath() splits a node to two new children, it will
+allocate two blocks: left and right. If right block's allocation
+failed, the left block will be unlocked and marked dirty. If this
+happened, the left block's content is zero, because it wasn't
+initialized with the btree struct before the attempt to allocate the
+right block. Upon return, when flushing the left block to disk, the
+validator will fail when checking this block. Then a BUG_ON is raised.
+
+Fix this by completely initializing the left block before allocating and
+initializing the right block.
+
+Fixes: 4dcb8b57df359 ("dm btree: fix leak of bufio-backed block in btree_split_beneath error path")
+Cc: stable@vger.kernel.org
+Signed-off-by: ZhangXiaoxu <zhangxiaoxu5@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/persistent-data/dm-btree.c | 31 ++++++++++++++++---------------
+ 1 file changed, 16 insertions(+), 15 deletions(-)
+
+--- a/drivers/md/persistent-data/dm-btree.c
++++ b/drivers/md/persistent-data/dm-btree.c
+@@ -628,39 +628,40 @@ static int btree_split_beneath(struct sh
+
+ new_parent = shadow_current(s);
+
++ pn = dm_block_data(new_parent);
++ size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
++ sizeof(__le64) : s->info->value_type.size;
++
++ /* create & init the left block */
+ r = new_block(s->info, &left);
+ if (r < 0)
+ return r;
+
++ ln = dm_block_data(left);
++ nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
++
++ ln->header.flags = pn->header.flags;
++ ln->header.nr_entries = cpu_to_le32(nr_left);
++ ln->header.max_entries = pn->header.max_entries;
++ ln->header.value_size = pn->header.value_size;
++ memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
++ memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size);
++
++ /* create & init the right block */
+ r = new_block(s->info, &right);
+ if (r < 0) {
+ unlock_block(s->info, left);
+ return r;
+ }
+
+- pn = dm_block_data(new_parent);
+- ln = dm_block_data(left);
+ rn = dm_block_data(right);
+-
+- nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
+ nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left;
+
+- ln->header.flags = pn->header.flags;
+- ln->header.nr_entries = cpu_to_le32(nr_left);
+- ln->header.max_entries = pn->header.max_entries;
+- ln->header.value_size = pn->header.value_size;
+-
+ rn->header.flags = pn->header.flags;
+ rn->header.nr_entries = cpu_to_le32(nr_right);
+ rn->header.max_entries = pn->header.max_entries;
+ rn->header.value_size = pn->header.value_size;
+-
+- memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
+ memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0]));
+-
+- size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
+- sizeof(__le64) : s->info->value_type.size;
+- memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size);
+ memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left),
+ nr_right * size);
+
--- /dev/null
+From d1fef41465f0e8cae0693fb184caa6bfafb6cd16 Mon Sep 17 00:00:00 2001
+From: Dmitry Fomichev <dmitry.fomichev@wdc.com>
+Date: Mon, 5 Aug 2019 16:56:03 -0700
+Subject: dm kcopyd: always complete failed jobs
+
+From: Dmitry Fomichev <dmitry.fomichev@wdc.com>
+
+commit d1fef41465f0e8cae0693fb184caa6bfafb6cd16 upstream.
+
+This patch fixes a problem in dm-kcopyd that may leave jobs in
+complete queue indefinitely in the event of backing storage failure.
+
+This behavior has been observed while running 100% write file fio
+workload against an XFS volume created on top of a dm-zoned target
+device. If the underlying storage of dm-zoned goes to offline state
+under I/O, kcopyd sometimes never issues the end copy callback and
+dm-zoned reclaim work hangs indefinitely waiting for that completion.
+
+This behavior was traced down to the error handling code in
+process_jobs() function that places the failed job to complete_jobs
+queue, but doesn't wake up the job handler. In case of backing device
+failure, all outstanding jobs may end up going to complete_jobs queue
+via this code path and then stay there forever because there are no
+more successful I/O jobs to wake up the job handler.
+
+This patch adds a wake() call to always wake up kcopyd job wait queue
+for all I/O jobs that fail before dm_io() gets called for that job.
+
+The patch also sets the write error status in all sub jobs that are
+failed because their master job has failed.
+
+Fixes: b73c67c2cbb00 ("dm kcopyd: add sequential write feature")
+Cc: stable@vger.kernel.org
+Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>
+Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-kcopyd.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/dm-kcopyd.c
++++ b/drivers/md/dm-kcopyd.c
+@@ -545,8 +545,10 @@ static int run_io_job(struct kcopyd_job
+ * no point in continuing.
+ */
+ if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) &&
+- job->master_job->write_err)
++ job->master_job->write_err) {
++ job->write_err = job->master_job->write_err;
+ return -EIO;
++ }
+
+ io_job_start(job->kc->throttle);
+
+@@ -598,6 +600,7 @@ static int process_jobs(struct list_head
+ else
+ job->read_err = 1;
+ push(&kc->complete_jobs, job);
++ wake(kc);
+ break;
+ }
+
--- /dev/null
+From ae148243d3f0816b37477106c05a2ec7d5f32614 Mon Sep 17 00:00:00 2001
+From: ZhangXiaoxu <zhangxiaoxu5@huawei.com>
+Date: Mon, 19 Aug 2019 11:31:21 +0800
+Subject: dm space map metadata: fix missing store of apply_bops() return value
+
+From: ZhangXiaoxu <zhangxiaoxu5@huawei.com>
+
+commit ae148243d3f0816b37477106c05a2ec7d5f32614 upstream.
+
+In commit 6096d91af0b6 ("dm space map metadata: fix occasional leak
+of a metadata block on resize"), we refactored the commit logic into a
+new function 'apply_bops'. But when that logic was replaced in out() the
+return value was not stored. This may lead to out() returning a wrong
+value to the caller.
+
+Fixes: 6096d91af0b6 ("dm space map metadata: fix occasional leak of a metadata block on resize")
+Cc: stable@vger.kernel.org
+Signed-off-by: ZhangXiaoxu <zhangxiaoxu5@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/persistent-data/dm-space-map-metadata.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/md/persistent-data/dm-space-map-metadata.c
++++ b/drivers/md/persistent-data/dm-space-map-metadata.c
+@@ -248,7 +248,7 @@ static int out(struct sm_metadata *smm)
+ }
+
+ if (smm->recursion_count == 1)
+- apply_bops(smm);
++ r = apply_bops(smm);
+
+ smm->recursion_count--;
+
--- /dev/null
+From 1cfd5d3399e87167b7f9157ef99daa0e959f395d Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Fri, 23 Aug 2019 09:54:09 -0400
+Subject: dm table: fix invalid memory accesses with too high sector number
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 1cfd5d3399e87167b7f9157ef99daa0e959f395d upstream.
+
+If the sector number is too high, dm_table_find_target() should return a
+pointer to a zeroed dm_target structure (the caller should test it with
+dm_target_is_valid).
+
+However, for some table sizes, the code in dm_table_find_target() that
+performs btree lookup will access out of bound memory structures.
+
+Fix this bug by testing the sector number at the beginning of
+dm_table_find_target(). Also, add an "inline" keyword to the function
+dm_table_get_size() because this is a hot path.
+
+Fixes: 512875bd9661 ("dm: table detect io beyond device")
+Cc: stable@vger.kernel.org
+Reported-by: Zhang Tao <kontais@zoho.com>
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-table.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/dm-table.c
++++ b/drivers/md/dm-table.c
+@@ -1308,7 +1308,7 @@ void dm_table_event(struct dm_table *t)
+ }
+ EXPORT_SYMBOL(dm_table_event);
+
+-sector_t dm_table_get_size(struct dm_table *t)
++inline sector_t dm_table_get_size(struct dm_table *t)
+ {
+ return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
+ }
+@@ -1333,6 +1333,9 @@ struct dm_target *dm_table_find_target(s
+ unsigned int l, n = 0, k = 0;
+ sector_t *node;
+
++ if (unlikely(sector >= dm_table_get_size(t)))
++ return &t->targets[t->num_targets];
++
+ for (l = 0; l < t->depth; l++) {
+ n = get_child(n, k);
+ node = get_node(t, l, n);
--- /dev/null
+From c358ebf59634f06d8ed176da651ec150df3c8686 Mon Sep 17 00:00:00 2001
+From: Lyude Paul <lyude@redhat.com>
+Date: Thu, 25 Jul 2019 15:40:01 -0400
+Subject: drm/nouveau: Don't retry infinitely when receiving no data on i2c over AUX
+
+From: Lyude Paul <lyude@redhat.com>
+
+commit c358ebf59634f06d8ed176da651ec150df3c8686 upstream.
+
+While I had thought I had fixed this issue in:
+
+commit 342406e4fbba ("drm/nouveau/i2c: Disable i2c bus access after
+->fini()")
+
+It turns out that while I did fix the error messages I was seeing on my
+P50 when trying to access i2c busses with the GPU in runtime suspend, I
+accidentally had missed one important detail that was mentioned on the
+bug report this commit was supposed to fix: that the CPU would only lock
+up when trying to access i2c busses _on connected devices_ _while the
+GPU is not in runtime suspend_. Whoops. That definitely explains why I
+was not able to get my machine to hang with i2c bus interactions until
+now, as plugging my P50 into its dock with an HDMI monitor connected
+allowed me to finally reproduce this locally.
+
+Now that I have managed to reproduce this issue properly, it looks like
+the problem is much simpler than it looks. It turns out that some
+connected devices, such as MST laptop docks, will actually ACK i2c reads
+even if no data was actually read:
+
+[ 275.063043] nouveau 0000:01:00.0: i2c: aux 000a: 1: 0000004c 1
+[ 275.063447] nouveau 0000:01:00.0: i2c: aux 000a: 00 01101000 10040000
+[ 275.063759] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000001
+[ 275.064024] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000
+[ 275.064285] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000
+[ 275.064594] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000
+
+Because we don't handle the situation of i2c ack without any data, we
+end up entering an infinite loop in nvkm_i2c_aux_i2c_xfer() since the
+value of cnt always remains at 0. This finally properly explains how
+this could result in a CPU hang like the ones observed in the
+aforementioned commit.
+
+So, fix this by retrying transactions if no data is written or received,
+and give up and fail the transaction if we continue to not write or
+receive any data after 32 retries.
+
+Signed-off-by: Lyude Paul <lyude@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c | 24 +++++++++++++++++-------
+ 1 file changed, 17 insertions(+), 7 deletions(-)
+
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c
+@@ -40,8 +40,7 @@ nvkm_i2c_aux_i2c_xfer(struct i2c_adapter
+ u8 *ptr = msg->buf;
+
+ while (remaining) {
+- u8 cnt = (remaining > 16) ? 16 : remaining;
+- u8 cmd;
++ u8 cnt, retries, cmd;
+
+ if (msg->flags & I2C_M_RD)
+ cmd = 1;
+@@ -51,10 +50,19 @@ nvkm_i2c_aux_i2c_xfer(struct i2c_adapter
+ if (mcnt || remaining > 16)
+ cmd |= 4; /* MOT */
+
+- ret = aux->func->xfer(aux, true, cmd, msg->addr, ptr, &cnt);
+- if (ret < 0) {
+- nvkm_i2c_aux_release(aux);
+- return ret;
++ for (retries = 0, cnt = 0;
++ retries < 32 && !cnt;
++ retries++) {
++ cnt = min_t(u8, remaining, 16);
++ ret = aux->func->xfer(aux, true, cmd,
++ msg->addr, ptr, &cnt);
++ if (ret < 0)
++ goto out;
++ }
++ if (!cnt) {
++ AUX_TRACE(aux, "no data after 32 retries");
++ ret = -EIO;
++ goto out;
+ }
+
+ ptr += cnt;
+@@ -64,8 +72,10 @@ nvkm_i2c_aux_i2c_xfer(struct i2c_adapter
+ msg++;
+ }
+
++ ret = num;
++out:
+ nvkm_i2c_aux_release(aux);
+- return num;
++ return ret;
+ }
+
+ static u32
--- /dev/null
+From 2c60e6b5c9241b24b8b523fefd3e44fb85622cda Mon Sep 17 00:00:00 2001
+From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
+Date: Tue, 6 Aug 2019 13:41:51 +0200
+Subject: gpiolib: never report open-drain/source lines as 'input' to user-space
+
+From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
+
+commit 2c60e6b5c9241b24b8b523fefd3e44fb85622cda upstream.
+
+If the driver doesn't support open-drain/source config options, we
+emulate this behavior when setting the direction by calling
+gpiod_direction_input() if the default value is 0 (open-source) or
+1 (open-drain), thus not actively driving the line in those cases.
+
+This however clears the FLAG_IS_OUT bit for the GPIO line descriptor
+and makes the LINEINFO ioctl() incorrectly report this line's mode as
+'input' to user-space.
+
+This commit modifies the ioctl() to always set the GPIOLINE_FLAG_IS_OUT
+bit in the lineinfo structure's flags field. Since it's impossible to
+use the input mode and open-drain/source options at the same time, we
+can be sure the reported information will be correct.
+
+Fixes: 521a2ad6f862 ("gpio: add userspace ABI for GPIO line information")
+Cc: stable <stable@vger.kernel.org>
+Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
+Link: https://lore.kernel.org/r/20190806114151.17652-1-brgl@bgdev.pl
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpio/gpiolib.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpio/gpiolib.c
++++ b/drivers/gpio/gpiolib.c
+@@ -971,9 +971,11 @@ static long gpio_ioctl(struct file *filp
+ if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
+ lineinfo.flags |= GPIOLINE_FLAG_ACTIVE_LOW;
+ if (test_bit(FLAG_OPEN_DRAIN, &desc->flags))
+- lineinfo.flags |= GPIOLINE_FLAG_OPEN_DRAIN;
++ lineinfo.flags |= (GPIOLINE_FLAG_OPEN_DRAIN |
++ GPIOLINE_FLAG_IS_OUT);
+ if (test_bit(FLAG_OPEN_SOURCE, &desc->flags))
+- lineinfo.flags |= GPIOLINE_FLAG_OPEN_SOURCE;
++ lineinfo.flags |= (GPIOLINE_FLAG_OPEN_SOURCE |
++ GPIOLINE_FLAG_IS_OUT);
+
+ if (copy_to_user(ip, &lineinfo, sizeof(lineinfo)))
+ return -EFAULT;
--- /dev/null
+From b72fb1dcd2ea9d29417711cb302cef3006fa8d5a Mon Sep 17 00:00:00 2001
+From: Jason Gerecke <jason.gerecke@wacom.com>
+Date: Wed, 7 Aug 2019 14:11:55 -0700
+Subject: HID: wacom: Correct distance scale for 2nd-gen Intuos devices
+
+From: Jason Gerecke <jason.gerecke@wacom.com>
+
+commit b72fb1dcd2ea9d29417711cb302cef3006fa8d5a upstream.
+
+Distance values reported by 2nd-gen Intuos tablets are on an inverted
+scale (0 == far, 63 == near). We need to change them over to a normal
+scale before reporting to userspace or else userspace drivers and
+applications can get confused.
+
+Ref: https://github.com/linuxwacom/input-wacom/issues/98
+Fixes: eda01dab53 ("HID: wacom: Add four new Intuos devices")
+Signed-off-by: Jason Gerecke <jason.gerecke@wacom.com>
+Cc: <stable@vger.kernel.org> # v4.4+
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hid/wacom_wac.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/hid/wacom_wac.c
++++ b/drivers/hid/wacom_wac.c
+@@ -848,6 +848,8 @@ static int wacom_intuos_general(struct w
+ y >>= 1;
+ distance >>= 1;
+ }
++ if (features->type == INTUOSHT2)
++ distance = features->distance_max - distance;
+ input_report_abs(input, ABS_X, x);
+ input_report_abs(input, ABS_Y, y);
+ input_report_abs(input, ABS_DISTANCE, distance);
--- /dev/null
+From fcf887e7caaa813eea821d11bf2b7619a37df37a Mon Sep 17 00:00:00 2001
+From: Aaron Armstrong Skomra <skomra@gmail.com>
+Date: Fri, 16 Aug 2019 12:00:54 -0700
+Subject: HID: wacom: correct misreported EKR ring values
+
+From: Aaron Armstrong Skomra <skomra@gmail.com>
+
+commit fcf887e7caaa813eea821d11bf2b7619a37df37a upstream.
+
+The EKR ring claims a range of 0 to 71 but actually reports
+values 1 to 72. The ring is used in relative mode so this
+change should not affect users.
+
+Signed-off-by: Aaron Armstrong Skomra <aaron.skomra@wacom.com>
+Fixes: 72b236d60218f ("HID: wacom: Add support for Express Key Remote.")
+Cc: <stable@vger.kernel.org> # v4.3+
+Reviewed-by: Ping Cheng <ping.cheng@wacom.com>
+Reviewed-by: Jason Gerecke <jason.gerecke@wacom.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hid/wacom_wac.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/hid/wacom_wac.c
++++ b/drivers/hid/wacom_wac.c
+@@ -1061,7 +1061,7 @@ static int wacom_remote_irq(struct wacom
+ input_report_key(input, BTN_BASE2, (data[11] & 0x02));
+
+ if (data[12] & 0x80)
+- input_report_abs(input, ABS_WHEEL, (data[12] & 0x7f));
++ input_report_abs(input, ABS_WHEEL, (data[12] & 0x7f) - 1);
+ else
+ input_report_abs(input, ABS_WHEEL, 0);
+
--- /dev/null
+From a561372405cf6bc6f14239b3a9e57bb39f2788b0 Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Tue, 20 Aug 2019 16:40:33 +0200
+Subject: libceph: fix PG split vs OSD (re)connect race
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit a561372405cf6bc6f14239b3a9e57bb39f2788b0 upstream.
+
+We can't rely on ->peer_features in calc_target() because it may be
+called both when the OSD session is established and open and when it's
+not. ->peer_features is not valid unless the OSD session is open. If
+this happens on a PG split (pg_num increase), that could mean we don't
+resend a request that should have been resent, hanging the client
+indefinitely.
+
+In userspace this was fixed by looking at require_osd_release and
+get_xinfo[osd].features fields of the osdmap. However these fields
+belong to the OSD section of the osdmap, which the kernel doesn't
+decode (only the client section is decoded).
+
+Instead, let's drop this feature check. It effectively checks for
+luminous, so only pre-luminous OSDs would be affected in that on a PG
+split the kernel might resend a request that should not have been
+resent. Duplicates can occur in other scenarios, so both sides should
+already be prepared for them: see dup/replay logic on the OSD side and
+retry_attempt check on the client side.
+
+Cc: stable@vger.kernel.org
+Fixes: 7de030d6b10a ("libceph: resend on PG splits if OSD has RESEND_ON_SPLIT")
+Link: https://tracker.ceph.com/issues/41162
+Reported-by: Jerry Lee <leisurelysw24@gmail.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Tested-by: Jerry Lee <leisurelysw24@gmail.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ceph/osd_client.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -1330,7 +1330,7 @@ static enum calc_target_result calc_targ
+ struct ceph_osds up, acting;
+ bool force_resend = false;
+ bool unpaused = false;
+- bool legacy_change;
++ bool legacy_change = false;
+ bool split = false;
+ bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
+ bool recovery_deletes = ceph_osdmap_flag(osdc,
+@@ -1426,15 +1426,14 @@ static enum calc_target_result calc_targ
+ t->osd = acting.primary;
+ }
+
+- if (unpaused || legacy_change || force_resend ||
+- (split && con && CEPH_HAVE_FEATURE(con->peer_features,
+- RESEND_ON_SPLIT)))
++ if (unpaused || legacy_change || force_resend || split)
+ ct_res = CALC_TARGET_NEED_RESEND;
+ else
+ ct_res = CALC_TARGET_NO_ACTION;
+
+ out:
+- dout("%s t %p -> ct_res %d osd %d\n", __func__, t, ct_res, t->osd);
++ dout("%s t %p -> %d%d%d%d ct_res %d osd%d\n", __func__, t, unpaused,
++ legacy_change, force_resend, split, ct_res, t->osd);
+ return ct_res;
+ }
+
--- /dev/null
+From cf3591ef832915892f2499b7e54b51d4c578b28c Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 8 Aug 2019 05:40:04 -0400
+Subject: Revert "dm bufio: fix deadlock with loop device"
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit cf3591ef832915892f2499b7e54b51d4c578b28c upstream.
+
+Revert the commit bd293d071ffe65e645b4d8104f9d8fe15ea13862. The proper
+fix has been made available with commit d0a255e795ab ("loop: set
+PF_MEMALLOC_NOIO for the worker thread").
+
+Note that the fix offered by commit bd293d071ffe doesn't really prevent
+the deadlock from occurring - if we look at the stacktrace reported by
+Junxiao Bi, we see that it hangs in bit_wait_io and not on the mutex -
+i.e. it has already successfully taken the mutex. Changing the mutex
+from mutex_lock to mutex_trylock won't help with deadlocks that happen
+afterwards.
+
+PID: 474 TASK: ffff8813e11f4600 CPU: 10 COMMAND: "kswapd0"
+ #0 [ffff8813dedfb938] __schedule at ffffffff8173f405
+ #1 [ffff8813dedfb990] schedule at ffffffff8173fa27
+ #2 [ffff8813dedfb9b0] schedule_timeout at ffffffff81742fec
+ #3 [ffff8813dedfba60] io_schedule_timeout at ffffffff8173f186
+ #4 [ffff8813dedfbaa0] bit_wait_io at ffffffff8174034f
+ #5 [ffff8813dedfbac0] __wait_on_bit at ffffffff8173fec8
+ #6 [ffff8813dedfbb10] out_of_line_wait_on_bit at ffffffff8173ff81
+ #7 [ffff8813dedfbb90] __make_buffer_clean at ffffffffa038736f [dm_bufio]
+ #8 [ffff8813dedfbbb0] __try_evict_buffer at ffffffffa0387bb8 [dm_bufio]
+ #9 [ffff8813dedfbbd0] dm_bufio_shrink_scan at ffffffffa0387cc3 [dm_bufio]
+ #10 [ffff8813dedfbc40] shrink_slab at ffffffff811a87ce
+ #11 [ffff8813dedfbd30] shrink_zone at ffffffff811ad778
+ #12 [ffff8813dedfbdc0] kswapd at ffffffff811ae92f
+ #13 [ffff8813dedfbec0] kthread at ffffffff810a8428
+ #14 [ffff8813dedfbf50] ret_from_fork at ffffffff81745242
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org
+Fixes: bd293d071ffe ("dm bufio: fix deadlock with loop device")
+Depends-on: d0a255e795ab ("loop: set PF_MEMALLOC_NOIO for the worker thread")
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-bufio.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/dm-bufio.c
++++ b/drivers/md/dm-bufio.c
+@@ -1630,7 +1630,9 @@ dm_bufio_shrink_scan(struct shrinker *sh
+ unsigned long freed;
+
+ c = container_of(shrink, struct dm_bufio_client, shrinker);
+- if (!dm_bufio_trylock(c))
++ if (sc->gfp_mask & __GFP_FS)
++ dm_bufio_lock(c);
++ else if (!dm_bufio_trylock(c))
+ return SHRINK_STOP;
+
+ freed = __scan(c, sc->nr_to_scan, sc->gfp_mask);
perf-cpumap-fix-writing-to-illegal-memory-in-handlin.patch
perf-pmu-events-fix-missing-cpu_clk_unhalted.core-ev.patch
selftests-kvm-adding-config-fragments.patch
+hid-wacom-correct-misreported-ekr-ring-values.patch
+hid-wacom-correct-distance-scale-for-2nd-gen-intuos-devices.patch
+revert-dm-bufio-fix-deadlock-with-loop-device.patch
+ceph-don-t-try-fill-file_lock-on-unsuccessful-getfilelock-reply.patch
+libceph-fix-pg-split-vs-osd-re-connect-race.patch
+drm-nouveau-don-t-retry-infinitely-when-receiving-no-data-on-i2c-over-aux.patch
+gpiolib-never-report-open-drain-source-lines-as-input-to-user-space.patch
+userfaultfd_release-always-remove-uffd-flags-and-clear-vm_userfaultfd_ctx.patch
+x86-retpoline-don-t-clobber-rflags-during-call_nospec-on-i386.patch
+x86-apic-handle-missing-global-clockevent-gracefully.patch
+x86-cpu-amd-clear-rdrand-cpuid-bit-on-amd-family-15h-16h.patch
+x86-boot-save-fields-explicitly-zero-out-everything-else.patch
+x86-boot-fix-boot-regression-caused-by-bootparam-sanitizing.patch
+dm-kcopyd-always-complete-failed-jobs.patch
+dm-btree-fix-order-of-block-initialization-in-btree_split_beneath.patch
+dm-space-map-metadata-fix-missing-store-of-apply_bops-return-value.patch
+dm-table-fix-invalid-memory-accesses-with-too-high-sector-number.patch
--- /dev/null
+From 46d0b24c5ee10a15dfb25e20642f5a5ed59c5003 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Sat, 24 Aug 2019 17:54:56 -0700
+Subject: userfaultfd_release: always remove uffd flags and clear vm_userfaultfd_ctx
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 46d0b24c5ee10a15dfb25e20642f5a5ed59c5003 upstream.
+
+userfaultfd_release() should clear vm_flags/vm_userfaultfd_ctx even if
+mm->core_state != NULL.
+
+Otherwise a page fault can see userfaultfd_missing() == T and use an
+already freed userfaultfd_ctx.
+
+Link: http://lkml.kernel.org/r/20190820160237.GB4983@redhat.com
+Fixes: 04f5866e41fb ("coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping")
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Reported-by: Kefeng Wang <wangkefeng.wang@huawei.com>
+Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
+Tested-by: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Mike Rapoport <rppt@linux.ibm.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Jason Gunthorpe <jgg@mellanox.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/userfaultfd.c | 25 +++++++++++++------------
+ 1 file changed, 13 insertions(+), 12 deletions(-)
+
+--- a/fs/userfaultfd.c
++++ b/fs/userfaultfd.c
+@@ -854,6 +854,7 @@ static int userfaultfd_release(struct in
+ /* len == 0 means wake all */
+ struct userfaultfd_wake_range range = { .len = 0, };
+ unsigned long new_flags;
++ bool still_valid;
+
+ ACCESS_ONCE(ctx->released) = true;
+
+@@ -869,8 +870,7 @@ static int userfaultfd_release(struct in
+ * taking the mmap_sem for writing.
+ */
+ down_write(&mm->mmap_sem);
+- if (!mmget_still_valid(mm))
+- goto skip_mm;
++ still_valid = mmget_still_valid(mm);
+ prev = NULL;
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ cond_resched();
+@@ -881,19 +881,20 @@ static int userfaultfd_release(struct in
+ continue;
+ }
+ new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
+- prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
+- new_flags, vma->anon_vma,
+- vma->vm_file, vma->vm_pgoff,
+- vma_policy(vma),
+- NULL_VM_UFFD_CTX);
+- if (prev)
+- vma = prev;
+- else
+- prev = vma;
++ if (still_valid) {
++ prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
++ new_flags, vma->anon_vma,
++ vma->vm_file, vma->vm_pgoff,
++ vma_policy(vma),
++ NULL_VM_UFFD_CTX);
++ if (prev)
++ vma = prev;
++ else
++ prev = vma;
++ }
+ vma->vm_flags = new_flags;
+ vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+ }
+-skip_mm:
+ up_write(&mm->mmap_sem);
+ mmput(mm);
+ wakeup:
--- /dev/null
+From f897e60a12f0b9146357780d317879bce2a877dc Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 9 Aug 2019 14:54:07 +0200
+Subject: x86/apic: Handle missing global clockevent gracefully
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit f897e60a12f0b9146357780d317879bce2a877dc upstream.
+
+Some newer machines do not advertise legacy timers. The kernel can handle
+that situation if the TSC and the CPU frequency are enumerated by CPUID or
+MSRs and the CPU supports TSC deadline timer. If the CPU does not support
+TSC deadline timer the local APIC timer frequency has to be known as well.
+
+Some Ryzen machines do not advertise legacy timers, but there is no
+reliable way to determine the bus frequency which feeds the local APIC
+timer when the machine allows overclocking of that frequency.
+
+As there is no legacy timer the local APIC timer calibration crashes due to
+a NULL pointer dereference when accessing the not installed global clock
+event device.
+
+Switch the calibration loop to a non interrupt based one, which polls
+either TSC (if frequency is known) or jiffies. The latter requires a global
+clockevent. As the machines which do not have a global clockevent installed
+have a known TSC frequency this is a non issue. For older machines where
+TSC frequency is not known, there is no known case where the legacy timers
+do not exist as that would have been reported long ago.
+
+Reported-by: Daniel Drake <drake@endlessm.com>
+Reported-by: Jiri Slaby <jslaby@suse.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Daniel Drake <drake@endlessm.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908091443030.21433@nanos.tec.linutronix.de
+Link: http://bugzilla.opensuse.org/show_bug.cgi?id=1142926#c12
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/apic/apic.c | 68 ++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 53 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/kernel/apic/apic.c
++++ b/arch/x86/kernel/apic/apic.c
+@@ -723,7 +723,7 @@ static __initdata unsigned long lapic_ca
+ static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
+
+ /*
+- * Temporary interrupt handler.
++ * Temporary interrupt handler and polled calibration function.
+ */
+ static void __init lapic_cal_handler(struct clock_event_device *dev)
+ {
+@@ -807,7 +807,8 @@ calibrate_by_pmtimer(long deltapm, long
+ static int __init calibrate_APIC_clock(void)
+ {
+ struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
+- void (*real_handler)(struct clock_event_device *dev);
++ u64 tsc_perj = 0, tsc_start = 0;
++ unsigned long jif_start;
+ unsigned long deltaj;
+ long delta, deltatsc;
+ int pm_referenced = 0;
+@@ -838,29 +839,65 @@ static int __init calibrate_APIC_clock(v
+ apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
+ "calibrating APIC timer ...\n");
+
++ /*
++ * There are platforms w/o global clockevent devices. Instead of
++ * making the calibration conditional on that, use a polling based
++ * approach everywhere.
++ */
+ local_irq_disable();
+
+- /* Replace the global interrupt handler */
+- real_handler = global_clock_event->event_handler;
+- global_clock_event->event_handler = lapic_cal_handler;
+-
+ /*
+ * Setup the APIC counter to maximum. There is no way the lapic
+ * can underflow in the 100ms detection time frame
+ */
+ __setup_APIC_LVTT(0xffffffff, 0, 0);
+
+- /* Let the interrupts run */
++ /*
++ * Methods to terminate the calibration loop:
++ * 1) Global clockevent if available (jiffies)
++ * 2) TSC if available and frequency is known
++ */
++ jif_start = READ_ONCE(jiffies);
++
++ if (tsc_khz) {
++ tsc_start = rdtsc();
++ tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
++ }
++
++ /*
++ * Enable interrupts so the tick can fire, if a global
++ * clockevent device is available
++ */
+ local_irq_enable();
+
+- while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
+- cpu_relax();
++ while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
++ /* Wait for a tick to elapse */
++ while (1) {
++ if (tsc_khz) {
++ u64 tsc_now = rdtsc();
++ if ((tsc_now - tsc_start) >= tsc_perj) {
++ tsc_start += tsc_perj;
++ break;
++ }
++ } else {
++ unsigned long jif_now = READ_ONCE(jiffies);
++
++ if (time_after(jif_now, jif_start)) {
++ jif_start = jif_now;
++ break;
++ }
++ }
++ cpu_relax();
++ }
++
++ /* Invoke the calibration routine */
++ local_irq_disable();
++ lapic_cal_handler(NULL);
++ local_irq_enable();
++ }
+
+ local_irq_disable();
+
+- /* Restore the real event handler */
+- global_clock_event->event_handler = real_handler;
+-
+ /* Build delta t1-t2 as apic timer counts down */
+ delta = lapic_cal_t1 - lapic_cal_t2;
+ apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
+@@ -912,10 +949,11 @@ static int __init calibrate_APIC_clock(v
+ levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
+
+ /*
+- * PM timer calibration failed or not turned on
+- * so lets try APIC timer based calibration
++ * PM timer calibration failed or not turned on so lets try APIC
++ * timer based calibration, if a global clockevent device is
++ * available.
+ */
+- if (!pm_referenced) {
++ if (!pm_referenced && global_clock_event) {
+ apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
+
+ /*
--- /dev/null
+From 7846f58fba964af7cb8cf77d4d13c33254725211 Mon Sep 17 00:00:00 2001
+From: John Hubbard <jhubbard@nvidia.com>
+Date: Wed, 21 Aug 2019 12:25:13 -0700
+Subject: x86/boot: Fix boot regression caused by bootparam sanitizing
+
+From: John Hubbard <jhubbard@nvidia.com>
+
+commit 7846f58fba964af7cb8cf77d4d13c33254725211 upstream.
+
+commit a90118c445cc ("x86/boot: Save fields explicitly, zero out everything
+else") had two errors:
+
+ * It preserved boot_params.acpi_rsdp_addr, and
+ * It failed to preserve boot_params.hdr
+
+Therefore, zero out acpi_rsdp_addr, and preserve hdr.
+
+Fixes: a90118c445cc ("x86/boot: Save fields explicitly, zero out everything else")
+Reported-by: Neil MacLeod <neil@nmacleod.com>
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Hubbard <jhubbard@nvidia.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Neil MacLeod <neil@nmacleod.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20190821192513.20126-1-jhubbard@nvidia.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/bootparam_utils.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/include/asm/bootparam_utils.h
++++ b/arch/x86/include/asm/bootparam_utils.h
+@@ -71,6 +71,7 @@ static void sanitize_boot_params(struct
+ BOOT_PARAM_PRESERVE(eddbuf_entries),
+ BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries),
+ BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer),
++ BOOT_PARAM_PRESERVE(hdr),
+ BOOT_PARAM_PRESERVE(e820_table),
+ BOOT_PARAM_PRESERVE(eddbuf),
+ };
--- /dev/null
+From a90118c445cc7f07781de26a9684d4ec58bfcfd1 Mon Sep 17 00:00:00 2001
+From: John Hubbard <jhubbard@nvidia.com>
+Date: Tue, 30 Jul 2019 22:46:27 -0700
+Subject: x86/boot: Save fields explicitly, zero out everything else
+
+From: John Hubbard <jhubbard@nvidia.com>
+
+commit a90118c445cc7f07781de26a9684d4ec58bfcfd1 upstream.
+
+Recent gcc compilers (gcc 9.1) generate warnings about an out of bounds
+memset, if the memset goes across several fields of a struct. This
+generated a couple of warnings on x86_64 builds in sanitize_boot_params().
+
+Fix this by explicitly saving the fields in struct boot_params
+that are intended to be preserved, and zeroing all the rest.
+
+[ tglx: Tagged for stable as it breaks the warning free build there as well ]
+
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Suggested-by: H. Peter Anvin <hpa@zytor.com>
+Signed-off-by: John Hubbard <jhubbard@nvidia.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20190731054627.5627-2-jhubbard@nvidia.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/bootparam_utils.h | 60 +++++++++++++++++++++++++--------
+ 1 file changed, 47 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/include/asm/bootparam_utils.h
++++ b/arch/x86/include/asm/bootparam_utils.h
+@@ -18,6 +18,20 @@
+ * Note: efi_info is commonly left uninitialized, but that field has a
+ * private magic, so it is better to leave it unchanged.
+ */
++
++#define sizeof_mbr(type, member) ({ sizeof(((type *)0)->member); })
++
++#define BOOT_PARAM_PRESERVE(struct_member) \
++ { \
++ .start = offsetof(struct boot_params, struct_member), \
++ .len = sizeof_mbr(struct boot_params, struct_member), \
++ }
++
++struct boot_params_to_save {
++ unsigned int start;
++ unsigned int len;
++};
++
+ static void sanitize_boot_params(struct boot_params *boot_params)
+ {
+ /*
+@@ -36,19 +50,39 @@ static void sanitize_boot_params(struct
+ */
+ if (boot_params->sentinel) {
+ /* fields in boot_params are left uninitialized, clear them */
+- memset(&boot_params->ext_ramdisk_image, 0,
+- (char *)&boot_params->efi_info -
+- (char *)&boot_params->ext_ramdisk_image);
+- memset(&boot_params->kbd_status, 0,
+- (char *)&boot_params->hdr -
+- (char *)&boot_params->kbd_status);
+- memset(&boot_params->_pad7[0], 0,
+- (char *)&boot_params->edd_mbr_sig_buffer[0] -
+- (char *)&boot_params->_pad7[0]);
+- memset(&boot_params->_pad8[0], 0,
+- (char *)&boot_params->eddbuf[0] -
+- (char *)&boot_params->_pad8[0]);
+- memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9));
++ static struct boot_params scratch;
++ char *bp_base = (char *)boot_params;
++ char *save_base = (char *)&scratch;
++ int i;
++
++ const struct boot_params_to_save to_save[] = {
++ BOOT_PARAM_PRESERVE(screen_info),
++ BOOT_PARAM_PRESERVE(apm_bios_info),
++ BOOT_PARAM_PRESERVE(tboot_addr),
++ BOOT_PARAM_PRESERVE(ist_info),
++ BOOT_PARAM_PRESERVE(hd0_info),
++ BOOT_PARAM_PRESERVE(hd1_info),
++ BOOT_PARAM_PRESERVE(sys_desc_table),
++ BOOT_PARAM_PRESERVE(olpc_ofw_header),
++ BOOT_PARAM_PRESERVE(efi_info),
++ BOOT_PARAM_PRESERVE(alt_mem_k),
++ BOOT_PARAM_PRESERVE(scratch),
++ BOOT_PARAM_PRESERVE(e820_entries),
++ BOOT_PARAM_PRESERVE(eddbuf_entries),
++ BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries),
++ BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer),
++ BOOT_PARAM_PRESERVE(e820_table),
++ BOOT_PARAM_PRESERVE(eddbuf),
++ };
++
++ memset(&scratch, 0, sizeof(scratch));
++
++ for (i = 0; i < ARRAY_SIZE(to_save); i++) {
++ memcpy(save_base + to_save[i].start,
++ bp_base + to_save[i].start, to_save[i].len);
++ }
++
++ memcpy(boot_params, save_base, sizeof(*boot_params));
+ }
+ }
+
--- /dev/null
+From c49a0a80137c7ca7d6ced4c812c9e07a949f6f24 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 19 Aug 2019 15:52:35 +0000
+Subject: x86/CPU/AMD: Clear RDRAND CPUID bit on AMD family 15h/16h
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit c49a0a80137c7ca7d6ced4c812c9e07a949f6f24 upstream.
+
+There have been reports of RDRAND issues after resuming from suspend on
+some AMD family 15h and family 16h systems. This issue stems from a BIOS
+not performing the proper steps during resume to ensure RDRAND continues
+to function properly.
+
+RDRAND support is indicated by CPUID Fn00000001_ECX[30]. This bit can be
+reset by clearing MSR C001_1004[62]. Any software that checks for RDRAND
+support using CPUID, including the kernel, will believe that RDRAND is
+not supported.
+
+Update the CPU initialization to clear the RDRAND CPUID bit for any family
+15h and 16h processor that supports RDRAND. If it is known that the family
+15h or family 16h system does not have an RDRAND resume issue or that the
+system will not be placed in suspend, the "rdrand=force" kernel parameter
+can be used to stop the clearing of the RDRAND CPUID bit.
+
+Additionally, update the suspend and resume path to save and restore the
+MSR C001_1004 value to ensure that the RDRAND CPUID setting remains in
+place after resuming from suspend.
+
+Note that clearing the RDRAND CPUID bit does not prevent a processor
+that normally supports the RDRAND instruction from executing it. So any
+code that determined the support based on family and model won't #UD.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: Andrew Cooper <andrew.cooper3@citrix.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Chen Yu <yu.c.chen@intel.com>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: "linux-doc@vger.kernel.org" <linux-doc@vger.kernel.org>
+Cc: "linux-pm@vger.kernel.org" <linux-pm@vger.kernel.org>
+Cc: Nathan Chancellor <natechancellor@gmail.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Pavel Machek <pavel@ucw.cz>
+Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
+Cc: <stable@vger.kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: "x86@kernel.org" <x86@kernel.org>
+Link: https://lkml.kernel.org/r/7543af91666f491547bd86cebb1e17c66824ab9f.1566229943.git.thomas.lendacky@amd.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/admin-guide/kernel-parameters.txt | 7 +
+ arch/x86/include/asm/msr-index.h | 1
+ arch/x86/kernel/cpu/amd.c | 66 ++++++++++++++++++
+ arch/x86/power/cpu.c | 86 ++++++++++++++++++++----
+ 4 files changed, 147 insertions(+), 13 deletions(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -3788,6 +3788,13 @@
+ Run specified binary instead of /init from the ramdisk,
+ used for early userspace startup. See initrd.
+
++ rdrand= [X86]
++ force - Override the decision by the kernel to hide the
++ advertisement of RDRAND support (this affects
++ certain AMD processors because of buggy BIOS
++ support, specifically around the suspend/resume
++ path).
++
+ rdt= [HW,X86,RDT]
+ Turn on/off individual RDT features. List is:
+ cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, mba.
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -334,6 +334,7 @@
+ #define MSR_AMD64_PATCH_LEVEL 0x0000008b
+ #define MSR_AMD64_TSC_RATIO 0xc0000104
+ #define MSR_AMD64_NB_CFG 0xc001001f
++#define MSR_AMD64_CPUID_FN_1 0xc0011004
+ #define MSR_AMD64_PATCH_LOADER 0xc0010020
+ #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
+ #define MSR_AMD64_OSVW_STATUS 0xc0010141
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -772,6 +772,64 @@ static void init_amd_ln(struct cpuinfo_x
+ msr_set_bit(MSR_AMD64_DE_CFG, 31);
+ }
+
++static bool rdrand_force;
++
++static int __init rdrand_cmdline(char *str)
++{
++ if (!str)
++ return -EINVAL;
++
++ if (!strcmp(str, "force"))
++ rdrand_force = true;
++ else
++ return -EINVAL;
++
++ return 0;
++}
++early_param("rdrand", rdrand_cmdline);
++
++static void clear_rdrand_cpuid_bit(struct cpuinfo_x86 *c)
++{
++ /*
++ * Saving of the MSR used to hide the RDRAND support during
++ * suspend/resume is done by arch/x86/power/cpu.c, which is
++ * dependent on CONFIG_PM_SLEEP.
++ */
++ if (!IS_ENABLED(CONFIG_PM_SLEEP))
++ return;
++
++ /*
++ * The nordrand option can clear X86_FEATURE_RDRAND, so check for
++ * RDRAND support using the CPUID function directly.
++ */
++ if (!(cpuid_ecx(1) & BIT(30)) || rdrand_force)
++ return;
++
++ msr_clear_bit(MSR_AMD64_CPUID_FN_1, 62);
++
++ /*
++ * Verify that the CPUID change has occurred in case the kernel is
++ * running virtualized and the hypervisor doesn't support the MSR.
++ */
++ if (cpuid_ecx(1) & BIT(30)) {
++ pr_info_once("BIOS may not properly restore RDRAND after suspend, but hypervisor does not support hiding RDRAND via CPUID.\n");
++ return;
++ }
++
++ clear_cpu_cap(c, X86_FEATURE_RDRAND);
++ pr_info_once("BIOS may not properly restore RDRAND after suspend, hiding RDRAND via CPUID. Use rdrand=force to reenable.\n");
++}
++
++static void init_amd_jg(struct cpuinfo_x86 *c)
++{
++ /*
++ * Some BIOS implementations do not restore proper RDRAND support
++ * across suspend and resume. Check on whether to hide the RDRAND
++ * instruction support via CPUID.
++ */
++ clear_rdrand_cpuid_bit(c);
++}
++
+ static void init_amd_bd(struct cpuinfo_x86 *c)
+ {
+ u64 value;
+@@ -786,6 +844,13 @@ static void init_amd_bd(struct cpuinfo_x
+ wrmsrl_safe(MSR_F15H_IC_CFG, value);
+ }
+ }
++
++ /*
++ * Some BIOS implementations do not restore proper RDRAND support
++ * across suspend and resume. Check on whether to hide the RDRAND
++ * instruction support via CPUID.
++ */
++ clear_rdrand_cpuid_bit(c);
+ }
+
+ static void init_amd_zn(struct cpuinfo_x86 *c)
+@@ -828,6 +893,7 @@ static void init_amd(struct cpuinfo_x86
+ case 0x10: init_amd_gh(c); break;
+ case 0x12: init_amd_ln(c); break;
+ case 0x15: init_amd_bd(c); break;
++ case 0x16: init_amd_jg(c); break;
+ case 0x17: init_amd_zn(c); break;
+ }
+
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -13,6 +13,7 @@
+ #include <linux/smp.h>
+ #include <linux/perf_event.h>
+ #include <linux/tboot.h>
++#include <linux/dmi.h>
+
+ #include <asm/pgtable.h>
+ #include <asm/proto.h>
+@@ -24,7 +25,7 @@
+ #include <asm/debugreg.h>
+ #include <asm/cpu.h>
+ #include <asm/mmu_context.h>
+-#include <linux/dmi.h>
++#include <asm/cpu_device_id.h>
+
+ #ifdef CONFIG_X86_32
+ __visible unsigned long saved_context_ebx;
+@@ -398,15 +399,14 @@ static int __init bsp_pm_check_init(void
+
+ core_initcall(bsp_pm_check_init);
+
+-static int msr_init_context(const u32 *msr_id, const int total_num)
++static int msr_build_context(const u32 *msr_id, const int num)
+ {
+- int i = 0;
++ struct saved_msrs *saved_msrs = &saved_context.saved_msrs;
+ struct saved_msr *msr_array;
++ int total_num;
++ int i, j;
+
+- if (saved_context.saved_msrs.array || saved_context.saved_msrs.num > 0) {
+- pr_err("x86/pm: MSR quirk already applied, please check your DMI match table.\n");
+- return -EINVAL;
+- }
++ total_num = saved_msrs->num + num;
+
+ msr_array = kmalloc_array(total_num, sizeof(struct saved_msr), GFP_KERNEL);
+ if (!msr_array) {
+@@ -414,19 +414,30 @@ static int msr_init_context(const u32 *m
+ return -ENOMEM;
+ }
+
+- for (i = 0; i < total_num; i++) {
+- msr_array[i].info.msr_no = msr_id[i];
++ if (saved_msrs->array) {
++ /*
++ * Multiple callbacks can invoke this function, so copy any
++ * MSR save requests from previous invocations.
++ */
++ memcpy(msr_array, saved_msrs->array,
++ sizeof(struct saved_msr) * saved_msrs->num);
++
++ kfree(saved_msrs->array);
++ }
++
++ for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) {
++ msr_array[i].info.msr_no = msr_id[j];
+ msr_array[i].valid = false;
+ msr_array[i].info.reg.q = 0;
+ }
+- saved_context.saved_msrs.num = total_num;
+- saved_context.saved_msrs.array = msr_array;
++ saved_msrs->num = total_num;
++ saved_msrs->array = msr_array;
+
+ return 0;
+ }
+
+ /*
+- * The following section is a quirk framework for problematic BIOSen:
++ * The following sections are a quirk framework for problematic BIOSen:
+ * Sometimes MSRs are modified by the BIOSen after suspended to
+ * RAM, this might cause unexpected behavior after wakeup.
+ * Thus we save/restore these specified MSRs across suspend/resume
+@@ -441,7 +452,7 @@ static int msr_initialize_bdw(const stru
+ u32 bdw_msr_id[] = { MSR_IA32_THERM_CONTROL };
+
+ pr_info("x86/pm: %s detected, MSR saving is needed during suspending.\n", d->ident);
+- return msr_init_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id));
++ return msr_build_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id));
+ }
+
+ static const struct dmi_system_id msr_save_dmi_table[] = {
+@@ -456,9 +467,58 @@ static const struct dmi_system_id msr_sa
+ {}
+ };
+
++static int msr_save_cpuid_features(const struct x86_cpu_id *c)
++{
++ u32 cpuid_msr_id[] = {
++ MSR_AMD64_CPUID_FN_1,
++ };
++
++ pr_info("x86/pm: family %#hx cpu detected, MSR saving is needed during suspending.\n",
++ c->family);
++
++ return msr_build_context(cpuid_msr_id, ARRAY_SIZE(cpuid_msr_id));
++}
++
++static const struct x86_cpu_id msr_save_cpu_table[] = {
++ {
++ .vendor = X86_VENDOR_AMD,
++ .family = 0x15,
++ .model = X86_MODEL_ANY,
++ .feature = X86_FEATURE_ANY,
++ .driver_data = (kernel_ulong_t)msr_save_cpuid_features,
++ },
++ {
++ .vendor = X86_VENDOR_AMD,
++ .family = 0x16,
++ .model = X86_MODEL_ANY,
++ .feature = X86_FEATURE_ANY,
++ .driver_data = (kernel_ulong_t)msr_save_cpuid_features,
++ },
++ {}
++};
++
++typedef int (*pm_cpu_match_t)(const struct x86_cpu_id *);
++static int pm_cpu_check(const struct x86_cpu_id *c)
++{
++ const struct x86_cpu_id *m;
++ int ret = 0;
++
++ m = x86_match_cpu(msr_save_cpu_table);
++ if (m) {
++ pm_cpu_match_t fn;
++
++ fn = (pm_cpu_match_t)m->driver_data;
++ ret = fn(m);
++ }
++
++ return ret;
++}
++
+ static int pm_check_save_msr(void)
+ {
+ dmi_check_system(msr_save_dmi_table);
++ pm_cpu_check(msr_save_cpu_table);
++
+ return 0;
+ }
+
--- /dev/null
+From b63f20a778c88b6a04458ed6ffc69da953d3a109 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+Date: Thu, 22 Aug 2019 14:11:22 -0700
+Subject: x86/retpoline: Don't clobber RFLAGS during CALL_NOSPEC on i386
+
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+
+commit b63f20a778c88b6a04458ed6ffc69da953d3a109 upstream.
+
+Use 'lea' instead of 'add' when adjusting %rsp in CALL_NOSPEC so as to
+avoid clobbering flags.
+
+KVM's emulator makes indirect calls into a jump table of sorts, where
+the destination of the CALL_NOSPEC is a small blob of code that performs
+fast emulation by executing the target instruction with fixed operands.
+
+ adcb_al_dl:
+ 0x000339f8 <+0>: adc %dl,%al
+ 0x000339fa <+2>: ret
+
+A major motivation for doing fast emulation is to leverage the CPU to
+handle consumption and manipulation of arithmetic flags, i.e. RFLAGS is
+both an input and output to the target of CALL_NOSPEC. Clobbering flags
+results in all sorts of incorrect emulation, e.g. Jcc instructions often
+take the wrong path. Sans the nops...
+
+ asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
+ 0x0003595a <+58>: mov 0xc0(%ebx),%eax
+ 0x00035960 <+64>: mov 0x60(%ebx),%edx
+ 0x00035963 <+67>: mov 0x90(%ebx),%ecx
+ 0x00035969 <+73>: push %edi
+ 0x0003596a <+74>: popf
+ 0x0003596b <+75>: call *%esi
+ 0x000359a0 <+128>: pushf
+ 0x000359a1 <+129>: pop %edi
+ 0x000359a2 <+130>: mov %eax,0xc0(%ebx)
+ 0x000359b1 <+145>: mov %edx,0x60(%ebx)
+
+ ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
+ 0x000359a8 <+136>: mov -0x10(%ebp),%eax
+ 0x000359ab <+139>: and $0x8d5,%edi
+ 0x000359b4 <+148>: and $0xfffff72a,%eax
+ 0x000359b9 <+153>: or %eax,%edi
+ 0x000359bd <+157>: mov %edi,0x4(%ebx)
+
+For the most part this has gone unnoticed as emulation of guest code
+that can trigger fast emulation is effectively limited to MMIO when
+running on modern hardware, and MMIO is rarely, if ever, accessed by
+instructions that affect or consume flags.
+
+Breakage is almost instantaneous when running with unrestricted guest
+disabled, in which case KVM must emulate all instructions when the guest
+has invalid state, e.g. when the guest is in Big Real Mode during early
+BIOS.
+
+Fixes: 776b043848fd2 ("x86/retpoline: Add initial retpoline support")
+Fixes: 1a29b5b7f347a ("KVM: x86: Make indirect calls in emulator speculation safe")
+Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20190822211122.27579-1-sean.j.christopherson@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/nospec-branch.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -202,7 +202,7 @@
+ " lfence;\n" \
+ " jmp 902b;\n" \
+ " .align 16\n" \
+- "903: addl $4, %%esp;\n" \
++ "903: lea 4(%%esp), %%esp;\n" \
+ " pushl %[thunk_target];\n" \
+ " ret;\n" \
+ " .align 16\n" \