From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 29 Apr 2019 13:15:31 +0000 (+0200)
Subject: 5.0-stable patches
X-Git-Tag: v4.9.172~21
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=088b31d94dc530edbc78f28467046d5676079095;p=thirdparty%2Fkernel%2Fstable-queue.git

5.0-stable patches

added patches:
	aio-fold-lookup_kiocb-into-its-sole-caller.patch
	aio-keep-io_event-in-aio_kiocb.patch
	aio-store-event-at-final-iocb_put.patch
	fix-aio_poll-races.patch
	pin-iocb-through-aio.patch
	x86-retpolines-disable-switch-jump-tables-when-retpolines-are-enabled.patch
	x86-retpolines-raise-limit-for-generating-indirect-calls-from-switch-case.patch
---

diff --git a/queue-5.0/aio-fold-lookup_kiocb-into-its-sole-caller.patch b/queue-5.0/aio-fold-lookup_kiocb-into-its-sole-caller.patch
new file mode 100644
index 00000000000..4113707990c
--- /dev/null
+++ b/queue-5.0/aio-fold-lookup_kiocb-into-its-sole-caller.patch
@@ -0,0 +1,62 @@
+From 833f4154ed560232120bc475935ee1d6a20e159f Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 11 Mar 2019 19:00:36 -0400
+Subject: aio: fold lookup_kiocb() into its sole caller
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 833f4154ed560232120bc475935ee1d6a20e159f upstream.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/aio.c |   29 +++++++----------------------
+ 1 file changed, 7 insertions(+), 22 deletions(-)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -2002,24 +2002,6 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat
+ }
+ #endif
+ 
+-/* lookup_kiocb
+- *	Finds a given iocb for cancellation.
+- */
+-static struct aio_kiocb *
+-lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb)
+-{
+-	struct aio_kiocb *kiocb;
+-
+-	assert_spin_locked(&ctx->ctx_lock);
+-
+-	/* TODO: use a hash or array, this sucks. */
+-	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+-		if (kiocb->ki_user_iocb == iocb)
+-			return kiocb;
+-	}
+-	return NULL;
+-}
+-
+ /* sys_io_cancel:
+  *	Attempts to cancel an iocb previously passed to io_submit.  If
+  *	the operation is successfully cancelled, the resulting event is
+@@ -2048,10 +2030,13 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t
+ 		return -EINVAL;
+ 
+ 	spin_lock_irq(&ctx->ctx_lock);
+-	kiocb = lookup_kiocb(ctx, iocb);
+-	if (kiocb) {
+-		ret = kiocb->ki_cancel(&kiocb->rw);
+-		list_del_init(&kiocb->ki_list);
++	/* TODO: use a hash or array, this sucks. */
++	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
++		if (kiocb->ki_user_iocb == iocb) {
++			ret = kiocb->ki_cancel(&kiocb->rw);
++			list_del_init(&kiocb->ki_list);
++			break;
++		}
+ 	}
+ 	spin_unlock_irq(&ctx->ctx_lock);
+ 
diff --git a/queue-5.0/aio-keep-io_event-in-aio_kiocb.patch b/queue-5.0/aio-keep-io_event-in-aio_kiocb.patch
new file mode 100644
index 00000000000..06f6eb5dabd
--- /dev/null
+++ b/queue-5.0/aio-keep-io_event-in-aio_kiocb.patch
@@ -0,0 +1,106 @@
+From a9339b7855094ba11a97e8822ae038135e879e79 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Thu, 7 Mar 2019 19:43:45 -0500
+Subject: aio: keep io_event in aio_kiocb
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit a9339b7855094ba11a97e8822ae038135e879e79 upstream.
+
+We want to separate forming the resulting io_event from putting it
+into the ring buffer.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/aio.c |   31 +++++++++++++------------------
+ 1 file changed, 13 insertions(+), 18 deletions(-)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -204,8 +204,7 @@ struct aio_kiocb {
+ 	struct kioctx		*ki_ctx;
+ 	kiocb_cancel_fn		*ki_cancel;
+ 
+-	struct iocb __user	*ki_user_iocb;	/* user's aiocb */
+-	__u64			ki_user_data;	/* user's data for completion */
++	struct io_event		ki_res;
+ 
+ 	struct list_head	ki_list;	/* the aio core uses this
+ 						 * for cancellation */
+@@ -1084,15 +1083,6 @@ static inline void iocb_put(struct aio_k
+ 		iocb_destroy(iocb);
+ }
+ 
+-static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
+-			   long res, long res2)
+-{
+-	ev->obj = (u64)(unsigned long)iocb->ki_user_iocb;
+-	ev->data = iocb->ki_user_data;
+-	ev->res = res;
+-	ev->res2 = res2;
+-}
+-
+ /* aio_complete
+  *	Called when the io request on the given iocb is complete.
+  */
+@@ -1104,6 +1094,8 @@ static void aio_complete(struct aio_kioc
+ 	unsigned tail, pos, head;
+ 	unsigned long	flags;
+ 
++	iocb->ki_res.res = res;
++	iocb->ki_res.res2 = res2;
+ 	/*
+ 	 * Add a completion event to the ring buffer. Must be done holding
+ 	 * ctx->completion_lock to prevent other code from messing with the tail
+@@ -1120,14 +1112,14 @@ static void aio_complete(struct aio_kioc
+ 	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
+ 	event = ev_page + pos % AIO_EVENTS_PER_PAGE;
+ 
+-	aio_fill_event(event, iocb, res, res2);
++	*event = iocb->ki_res;
+ 
+ 	kunmap_atomic(ev_page);
+ 	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
+ 
+-	pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
+-		 ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
+-		 res, res2);
++	pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
++		 (void __user *)(unsigned long)iocb->ki_res.obj,
++		 iocb->ki_res.data, iocb->ki_res.res, iocb->ki_res.res2);
+ 
+ 	/* after flagging the request as done, we
+ 	 * must never even look at it again
+@@ -1844,8 +1836,10 @@ static int __io_submit_one(struct kioctx
+ 		goto out_put_req;
+ 	}
+ 
+-	req->ki_user_iocb = user_iocb;
+-	req->ki_user_data = iocb->aio_data;
++	req->ki_res.obj = (u64)(unsigned long)user_iocb;
++	req->ki_res.data = iocb->aio_data;
++	req->ki_res.res = 0;
++	req->ki_res.res2 = 0;
+ 
+ 	switch (iocb->aio_lio_opcode) {
+ 	case IOCB_CMD_PREAD:
+@@ -2019,6 +2013,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t
+ 	struct aio_kiocb *kiocb;
+ 	int ret = -EINVAL;
+ 	u32 key;
++	u64 obj = (u64)(unsigned long)iocb;
+ 
+ 	if (unlikely(get_user(key, &iocb->aio_key)))
+ 		return -EFAULT;
+@@ -2032,7 +2027,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t
+ 	spin_lock_irq(&ctx->ctx_lock);
+ 	/* TODO: use a hash or array, this sucks. */
+ 	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+-		if (kiocb->ki_user_iocb == iocb) {
++		if (kiocb->ki_res.obj == obj) {
+ 			ret = kiocb->ki_cancel(&kiocb->rw);
+ 			list_del_init(&kiocb->ki_list);
+ 			break;
diff --git a/queue-5.0/aio-store-event-at-final-iocb_put.patch b/queue-5.0/aio-store-event-at-final-iocb_put.patch
new file mode 100644
index 00000000000..ac38a491308
--- /dev/null
+++ b/queue-5.0/aio-store-event-at-final-iocb_put.patch
@@ -0,0 +1,102 @@
+From 2bb874c0d873d13bd9b9b9c6d7b7c4edab18c8b4 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Thu, 7 Mar 2019 19:49:55 -0500
+Subject: aio: store event at final iocb_put()
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 2bb874c0d873d13bd9b9b9c6d7b7c4edab18c8b4 upstream.
+
+Instead of having aio_complete() set ->ki_res.{res,res2}, do that
+explicitly in its callers, drop the reference (as aio_complete()
+used to do) and delay the rest until the final iocb_put().
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/aio.c |   33 +++++++++++++++++----------------
+ 1 file changed, 17 insertions(+), 16 deletions(-)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -1077,16 +1077,10 @@ static inline void iocb_destroy(struct a
+ 	kmem_cache_free(kiocb_cachep, iocb);
+ }
+ 
+-static inline void iocb_put(struct aio_kiocb *iocb)
+-{
+-	if (refcount_dec_and_test(&iocb->ki_refcnt))
+-		iocb_destroy(iocb);
+-}
+-
+ /* aio_complete
+  *	Called when the io request on the given iocb is complete.
+  */
+-static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
++static void aio_complete(struct aio_kiocb *iocb)
+ {
+ 	struct kioctx	*ctx = iocb->ki_ctx;
+ 	struct aio_ring	*ring;
+@@ -1094,8 +1088,6 @@ static void aio_complete(struct aio_kioc
+ 	unsigned tail, pos, head;
+ 	unsigned long	flags;
+ 
+-	iocb->ki_res.res = res;
+-	iocb->ki_res.res2 = res2;
+ 	/*
+ 	 * Add a completion event to the ring buffer. Must be done holding
+ 	 * ctx->completion_lock to prevent other code from messing with the tail
+@@ -1161,7 +1153,14 @@ static void aio_complete(struct aio_kioc
+ 
+ 	if (waitqueue_active(&ctx->wait))
+ 		wake_up(&ctx->wait);
+-	iocb_put(iocb);
++}
++
++static inline void iocb_put(struct aio_kiocb *iocb)
++{
++	if (refcount_dec_and_test(&iocb->ki_refcnt)) {
++		aio_complete(iocb);
++		iocb_destroy(iocb);
++	}
+ }
+ 
+ /* aio_read_events_ring
+@@ -1435,7 +1434,9 @@ static void aio_complete_rw(struct kiocb
+ 		file_end_write(kiocb->ki_filp);
+ 	}
+ 
+-	aio_complete(iocb, res, res2);
++	iocb->ki_res.res = res;
++	iocb->ki_res.res2 = res2;
++	iocb_put(iocb);
+ }
+ 
+ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
+@@ -1583,11 +1584,10 @@ static ssize_t aio_write(struct kiocb *r
+ 
+ static void aio_fsync_work(struct work_struct *work)
+ {
+-	struct fsync_iocb *req = container_of(work, struct fsync_iocb, work);
+-	int ret;
++	struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work);
+ 
+-	ret = vfs_fsync(req->file, req->datasync);
+-	aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
++	iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync);
++	iocb_put(iocb);
+ }
+ 
+ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
+@@ -1608,7 +1608,8 @@ static int aio_fsync(struct fsync_iocb *
+ 
+ static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
+ {
+-	aio_complete(iocb, mangle_poll(mask), 0);
++	iocb->ki_res.res = mangle_poll(mask);
++	iocb_put(iocb);
+ }
+ 
+ static void aio_poll_complete_work(struct work_struct *work)
diff --git a/queue-5.0/fix-aio_poll-races.patch b/queue-5.0/fix-aio_poll-races.patch
new file mode 100644
index 00000000000..0c78e4fbfd8
--- /dev/null
+++ b/queue-5.0/fix-aio_poll-races.patch
@@ -0,0 +1,226 @@
+From af5c72b1fc7a00aa484e90b0c4e0eeb582545634 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Thu, 7 Mar 2019 21:45:41 -0500
+Subject: Fix aio_poll() races
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit af5c72b1fc7a00aa484e90b0c4e0eeb582545634 upstream.
+
+aio_poll() has to cope with several unpleasant problems:
+	* requests that might stay around indefinitely need to
+be made visible for io_cancel(2); that must not be done to
+a request already completed, though.
+	* in cases when ->poll() has placed us on a waitqueue,
+wakeup might have happened (and request completed) before ->poll()
+returns.
+	* worse, in some early wakeup cases request might end
+up re-added into the queue later - we can't treat "woken up and
+currently not in the queue" as "it's not going to stick around
+indefinitely"
+	* ... moreover, ->poll() might have decided not to
+put it on any queues to start with, and that needs to be distinguished
+from the previous case
+	* ->poll() might have tried to put us on more than one queue.
+Only the first will succeed for aio poll, so we might end up missing
+wakeups.  OTOH, we might very well notice that only after the
+wakeup hits and request gets completed (all before ->poll() gets
+around to the second poll_wait()).  In that case it's too late to
+decide that we have an error.
+
+req->woken was an attempt to deal with that.  Unfortunately, it was
+broken.  What we need to keep track of is not that wakeup has happened -
+the thing might come back after that.  It's that async reference is
+already gone and won't come back, so we can't (and needn't) put the
+request on the list of cancellables.
+
+The easiest case is "request hadn't been put on any waitqueues"; we
+can tell by seeing NULL apt.head, and in that case there won't be
+anything async.  We should either complete the request ourselves
+(if vfs_poll() reports anything of interest) or return an error.
+
+In all other cases we get exclusion with wakeups by grabbing the
+queue lock.
+
+If request is currently on queue and we have something interesting
+from vfs_poll(), we can steal it and complete the request ourselves.
+
+If it's on queue and vfs_poll() has not reported anything interesting,
+we either put it on the cancellable list, or, if we know that it
+hadn't been put on all queues ->poll() wanted it on, we steal it and
+return an error.
+
+If it's _not_ on queue, it's either been already dealt with (in which
+case we do nothing), or there's aio_poll_complete_work() about to be
+executed.  In that case we either put it on the cancellable list,
+or, if we know it hadn't been put on all queues ->poll() wanted it on,
+simulate what cancel would've done.
+
+It's a lot more convoluted than I'd like it to be.  Single-consumer APIs
+suck, and unfortunately aio is not an exception...
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/aio.c |   90 ++++++++++++++++++++++++++++-----------------------------------
+ 1 file changed, 40 insertions(+), 50 deletions(-)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -181,7 +181,7 @@ struct poll_iocb {
+ 	struct file		*file;
+ 	struct wait_queue_head	*head;
+ 	__poll_t		events;
+-	bool			woken;
++	bool			done;
+ 	bool			cancelled;
+ 	struct wait_queue_entry	wait;
+ 	struct work_struct	work;
+@@ -1606,12 +1606,6 @@ static int aio_fsync(struct fsync_iocb *
+ 	return 0;
+ }
+ 
+-static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
+-{
+-	iocb->ki_res.res = mangle_poll(mask);
+-	iocb_put(iocb);
+-}
+-
+ static void aio_poll_complete_work(struct work_struct *work)
+ {
+ 	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
+@@ -1637,9 +1631,11 @@ static void aio_poll_complete_work(struc
+ 		return;
+ 	}
+ 	list_del_init(&iocb->ki_list);
++	iocb->ki_res.res = mangle_poll(mask);
++	req->done = true;
+ 	spin_unlock_irq(&ctx->ctx_lock);
+ 
+-	aio_poll_complete(iocb, mask);
++	iocb_put(iocb);
+ }
+ 
+ /* assumes we are called with irqs disabled */
+@@ -1667,31 +1663,27 @@ static int aio_poll_wake(struct wait_que
+ 	__poll_t mask = key_to_poll(key);
+ 	unsigned long flags;
+ 
+-	req->woken = true;
+-
+ 	/* for instances that support it check for an event match first: */
+-	if (mask) {
+-		if (!(mask & req->events))
+-			return 0;
++	if (mask && !(mask & req->events))
++		return 0;
+ 
++	list_del_init(&req->wait.entry);
++
++	if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
+ 		/*
+ 		 * Try to complete the iocb inline if we can. Use
+ 		 * irqsave/irqrestore because not all filesystems (e.g. fuse)
+ 		 * call this function with IRQs disabled and because IRQs
+ 		 * have to be disabled before ctx_lock is obtained.
+ 		 */
+-		if (spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
+-			list_del(&iocb->ki_list);
+-			spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
+-
+-			list_del_init(&req->wait.entry);
+-			aio_poll_complete(iocb, mask);
+-			return 1;
+-		}
++		list_del(&iocb->ki_list);
++		iocb->ki_res.res = mangle_poll(mask);
++		req->done = true;
++		spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
++		iocb_put(iocb);
++	} else {
++		schedule_work(&req->work);
+ 	}
+-
+-	list_del_init(&req->wait.entry);
+-	schedule_work(&req->work);
+ 	return 1;
+ }
+ 
+@@ -1723,6 +1715,7 @@ static ssize_t aio_poll(struct aio_kiocb
+ 	struct kioctx *ctx = aiocb->ki_ctx;
+ 	struct poll_iocb *req = &aiocb->poll;
+ 	struct aio_poll_table apt;
++	bool cancel = false;
+ 	__poll_t mask;
+ 
+ 	/* reject any unknown events outside the normal event mask. */
+@@ -1736,7 +1729,7 @@ static ssize_t aio_poll(struct aio_kiocb
+ 	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
+ 
+ 	req->head = NULL;
+-	req->woken = false;
++	req->done = false;
+ 	req->cancelled = false;
+ 
+ 	apt.pt._qproc = aio_poll_queue_proc;
+@@ -1749,36 +1742,33 @@ static ssize_t aio_poll(struct aio_kiocb
+ 	init_waitqueue_func_entry(&req->wait, aio_poll_wake);
+ 
+ 	mask = vfs_poll(req->file, &apt.pt) & req->events;
+-	if (unlikely(!req->head)) {
+-		/* we did not manage to set up a waitqueue, done */
+-		goto out;
+-	}
+-
+ 	spin_lock_irq(&ctx->ctx_lock);
+-	spin_lock(&req->head->lock);
+-	if (req->woken) {
+-		/* wake_up context handles the rest */
+-		mask = 0;
++	if (likely(req->head)) {
++		spin_lock(&req->head->lock);
++		if (unlikely(list_empty(&req->wait.entry))) {
++			if (apt.error)
++				cancel = true;
++			apt.error = 0;
++			mask = 0;
++		}
++		if (mask || apt.error) {
++			list_del_init(&req->wait.entry);
++		} else if (cancel) {
++			WRITE_ONCE(req->cancelled, true);
++		} else if (!req->done) { /* actually waiting for an event */
++			list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
++			aiocb->ki_cancel = aio_poll_cancel;
++		}
++		spin_unlock(&req->head->lock);
++	}
++	if (mask) { /* no async, we'd stolen it */
++		aiocb->ki_res.res = mangle_poll(mask);
+ 		apt.error = 0;
+-	} else if (mask || apt.error) {
+-		/* if we get an error or a mask we are done */
+-		WARN_ON_ONCE(list_empty(&req->wait.entry));
+-		list_del_init(&req->wait.entry);
+-	} else {
+-		/* actually waiting for an event */
+-		list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+-		aiocb->ki_cancel = aio_poll_cancel;
+ 	}
+-	spin_unlock(&req->head->lock);
+ 	spin_unlock_irq(&ctx->ctx_lock);
+-
+-out:
+-	if (unlikely(apt.error))
+-		return apt.error;
+-
+ 	if (mask)
+-		aio_poll_complete(aiocb, mask);
+-	return 0;
++		iocb_put(aiocb);
++	return apt.error;
+ }
+ 
+ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
diff --git a/queue-5.0/pin-iocb-through-aio.patch b/queue-5.0/pin-iocb-through-aio.patch
new file mode 100644
index 00000000000..039087053c7
--- /dev/null
+++ b/queue-5.0/pin-iocb-through-aio.patch
@@ -0,0 +1,113 @@
+From b53119f13a04879c3bf502828d99d13726639ead Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Wed, 6 Mar 2019 20:22:54 -0500
+Subject: pin iocb through aio.
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit b53119f13a04879c3bf502828d99d13726639ead upstream.
+
+aio_poll() is not the only case that needs file pinned; worse, while
+aio_read()/aio_write() can live without pinning iocb itself, the
+proof is rather brittle and can easily break on later changes.
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/aio.c |   37 +++++++++++++++++++++----------------
+ 1 file changed, 21 insertions(+), 16 deletions(-)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -1022,6 +1022,9 @@ static bool get_reqs_available(struct ki
+ /* aio_get_req
+  *	Allocate a slot for an aio request.
+  * Returns NULL if no requests are free.
++ *
++ * The refcount is initialized to 2 - one for the async op completion,
++ * one for the synchronous code that does this.
+  */
+ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
+ {
+@@ -1034,7 +1037,7 @@ static inline struct aio_kiocb *aio_get_
+ 	percpu_ref_get(&ctx->reqs);
+ 	req->ki_ctx = ctx;
+ 	INIT_LIST_HEAD(&req->ki_list);
+-	refcount_set(&req->ki_refcnt, 0);
++	refcount_set(&req->ki_refcnt, 2);
+ 	req->ki_eventfd = NULL;
+ 	return req;
+ }
+@@ -1067,15 +1070,18 @@ out:
+ 	return ret;
+ }
+ 
++static inline void iocb_destroy(struct aio_kiocb *iocb)
++{
++	if (iocb->ki_filp)
++		fput(iocb->ki_filp);
++	percpu_ref_put(&iocb->ki_ctx->reqs);
++	kmem_cache_free(kiocb_cachep, iocb);
++}
++
+ static inline void iocb_put(struct aio_kiocb *iocb)
+ {
+-	if (refcount_read(&iocb->ki_refcnt) == 0 ||
+-	    refcount_dec_and_test(&iocb->ki_refcnt)) {
+-		if (iocb->ki_filp)
+-			fput(iocb->ki_filp);
+-		percpu_ref_put(&iocb->ki_ctx->reqs);
+-		kmem_cache_free(kiocb_cachep, iocb);
+-	}
++	if (refcount_dec_and_test(&iocb->ki_refcnt))
++		iocb_destroy(iocb);
+ }
+ 
+ static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
+@@ -1749,9 +1755,6 @@ static ssize_t aio_poll(struct aio_kiocb
+ 	INIT_LIST_HEAD(&req->wait.entry);
+ 	init_waitqueue_func_entry(&req->wait, aio_poll_wake);
+ 
+-	/* one for removal from waitqueue, one for this function */
+-	refcount_set(&aiocb->ki_refcnt, 2);
+-
+ 	mask = vfs_poll(req->file, &apt.pt) & req->events;
+ 	if (unlikely(!req->head)) {
+ 		/* we did not manage to set up a waitqueue, done */
+@@ -1782,7 +1785,6 @@ out:
+ 
+ 	if (mask)
+ 		aio_poll_complete(aiocb, mask);
+-	iocb_put(aiocb);
+ 	return 0;
+ }
+ 
+@@ -1873,18 +1875,21 @@ static int __io_submit_one(struct kioctx
+ 		break;
+ 	}
+ 
++	/* Done with the synchronous reference */
++	iocb_put(req);
++
+ 	/*
+ 	 * If ret is 0, we'd either done aio_complete() ourselves or have
+ 	 * arranged for that to be done asynchronously.  Anything non-zero
+ 	 * means that we need to destroy req ourselves.
+ 	 */
+-	if (ret)
+-		goto out_put_req;
+-	return 0;
++	if (!ret)
++		return 0;
++
+ out_put_req:
+ 	if (req->ki_eventfd)
+ 		eventfd_ctx_put(req->ki_eventfd);
+-	iocb_put(req);
++	iocb_destroy(req);
+ out_put_reqs_available:
+ 	put_reqs_available(ctx, 1);
+ 	return ret;
diff --git a/queue-5.0/series b/queue-5.0/series
index 12cfe01fae7..4e963239266 100644
--- a/queue-5.0/series
+++ b/queue-5.0/series
@@ -61,3 +61,10 @@ tipc-check-link-name-with-right-length-in-tipc_nl_compat_link_set.patch
 net-netrom-fix-error-cleanup-path-of-nr_proto_init.patch
 net-rds-check-address-length-before-reading-address-family.patch
 rxrpc-fix-race-condition-in-rxrpc_input_packet.patch
+pin-iocb-through-aio.patch
+aio-fold-lookup_kiocb-into-its-sole-caller.patch
+aio-keep-io_event-in-aio_kiocb.patch
+aio-store-event-at-final-iocb_put.patch
+fix-aio_poll-races.patch
+x86-retpolines-raise-limit-for-generating-indirect-calls-from-switch-case.patch
+x86-retpolines-disable-switch-jump-tables-when-retpolines-are-enabled.patch
diff --git a/queue-5.0/x86-retpolines-disable-switch-jump-tables-when-retpolines-are-enabled.patch b/queue-5.0/x86-retpolines-disable-switch-jump-tables-when-retpolines-are-enabled.patch
new file mode 100644
index 00000000000..6657d2e7296
--- /dev/null
+++ b/queue-5.0/x86-retpolines-disable-switch-jump-tables-when-retpolines-are-enabled.patch
@@ -0,0 +1,70 @@
+From a9d57ef15cbe327fe54416dd194ee0ea66ae53a4 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 25 Mar 2019 14:56:20 +0100
+Subject: x86/retpolines: Disable switch jump tables when retpolines are enabled
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit a9d57ef15cbe327fe54416dd194ee0ea66ae53a4 upstream.
+
+Commit ce02ef06fcf7 ("x86, retpolines: Raise limit for generating indirect
+calls from switch-case") raised the limit under retpolines to 20 switch
+cases where gcc would only then start to emit jump tables, and therefore
+effectively disabling the emission of slow indirect calls in this area.
+
+After this has been brought to attention to gcc folks [0], Martin Liska
+has then fixed gcc to align with clang by avoiding to generate switch jump
+tables entirely under retpolines. This is taking effect in gcc starting
+from stable version 8.4.0. Given kernel supports compilation with older
+versions of gcc where the fix is not being available or backported anymore,
+we need to keep the extra KBUILD_CFLAGS around for some time and generally
+set the -fno-jump-tables to align with what more recent gcc is doing
+automatically today.
+
+More than 20 switch cases are not expected to be fast-path critical, but
+it would still be good to align with gcc behavior for versions < 8.4.0 in
+order to have consistency across supported gcc versions. vmlinux size is
+slightly growing by 0.27% for older gcc. This flag is only set to work
+around affected gcc, no change for clang.
+
+  [0] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86952
+
+Suggested-by: Martin Liska <mliska@suse.cz>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jesper Dangaard Brouer <brouer@redhat.com>
+Cc: Björn Töpel<bjorn.topel@intel.com>
+Cc: Magnus Karlsson <magnus.karlsson@intel.com>
+Cc: Alexei Starovoitov <ast@kernel.org>
+Cc: H.J. Lu <hjl.tools@gmail.com>
+Cc: Alexei Starovoitov <ast@kernel.org>
+Cc: David S. Miller <davem@davemloft.net>
+Link: https://lkml.kernel.org/r/20190325135620.14882-1-daniel@iogearbox.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/Makefile |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -220,8 +220,12 @@ ifdef CONFIG_RETPOLINE
+   # Additionally, avoid generating expensive indirect jumps which
+   # are subject to retpolines for small number of switch cases.
+   # clang turns off jump table generation by default when under
+-  # retpoline builds, however, gcc does not for x86.
+-  KBUILD_CFLAGS += $(call cc-option,--param=case-values-threshold=20)
++  # retpoline builds, however, gcc does not for x86. This has
++  # only been fixed starting from gcc stable version 8.4.0 and
++  # onwards, but not for older ones. See gcc bug #86952.
++  ifndef CONFIG_CC_IS_CLANG
++    KBUILD_CFLAGS += $(call cc-option,-fno-jump-tables)
++  endif
+ endif
+ 
+ archscripts: scripts_basic
diff --git a/queue-5.0/x86-retpolines-raise-limit-for-generating-indirect-calls-from-switch-case.patch b/queue-5.0/x86-retpolines-raise-limit-for-generating-indirect-calls-from-switch-case.patch
new file mode 100644
index 00000000000..73c21f1af65
--- /dev/null
+++ b/queue-5.0/x86-retpolines-raise-limit-for-generating-indirect-calls-from-switch-case.patch
@@ -0,0 +1,175 @@
+From ce02ef06fcf7a399a6276adb83f37373d10cbbe1 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Thu, 21 Feb 2019 23:19:41 +0100
+Subject: x86, retpolines: Raise limit for generating indirect calls from switch-case
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit ce02ef06fcf7a399a6276adb83f37373d10cbbe1 upstream.
+
+From networking side, there are numerous attempts to get rid of indirect
+calls in fast-path wherever feasible in order to avoid the cost of
+retpolines, for example, just to name a few:
+
+  * 283c16a2dfd3 ("indirect call wrappers: helpers to speed-up indirect calls of builtin")
+  * aaa5d90b395a ("net: use indirect call wrappers at GRO network layer")
+  * 028e0a476684 ("net: use indirect call wrappers at GRO transport layer")
+  * 356da6d0cde3 ("dma-mapping: bypass indirect calls for dma-direct")
+  * 09772d92cd5a ("bpf: avoid retpoline for lookup/update/delete calls on maps")
+  * 10870dd89e95 ("netfilter: nf_tables: add direct calls for all builtin expressions")
+  [...]
+
+Recent work on XDP from Björn and Magnus additionally found that manually
+transforming the XDP return code switch statement with more than 5 cases
+into if-else combination would result in a considerable speedup in XDP
+layer due to avoidance of indirect calls in CONFIG_RETPOLINE enabled
+builds. On i40e driver with XDP prog attached, a 20-26% speedup has been
+observed [0]. Aside from XDP, there are many other places later in the
+networking stack's critical path with similar switch-case
+processing. Rather than fixing every XDP-enabled driver and locations in
+stack by hand, it would be good to instead raise the limit where gcc would
+emit expensive indirect calls from the switch under retpolines and stick
+with the default as-is in case of !retpoline configured kernels. This would
+also have the advantage that for archs where this is not necessary, we let
+compiler select the underlying target optimization for these constructs and
+avoid potential slow-downs by if-else hand-rewrite.
+
+In case of gcc, this setting is controlled by case-values-threshold which
+has an architecture global default that selects 4 or 5 (latter if target
+does not have a case insn that compares the bounds) where some arch back
+ends like arm64 or s390 override it with their own target hooks, for
+example, in gcc commit db7a90aa0de5 ("S/390: Disable prediction of indirect
+branches") the threshold pretty much disables jump tables by limit of 20
+under retpoline builds.  Comparing gcc's and clang's default code
+generation on x86-64 under O2 level with retpoline build results in the
+following outcome for 5 switch cases:
+
+* gcc with -mindirect-branch=thunk-inline -mindirect-branch-register:
+
+  # gdb -batch -ex 'disassemble dispatch' ./c-switch
+  Dump of assembler code for function dispatch:
+   0x0000000000400be0 <+0>:     cmp    $0x4,%edi
+   0x0000000000400be3 <+3>:     ja     0x400c35 <dispatch+85>
+   0x0000000000400be5 <+5>:     lea    0x915f8(%rip),%rdx        # 0x4921e4
+   0x0000000000400bec <+12>:    mov    %edi,%edi
+   0x0000000000400bee <+14>:    movslq (%rdx,%rdi,4),%rax
+   0x0000000000400bf2 <+18>:    add    %rdx,%rax
+   0x0000000000400bf5 <+21>:    callq  0x400c01 <dispatch+33>
+   0x0000000000400bfa <+26>:    pause
+   0x0000000000400bfc <+28>:    lfence
+   0x0000000000400bff <+31>:    jmp    0x400bfa <dispatch+26>
+   0x0000000000400c01 <+33>:    mov    %rax,(%rsp)
+   0x0000000000400c05 <+37>:    retq
+   0x0000000000400c06 <+38>:    nopw   %cs:0x0(%rax,%rax,1)
+   0x0000000000400c10 <+48>:    jmpq   0x400c90 <fn_3>
+   0x0000000000400c15 <+53>:    nopl   (%rax)
+   0x0000000000400c18 <+56>:    jmpq   0x400c70 <fn_2>
+   0x0000000000400c1d <+61>:    nopl   (%rax)
+   0x0000000000400c20 <+64>:    jmpq   0x400c50 <fn_1>
+   0x0000000000400c25 <+69>:    nopl   (%rax)
+   0x0000000000400c28 <+72>:    jmpq   0x400c40 <fn_0>
+   0x0000000000400c2d <+77>:    nopl   (%rax)
+   0x0000000000400c30 <+80>:    jmpq   0x400cb0 <fn_4>
+   0x0000000000400c35 <+85>:    push   %rax
+   0x0000000000400c36 <+86>:    callq  0x40dd80 <abort>
+  End of assembler dump.
+
+* clang with -mretpoline emitting search tree:
+
+  # gdb -batch -ex 'disassemble dispatch' ./c-switch
+  Dump of assembler code for function dispatch:
+   0x0000000000400b30 <+0>:     cmp    $0x1,%edi
+   0x0000000000400b33 <+3>:     jle    0x400b44 <dispatch+20>
+   0x0000000000400b35 <+5>:     cmp    $0x2,%edi
+   0x0000000000400b38 <+8>:     je     0x400b4d <dispatch+29>
+   0x0000000000400b3a <+10>:    cmp    $0x3,%edi
+   0x0000000000400b3d <+13>:    jne    0x400b52 <dispatch+34>
+   0x0000000000400b3f <+15>:    jmpq   0x400c50 <fn_3>
+   0x0000000000400b44 <+20>:    test   %edi,%edi
+   0x0000000000400b46 <+22>:    jne    0x400b5c <dispatch+44>
+   0x0000000000400b48 <+24>:    jmpq   0x400c20 <fn_0>
+   0x0000000000400b4d <+29>:    jmpq   0x400c40 <fn_2>
+   0x0000000000400b52 <+34>:    cmp    $0x4,%edi
+   0x0000000000400b55 <+37>:    jne    0x400b66 <dispatch+54>
+   0x0000000000400b57 <+39>:    jmpq   0x400c60 <fn_4>
+   0x0000000000400b5c <+44>:    cmp    $0x1,%edi
+   0x0000000000400b5f <+47>:    jne    0x400b66 <dispatch+54>
+   0x0000000000400b61 <+49>:    jmpq   0x400c30 <fn_1>
+   0x0000000000400b66 <+54>:    push   %rax
+   0x0000000000400b67 <+55>:    callq  0x40dd20 <abort>
+  End of assembler dump.
+
+  For sake of comparison, clang without -mretpoline:
+
+  # gdb -batch -ex 'disassemble dispatch' ./c-switch
+  Dump of assembler code for function dispatch:
+   0x0000000000400b30 <+0>:	cmp    $0x4,%edi
+   0x0000000000400b33 <+3>:	ja     0x400b57 <dispatch+39>
+   0x0000000000400b35 <+5>:	mov    %edi,%eax
+   0x0000000000400b37 <+7>:	jmpq   *0x492148(,%rax,8)
+   0x0000000000400b3e <+14>:	jmpq   0x400bf0 <fn_0>
+   0x0000000000400b43 <+19>:	jmpq   0x400c30 <fn_4>
+   0x0000000000400b48 <+24>:	jmpq   0x400c10 <fn_2>
+   0x0000000000400b4d <+29>:	jmpq   0x400c20 <fn_3>
+   0x0000000000400b52 <+34>:	jmpq   0x400c00 <fn_1>
+   0x0000000000400b57 <+39>:	push   %rax
+   0x0000000000400b58 <+40>:	callq  0x40dcf0 <abort>
+  End of assembler dump.
+
+Raising the cases to a high number (e.g. 100) will still result in similar
+code generation pattern with clang and gcc as above, in other words clang
+generally turns off jump table emission by having an extra expansion pass
+under retpoline build to turn indirectbr instructions from their IR into
+switch instructions as a built-in -mno-jump-table lowering of a switch (in
+this case, even if IR input already contained an indirect branch).
+
+For gcc, adding --param=case-values-threshold=20 as in similar fashion as
+s390 in order to raise the limit for x86 retpoline enabled builds results
+in a small vmlinux size increase of only 0.13% (before=18,027,528
+after=18,051,192). For clang this option is ignored due to i) not being
+needed as mentioned and ii) not having above cmdline
+parameter. Non-retpoline-enabled builds with gcc continue to use the
+default case-values-threshold setting, so nothing changes here.
+
+[0] https://lore.kernel.org/netdev/20190129095754.9390-1-bjorn.topel@gmail.com/
+    and "The Path to DPDK Speeds for AF_XDP", LPC 2018, networking track:
+  - http://vger.kernel.org/lpc_net2018_talks/lpc18_pres_af_xdp_perf-v3.pdf
+  - http://vger.kernel.org/lpc_net2018_talks/lpc18_paper_af_xdp_perf-v2.pdf
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Acked-by: Björn Töpel <bjorn.topel@intel.com>
+Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: netdev@vger.kernel.org
+Cc: David S. Miller <davem@davemloft.net>
+Cc: Magnus Karlsson <magnus.karlsson@intel.com>
+Cc: Alexei Starovoitov <ast@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Link: https://lkml.kernel.org/r/20190221221941.29358-1-daniel@iogearbox.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/Makefile |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -217,6 +217,11 @@ KBUILD_CFLAGS += -fno-asynchronous-unwin
+ # Avoid indirect branches in kernel to deal with Spectre
+ ifdef CONFIG_RETPOLINE
+   KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
++  # Additionally, avoid generating expensive indirect jumps which
++  # are subject to retpolines for small number of switch cases.
++  # clang turns off jump table generation by default when under
++  # retpoline builds, however, gcc does not for x86.
++  KBUILD_CFLAGS += $(call cc-option,--param=case-values-threshold=20)
+ endif
+ 
+ archscripts: scripts_basic