git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
patches for 4.14
author:    Sasha Levin <sashal@kernel.org>    Wed, 19 Dec 2018 00:46:56 +0000 (19:46 -0500)
committer: Sasha Levin <sashal@kernel.org>    Wed, 19 Dec 2018 00:46:56 +0000 (19:46 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
17 files changed:
queue-4.14/elevator-lookup-mq-vs-non-mq-elevators.patch [new file with mode: 0644]
queue-4.14/ib-hfi1-remove-race-conditions-in-user_sdma-send-pat.patch [new file with mode: 0644]
queue-4.14/locking-qspinlock-bound-spinning-on-pending-locked-t.patch [new file with mode: 0644]
queue-4.14/locking-qspinlock-ensure-node-is-initialised-before-.patch [new file with mode: 0644]
queue-4.14/locking-qspinlock-fix-build-for-anonymous-union-in-o.patch [new file with mode: 0644]
queue-4.14/locking-qspinlock-kill-cmpxchg-loop-when-claiming-lo.patch [new file with mode: 0644]
queue-4.14/locking-qspinlock-merge-struct-__qspinlock-into-stru.patch [new file with mode: 0644]
queue-4.14/locking-qspinlock-re-order-code.patch [new file with mode: 0644]
queue-4.14/locking-qspinlock-remove-duplicate-clear_pending-fun.patch [new file with mode: 0644]
queue-4.14/locking-qspinlock-remove-unbounded-cmpxchg-loop-from.patch [new file with mode: 0644]
queue-4.14/locking-qspinlock-x86-increase-_q_pending_loops-uppe.patch [new file with mode: 0644]
queue-4.14/locking-qspinlock-x86-provide-liveness-guarantee.patch [new file with mode: 0644]
queue-4.14/locking-remove-smp_read_barrier_depends-from-queued_.patch [new file with mode: 0644]
queue-4.14/mac80211-don-t-warn-on-bad-wmm-parameters-from-buggy.patch [new file with mode: 0644]
queue-4.14/mac80211-fix-condition-validating-wmm-ie.patch [new file with mode: 0644]
queue-4.14/netfilter-ipset-fix-wraparound-in-hash-net-types.patch [new file with mode: 0644]
queue-4.14/series

diff --git a/queue-4.14/elevator-lookup-mq-vs-non-mq-elevators.patch b/queue-4.14/elevator-lookup-mq-vs-non-mq-elevators.patch
new file mode 100644 (file)
index 0000000..643247b
--- /dev/null
@@ -0,0 +1,166 @@
+From 9b5f9b911afce2e70ae76ebaaa41dcadc381dc14 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 25 Oct 2017 12:33:42 -0600
+Subject: elevator: lookup mq vs non-mq elevators
+
+[ Upstream commit 2527d99789e248576ac8081530cd4fd88730f8c7 ]
+
+If an IO scheduler is selected via elevator= and it doesn't match
+the driver in question wrt blk-mq support, then we fail to boot.
+
+The elevator= parameter is deprecated and only supported for
+non-mq devices. Augment the elevator lookup API so that we
+pass in if we're looking for an mq capable scheduler or not,
+so that we only ever return a valid type for the queue in
+question.
+
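+A rough, self-contained userspace model of the lookup rule (the types
+and names below are illustrative only, not the kernel API):
+
+  #include <stdbool.h>
+  #include <stdio.h>
+  #include <string.h>
+
+  struct sched_type { const char *name; bool uses_mq; };
+
+  /* Only return a scheduler whose mq capability matches the queue. */
+  static const struct sched_type *find_sched(const struct sched_type *t,
+                                             int n, const char *name, bool mq)
+  {
+          for (int i = 0; i < n; i++)
+                  if (!strcmp(t[i].name, name) && t[i].uses_mq == mq)
+                          return &t[i];
+          return NULL;
+  }
+
+  int main(void)
+  {
+          const struct sched_type tbl[] = {
+                  { "deadline", false }, { "mq-deadline", true },
+          };
+          /* An mq queue is never handed a legacy-only scheduler. */
+          printf("%p\n", (void *)find_sched(tbl, 2, "deadline", true));
+          return 0;
+  }
+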
+Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=196695
+Reviewed-by: Omar Sandoval <osandov@fb.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/elevator.c | 44 +++++++++++++++++++++-----------------------
+ 1 file changed, 21 insertions(+), 23 deletions(-)
+
+diff --git a/block/elevator.c b/block/elevator.c
+index 153926a90901..8320d97240be 100644
+--- a/block/elevator.c
++++ b/block/elevator.c
+@@ -83,12 +83,15 @@ bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
+ }
+ EXPORT_SYMBOL(elv_bio_merge_ok);
+-static struct elevator_type *elevator_find(const char *name)
++/*
++ * Return scheduler with name 'name' and with matching 'mq capability
++ */
++static struct elevator_type *elevator_find(const char *name, bool mq)
+ {
+       struct elevator_type *e;
+       list_for_each_entry(e, &elv_list, list) {
+-              if (!strcmp(e->elevator_name, name))
++              if (!strcmp(e->elevator_name, name) && (mq == e->uses_mq))
+                       return e;
+       }
+@@ -100,25 +103,25 @@ static void elevator_put(struct elevator_type *e)
+       module_put(e->elevator_owner);
+ }
+-static struct elevator_type *elevator_get(const char *name, bool try_loading)
++static struct elevator_type *elevator_get(struct request_queue *q,
++                                        const char *name, bool try_loading)
+ {
+       struct elevator_type *e;
+       spin_lock(&elv_list_lock);
+-      e = elevator_find(name);
++      e = elevator_find(name, q->mq_ops != NULL);
+       if (!e && try_loading) {
+               spin_unlock(&elv_list_lock);
+               request_module("%s-iosched", name);
+               spin_lock(&elv_list_lock);
+-              e = elevator_find(name);
++              e = elevator_find(name, q->mq_ops != NULL);
+       }
+       if (e && !try_module_get(e->elevator_owner))
+               e = NULL;
+       spin_unlock(&elv_list_lock);
+-
+       return e;
+ }
+@@ -144,8 +147,12 @@ void __init load_default_elevator_module(void)
+       if (!chosen_elevator[0])
+               return;
++      /*
++       * Boot parameter is deprecated, we haven't supported that for MQ.
++       * Only look for non-mq schedulers from here.
++       */
+       spin_lock(&elv_list_lock);
+-      e = elevator_find(chosen_elevator);
++      e = elevator_find(chosen_elevator, false);
+       spin_unlock(&elv_list_lock);
+       if (!e)
+@@ -202,7 +209,7 @@ int elevator_init(struct request_queue *q, char *name)
+       q->boundary_rq = NULL;
+       if (name) {
+-              e = elevator_get(name, true);
++              e = elevator_get(q, name, true);
+               if (!e)
+                       return -EINVAL;
+       }
+@@ -214,7 +221,7 @@ int elevator_init(struct request_queue *q, char *name)
+        * allowed from async.
+        */
+       if (!e && !q->mq_ops && *chosen_elevator) {
+-              e = elevator_get(chosen_elevator, false);
++              e = elevator_get(q, chosen_elevator, false);
+               if (!e)
+                       printk(KERN_ERR "I/O scheduler %s not found\n",
+                                                       chosen_elevator);
+@@ -229,17 +236,17 @@ int elevator_init(struct request_queue *q, char *name)
+                */
+               if (q->mq_ops) {
+                       if (q->nr_hw_queues == 1)
+-                              e = elevator_get("mq-deadline", false);
++                              e = elevator_get(q, "mq-deadline", false);
+                       if (!e)
+                               return 0;
+               } else
+-                      e = elevator_get(CONFIG_DEFAULT_IOSCHED, false);
++                      e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false);
+               if (!e) {
+                       printk(KERN_ERR
+                               "Default I/O scheduler not found. " \
+                               "Using noop.\n");
+-                      e = elevator_get("noop", false);
++                      e = elevator_get(q, "noop", false);
+               }
+       }
+@@ -905,7 +912,7 @@ int elv_register(struct elevator_type *e)
+       /* register, don't allow duplicate names */
+       spin_lock(&elv_list_lock);
+-      if (elevator_find(e->elevator_name)) {
++      if (elevator_find(e->elevator_name, e->uses_mq)) {
+               spin_unlock(&elv_list_lock);
+               if (e->icq_cache)
+                       kmem_cache_destroy(e->icq_cache);
+@@ -1066,7 +1073,7 @@ static int __elevator_change(struct request_queue *q, const char *name)
+               return elevator_switch(q, NULL);
+       strlcpy(elevator_name, name, sizeof(elevator_name));
+-      e = elevator_get(strstrip(elevator_name), true);
++      e = elevator_get(q, strstrip(elevator_name), true);
+       if (!e)
+               return -EINVAL;
+@@ -1076,15 +1083,6 @@ static int __elevator_change(struct request_queue *q, const char *name)
+               return 0;
+       }
+-      if (!e->uses_mq && q->mq_ops) {
+-              elevator_put(e);
+-              return -EINVAL;
+-      }
+-      if (e->uses_mq && !q->mq_ops) {
+-              elevator_put(e);
+-              return -EINVAL;
+-      }
+-
+       return elevator_switch(q, e);
+ }
+-- 
+2.19.1
+
diff --git a/queue-4.14/ib-hfi1-remove-race-conditions-in-user_sdma-send-pat.patch b/queue-4.14/ib-hfi1-remove-race-conditions-in-user_sdma-send-pat.patch
new file mode 100644 (file)
index 0000000..ae6107e
--- /dev/null
@@ -0,0 +1,135 @@
+From 3f7a1ca75bfdd64c4904d128e06f3eefce52130e Mon Sep 17 00:00:00 2001
+From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
+Date: Tue, 18 Dec 2018 16:04:18 -0500
+Subject: IB/hfi1: Remove race conditions in user_sdma send path
+
+commit 28a9a9e83ceae2cee25b9af9ad20d53aaa9ab951 upstream
+
+Packet queue state is overused to determine SDMA descriptor
+availability and packet queue request state.
+
+cpu 0  ret = user_sdma_send_pkts(req, pcount);
+cpu 0  if (atomic_read(&pq->n_reqs))
+cpu 1  IRQ user_sdma_txreq_cb calls pq_update() (state to _INACTIVE)
+cpu 0        xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
+
+At this point pq->n_reqs == 0 and pq->state is incorrectly
+SDMA_PKT_Q_ACTIVE.  The close path will hang waiting for the state
+to return to _INACTIVE.
+
+This can also change the state from _DEFERRED to _ACTIVE.  However,
+this is a mostly benign race.
+
+Remove the racy code path.
+
+Use n_reqs to determine if a packet queue is active or not.
+
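+A minimal userspace model of the fixed wait condition, using C11 atomics
+and pthreads (illustrative only, not the driver code):
+
+  #include <pthread.h>
+  #include <stdatomic.h>
+  #include <stdio.h>
+
+  static atomic_int n_reqs;                    /* in-flight requests */
+
+  static void *completion_irq(void *arg)
+  {
+          (void)arg;
+          /* completion path: the final decrement is what frees the closer */
+          atomic_fetch_sub(&n_reqs, 1);
+          return NULL;
+  }
+
+  int main(void)
+  {
+          pthread_t t;
+
+          atomic_fetch_add(&n_reqs, 1);        /* submit one request */
+          pthread_create(&t, NULL, completion_irq, NULL);
+
+          /* close path: wait on the request count itself, not on a
+           * separately maintained state flag that can go stale */
+          while (atomic_load(&n_reqs))
+                  ;
+
+          pthread_join(t, NULL);
+          printf("all requests completed\n");
+          return 0;
+  }
+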
+Cc: <stable@vger.kernel.org> # 4.14.0>
+Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
+Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/hfi1/user_sdma.c | 24 ++++++++++--------------
+ drivers/infiniband/hw/hfi1/user_sdma.h |  9 +++++----
+ 2 files changed, 15 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
+index c14ec04f2a89..cbe5ab26d95b 100644
+--- a/drivers/infiniband/hw/hfi1/user_sdma.c
++++ b/drivers/infiniband/hw/hfi1/user_sdma.c
+@@ -187,7 +187,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
+       pq->ctxt = uctxt->ctxt;
+       pq->subctxt = fd->subctxt;
+       pq->n_max_reqs = hfi1_sdma_comp_ring_size;
+-      pq->state = SDMA_PKT_Q_INACTIVE;
+       atomic_set(&pq->n_reqs, 0);
+       init_waitqueue_head(&pq->wait);
+       atomic_set(&pq->n_locked, 0);
+@@ -276,7 +275,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
+               /* Wait until all requests have been freed. */
+               wait_event_interruptible(
+                       pq->wait,
+-                      (ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE));
++                      !atomic_read(&pq->n_reqs));
+               kfree(pq->reqs);
+               kfree(pq->req_in_use);
+               kmem_cache_destroy(pq->txreq_cache);
+@@ -312,6 +311,13 @@ static u8 dlid_to_selector(u16 dlid)
+       return mapping[hash];
+ }
++/**
++ * hfi1_user_sdma_process_request() - Process and start a user sdma request
++ * @fd: valid file descriptor
++ * @iovec: array of io vectors to process
++ * @dim: overall iovec array size
++ * @count: number of io vector array entries processed
++ */
+ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
+                                  struct iovec *iovec, unsigned long dim,
+                                  unsigned long *count)
+@@ -560,20 +566,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
+               req->ahg_idx = sdma_ahg_alloc(req->sde);
+       set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
++      pq->state = SDMA_PKT_Q_ACTIVE;
+       /* Send the first N packets in the request to buy us some time */
+       ret = user_sdma_send_pkts(req, pcount);
+       if (unlikely(ret < 0 && ret != -EBUSY))
+               goto free_req;
+-      /*
+-       * It is possible that the SDMA engine would have processed all the
+-       * submitted packets by the time we get here. Therefore, only set
+-       * packet queue state to ACTIVE if there are still uncompleted
+-       * requests.
+-       */
+-      if (atomic_read(&pq->n_reqs))
+-              xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
+-
+       /*
+        * This is a somewhat blocking send implementation.
+        * The driver will block the caller until all packets of the
+@@ -1391,10 +1389,8 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
+ static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
+ {
+-      if (atomic_dec_and_test(&pq->n_reqs)) {
+-              xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
++      if (atomic_dec_and_test(&pq->n_reqs))
+               wake_up(&pq->wait);
+-      }
+ }
+ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
+diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
+index 5af52334b7dc..2b5326d6db53 100644
+--- a/drivers/infiniband/hw/hfi1/user_sdma.h
++++ b/drivers/infiniband/hw/hfi1/user_sdma.h
+@@ -94,9 +94,10 @@
+ #define TXREQ_FLAGS_REQ_ACK   BIT(0)      /* Set the ACK bit in the header */
+ #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
+-#define SDMA_PKT_Q_INACTIVE BIT(0)
+-#define SDMA_PKT_Q_ACTIVE   BIT(1)
+-#define SDMA_PKT_Q_DEFERRED BIT(2)
++enum pkt_q_sdma_state {
++      SDMA_PKT_Q_ACTIVE,
++      SDMA_PKT_Q_DEFERRED,
++};
+ /*
+  * Maximum retry attempts to submit a TX request
+@@ -124,7 +125,7 @@ struct hfi1_user_sdma_pkt_q {
+       struct user_sdma_request *reqs;
+       unsigned long *req_in_use;
+       struct iowait busy;
+-      unsigned state;
++      enum pkt_q_sdma_state state;
+       wait_queue_head_t wait;
+       unsigned long unpinned;
+       struct mmu_rb_handler *handler;
+-- 
+2.19.1
+
diff --git a/queue-4.14/locking-qspinlock-bound-spinning-on-pending-locked-t.patch b/queue-4.14/locking-qspinlock-bound-spinning-on-pending-locked-t.patch
new file mode 100644 (file)
index 0000000..3c77033
--- /dev/null
@@ -0,0 +1,89 @@
+From 6108129907517b1d036c1df8cd9787db68c56e80 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Tue, 18 Dec 2018 18:13:53 +0100
+Subject: locking/qspinlock: Bound spinning on pending->locked transition in
+ slowpath
+
+commit 6512276d97b160d90b53285bd06f7f201459a7e3 upstream.
+
+If a locker taking the qspinlock slowpath reads a lock value indicating
+that only the pending bit is set, then it will spin whilst the
+concurrent pending->locked transition takes effect.
+
+Unfortunately, there is no guarantee that such a transition will ever be
+observed since concurrent lockers could continuously set pending and
+hand over the lock amongst themselves, leading to starvation. Whilst
+this would probably resolve in practice, it means that it is not
+possible to prove liveness properties about the lock and means that lock
+acquisition time is unbounded.
+
+Rather than removing the pending->locked spinning from the slowpath
+altogether (which has been shown to heavily penalise a 2-threaded
+locking stress test on x86), this patch replaces the explicit spinning
+with a call to atomic_cond_read_relaxed and allows the architecture to
+provide a bound on the number of spins. For architectures that can
+respond to changes in cacheline state in their smp_cond_load implementation,
+it should be sufficient to use the default bound of 1.
+
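+The bounded wait can be modelled in userspace roughly as follows (C11
+atomics; the lock word layout and the loop-count name are simplified
+assumptions here, not the kernel implementation):
+
+  #include <stdatomic.h>
+
+  #ifndef Q_PENDING_LOOPS
+  #define Q_PENDING_LOOPS 1            /* default spin budget */
+  #endif
+
+  /* Spin while *lock == pending_val, but only Q_PENDING_LOOPS times;
+   * the caller falls back to queuing if the value is still pending. */
+  static unsigned int wait_pending_bounded(atomic_uint *lock,
+                                           unsigned int pending_val)
+  {
+          int cnt = Q_PENDING_LOOPS;
+          unsigned int val;
+
+          do {
+                  val = atomic_load_explicit(lock, memory_order_acquire);
+          } while (val == pending_val && cnt--);
+
+          return val;
+  }
+
+  int main(void)
+  {
+          atomic_uint lock = 2;        /* pretend 2 encodes "pending only" */
+
+          /* terminates even though nobody ever completes the hand-over */
+          return wait_pending_bounded(&lock, 2) == 2 ? 0 : 1;
+  }
+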
+Suggested-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Waiman Long <longman@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: boqun.feng@gmail.com
+Cc: linux-arm-kernel@lists.infradead.org
+Cc: paulmck@linux.vnet.ibm.com
+Link: http://lkml.kernel.org/r/1524738868-31318-4-git-send-email-will.deacon@arm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/locking/qspinlock.c | 20 +++++++++++++++++---
+ 1 file changed, 17 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
+index d880296245c5..18161264227a 100644
+--- a/kernel/locking/qspinlock.c
++++ b/kernel/locking/qspinlock.c
+@@ -76,6 +76,18 @@
+ #define MAX_NODES     4
+ #endif
++/*
++ * The pending bit spinning loop count.
++ * This heuristic is used to limit the number of lockword accesses
++ * made by atomic_cond_read_relaxed when waiting for the lock to
++ * transition out of the "== _Q_PENDING_VAL" state. We don't spin
++ * indefinitely because there's no guarantee that we'll make forward
++ * progress.
++ */
++#ifndef _Q_PENDING_LOOPS
++#define _Q_PENDING_LOOPS      1
++#endif
++
+ /*
+  * Per-CPU queue node structures; we can never have more than 4 nested
+  * contexts: task, softirq, hardirq, nmi.
+@@ -306,13 +318,15 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+               return;
+       /*
+-       * wait for in-progress pending->locked hand-overs
++       * Wait for in-progress pending->locked hand-overs with a bounded
++       * number of spins so that we guarantee forward progress.
+        *
+        * 0,1,0 -> 0,0,1
+        */
+       if (val == _Q_PENDING_VAL) {
+-              while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
+-                      cpu_relax();
++              int cnt = _Q_PENDING_LOOPS;
++              val = smp_cond_load_acquire(&lock->val.counter,
++                                             (VAL != _Q_PENDING_VAL) || !cnt--);
+       }
+       /*
+-- 
+2.19.1
+
diff --git a/queue-4.14/locking-qspinlock-ensure-node-is-initialised-before-.patch b/queue-4.14/locking-qspinlock-ensure-node-is-initialised-before-.patch
new file mode 100644 (file)
index 0000000..1921e8b
--- /dev/null
@@ -0,0 +1,87 @@
+From 0ef35cbbeda4f2800c50baa0e5f757f0fb20295f Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Tue, 18 Dec 2018 18:13:52 +0100
+Subject: locking/qspinlock: Ensure node is initialised before updating
+ prev->next
+
+commit 95bcade33a8af38755c9b0636e36a36ad3789fe6 upstream.
+
+When a locker ends up queuing on the qspinlock locking slowpath, we
+initialise the relevant mcs node and publish it indirectly by updating
+the tail portion of the lock word using xchg_tail. If we find that there
+was a pre-existing locker in the queue, we subsequently update their
+->next field to point at our node so that we are notified when it's our
+turn to take the lock.
+
+This can be roughly illustrated as follows:
+
+  /* Initialise the fields in node and encode a pointer to node in tail */
+  tail = initialise_node(node);
+
+  /*
+   * Exchange tail into the lockword using an atomic read-modify-write
+   * operation with release semantics
+   */
+  old = xchg_tail(lock, tail);
+
+  /* If there was a pre-existing waiter ... */
+  if (old & _Q_TAIL_MASK) {
+       prev = decode_tail(old);
+       smp_read_barrier_depends();
+
+       /* ... then update their ->next field to point to node. */
+       WRITE_ONCE(prev->next, node);
+  }
+
+The conditional update of prev->next therefore relies on the address
+dependency from the result of xchg_tail ensuring order against the
+prior initialisation of node. However, since the release semantics of
+the xchg_tail operation apply only to the write portion of the RmW,
+then this ordering is not guaranteed and it is possible for the CPU
+to return old before the writes to node have been published, consequently
+allowing us to point prev->next to an uninitialised node.
+
+This patch fixes the problem by making the update of prev->next a RELEASE
+operation, which also removes the reliance on dependency ordering.
+
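+In C11 terms, the fix amounts to publishing the node with a release
+store (a standalone model with made-up names, not the kernel code):
+
+  #include <stdatomic.h>
+
+  struct mcs_node { int locked; _Atomic(struct mcs_node *) next; };
+
+  static void publish_node(struct mcs_node *prev, struct mcs_node *node)
+  {
+          node->locked = 0;    /* initialise the node fields first ...    */
+          /* ... then make them visible no later than the pointer itself. */
+          atomic_store_explicit(&prev->next, node, memory_order_release);
+  }
+
+  int main(void)
+  {
+          struct mcs_node prev, node;
+
+          publish_node(&prev, &node);
+          return atomic_load(&prev.next) == &node ? 0 : 1;
+  }
+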
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1518528177-19169-2-git-send-email-will.deacon@arm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/locking/qspinlock.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
+index 5541acb79e15..d880296245c5 100644
+--- a/kernel/locking/qspinlock.c
++++ b/kernel/locking/qspinlock.c
+@@ -416,14 +416,15 @@ queue:
+        */
+       if (old & _Q_TAIL_MASK) {
+               prev = decode_tail(old);
++
+               /*
+-               * The above xchg_tail() is also a load of @lock which
+-               * generates, through decode_tail(), a pointer.  The address
+-               * dependency matches the RELEASE of xchg_tail() such that
+-               * the subsequent access to @prev happens after.
++               * We must ensure that the stores to @node are observed before
++               * the write to prev->next. The address dependency from
++               * xchg_tail is not sufficient to ensure this because the read
++               * component of xchg_tail is unordered with respect to the
++               * initialisation of @node.
+                */
+-
+-              WRITE_ONCE(prev->next, node);
++              smp_store_release(&prev->next, node);
+               pv_wait_node(node, prev);
+               arch_mcs_spin_lock_contended(&node->locked);
+-- 
+2.19.1
+
diff --git a/queue-4.14/locking-qspinlock-fix-build-for-anonymous-union-in-o.patch b/queue-4.14/locking-qspinlock-fix-build-for-anonymous-union-in-o.patch
new file mode 100644 (file)
index 0000000..ee77b49
--- /dev/null
@@ -0,0 +1,58 @@
+From cbcbd11c96c4ff7299e1c24ed518b6924211aa2b Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Thu, 21 Jun 2018 20:35:26 -0400
+Subject: locking/qspinlock: Fix build for anonymous union in older GCC
+ compilers
+
+[ Upstream commit 6cc65be4f6f2a7186af8f3e09900787c7912dad2 ]
+
+One of my tests compiles the kernel with gcc 4.5.3, and I hit the
+following build error:
+
+  include/linux/semaphore.h: In function 'sema_init':
+  include/linux/semaphore.h:35:17: error: unknown field 'val' specified in initializer
+  include/linux/semaphore.h:35:17: warning: missing braces around initializer
+  include/linux/semaphore.h:35:17: warning: (near initialization for '(anonymous).raw_lock.<anonymous>.val')
+
+I bisected it down to:
+
+ 625e88be1f41 ("locking/qspinlock: Merge 'struct __qspinlock' into 'struct qspinlock'")
+
+... which makes qspinlock have an anonymous union, which makes initializing it special
+for older compilers. By adding strategic brackets, it makes the build
+happy again.
+
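+The brace requirement can be reproduced in isolation (a standalone
+example, not the kernel header):
+
+  struct qsl {
+          union {
+                  int val;
+                  struct { char locked, pending; };
+          };
+  };
+
+  /* Older GCC (e.g. 4.5) rejects the designated initializer that names a
+   * member of the anonymous union unless the extra braces are present. */
+  struct qsl maybe_bad = { .val = 0 };      /* may fail on old compilers */
+  struct qsl good      = { { .val = 0 } };  /* accepted everywhere       */
+
+  int main(void) { return maybe_bad.val + good.val; }
+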
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Acked-by: Waiman Long <longman@redhat.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Boqun Feng <boqun.feng@gmail.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: linux-arm-kernel@lists.infradead.org
+Fixes: 625e88be1f41 ("locking/qspinlock: Merge 'struct __qspinlock' into 'struct qspinlock'")
+Link: http://lkml.kernel.org/r/20180621203526.172ab5c4@vmware.local.home
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/asm-generic/qspinlock_types.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h
+index 0763f065b975..d10f1e7d6ba8 100644
+--- a/include/asm-generic/qspinlock_types.h
++++ b/include/asm-generic/qspinlock_types.h
+@@ -63,7 +63,7 @@ typedef struct qspinlock {
+ /*
+  * Initializier
+  */
+-#define       __ARCH_SPIN_LOCK_UNLOCKED       { .val = ATOMIC_INIT(0) }
++#define       __ARCH_SPIN_LOCK_UNLOCKED       { { .val = ATOMIC_INIT(0) } }
+ /*
+  * Bitfields in the atomic value:
+-- 
+2.19.1
+
diff --git a/queue-4.14/locking-qspinlock-kill-cmpxchg-loop-when-claiming-lo.patch b/queue-4.14/locking-qspinlock-kill-cmpxchg-loop-when-claiming-lo.patch
new file mode 100644 (file)
index 0000000..6e7e52e
--- /dev/null
@@ -0,0 +1,75 @@
+From f6b1a26cb6bd024eeca6070dca00117a194be2c5 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Tue, 18 Dec 2018 18:13:57 +0100
+Subject: locking/qspinlock: Kill cmpxchg() loop when claiming lock from head
+ of queue
+
+commit c61da58d8a9ba9238250a548f00826eaf44af0f7 upstream.
+
+When a queued locker reaches the head of the queue, it claims the lock
+by setting _Q_LOCKED_VAL in the lockword. If there isn't contention, it
+must also clear the tail as part of this operation so that subsequent
+lockers can avoid taking the slowpath altogether.
+
+Currently this is expressed as a cmpxchg() loop that practically only
+runs up to two iterations. This is confusing to the reader and unhelpful
+to the compiler. Rewrite the cmpxchg() loop without the loop, so that a
+failed cmpxchg() implies that there is contention and we just need to
+write to _Q_LOCKED_VAL without considering the rest of the lockword.
+
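+The shape of the new claim path, modelled with C11 atomics (the word
+layout and constants are simplified; this is not the kernel code):
+
+  #include <stdatomic.h>
+
+  #define LOCKED_VAL   0x00000001u
+  #define TAIL_MASK    0xffff0000u
+
+  static void claim_from_head(atomic_uint *lock, unsigned int val,
+                              unsigned int tail)
+  {
+          /* Sole queued waiter: try to clear the tail and take the lock
+           * in one shot.  No retry loop is needed. */
+          if ((val & TAIL_MASK) == tail &&
+              atomic_compare_exchange_strong(lock, &val, LOCKED_VAL))
+                  return;
+
+          /* A failed cmpxchg means somebody queued behind us (or pending
+           * got set): keep the tail and just set the locked byte. */
+          atomic_fetch_or(lock, LOCKED_VAL);
+  }
+
+  int main(void)
+  {
+          atomic_uint lock = 0x00010000u;      /* only our tail encoded */
+
+          claim_from_head(&lock, 0x00010000u, 0x00010000u);
+          return atomic_load(&lock) == LOCKED_VAL ? 0 : 1;
+  }
+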
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Waiman Long <longman@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: boqun.feng@gmail.com
+Cc: linux-arm-kernel@lists.infradead.org
+Cc: paulmck@linux.vnet.ibm.com
+Link: http://lkml.kernel.org/r/1524738868-31318-7-git-send-email-will.deacon@arm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/locking/qspinlock.c | 19 ++++++++-----------
+ 1 file changed, 8 insertions(+), 11 deletions(-)
+
+diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
+index 7bd053e528c2..841550dfb7b8 100644
+--- a/kernel/locking/qspinlock.c
++++ b/kernel/locking/qspinlock.c
+@@ -465,24 +465,21 @@ locked:
+        * and nobody is pending, clear the tail code and grab the lock.
+        * Otherwise, we only need to grab the lock.
+        */
+-      for (;;) {
+-              /* In the PV case we might already have _Q_LOCKED_VAL set */
+-              if ((val & _Q_TAIL_MASK) != tail || (val & _Q_PENDING_MASK)) {
+-                      set_locked(lock);
+-                      break;
+-              }
++
++      /* In the PV case we might already have _Q_LOCKED_VAL set */
++      if ((val & _Q_TAIL_MASK) == tail) {
+               /*
+                * The smp_cond_load_acquire() call above has provided the
+-               * necessary acquire semantics required for locking. At most
+-               * two iterations of this loop may be ran.
++               * necessary acquire semantics required for locking.
+                */
+               old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
+               if (old == val)
+-                      goto release;   /* No contention */
+-
+-              val = old;
++                      goto release; /* No contention */
+       }
++      /* Either somebody is queued behind us or _Q_PENDING_VAL is set */
++      set_locked(lock);
++
+       /*
+        * contended path; wait for next if not observed yet, release.
+        */
+-- 
+2.19.1
+
diff --git a/queue-4.14/locking-qspinlock-merge-struct-__qspinlock-into-stru.patch b/queue-4.14/locking-qspinlock-merge-struct-__qspinlock-into-stru.patch
new file mode 100644 (file)
index 0000000..8f3a1e1
--- /dev/null
@@ -0,0 +1,327 @@
+From e243c3c5a17a35c95a640bbebaf611d1f51a224f Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Tue, 18 Dec 2018 18:13:54 +0100
+Subject: locking/qspinlock: Merge 'struct __qspinlock' into 'struct qspinlock'
+
+commit 625e88be1f41b53cec55827c984e4a89ea8ee9f9 upstream.
+
+'struct __qspinlock' provides a handy union of fields so that
+subcomponents of the lockword can be accessed by name, without having to
+manage shifts and masks explicitly and take endianness into account.
+
+This is useful in qspinlock.h and also potentially in arch headers, so
+move the 'struct __qspinlock' into 'struct qspinlock' and kill the extra
+definition.
+
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Waiman Long <longman@redhat.com>
+Acked-by: Boqun Feng <boqun.feng@gmail.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arm-kernel@lists.infradead.org
+Cc: paulmck@linux.vnet.ibm.com
+Link: http://lkml.kernel.org/r/1524738868-31318-3-git-send-email-will.deacon@arm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/qspinlock.h          |  2 +-
+ arch/x86/include/asm/qspinlock_paravirt.h |  3 +-
+ include/asm-generic/qspinlock_types.h     | 32 +++++++++++++++-
+ kernel/locking/qspinlock.c                | 46 ++---------------------
+ kernel/locking/qspinlock_paravirt.h       | 34 ++++++-----------
+ 5 files changed, 46 insertions(+), 71 deletions(-)
+
+diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
+index 9982dd96f093..cf4cdf508ef4 100644
+--- a/arch/x86/include/asm/qspinlock.h
++++ b/arch/x86/include/asm/qspinlock.h
+@@ -15,7 +15,7 @@
+  */
+ static inline void native_queued_spin_unlock(struct qspinlock *lock)
+ {
+-      smp_store_release((u8 *)lock, 0);
++      smp_store_release(&lock->locked, 0);
+ }
+ #ifdef CONFIG_PARAVIRT_SPINLOCKS
+diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
+index 923307ea11c7..9ef5ee03d2d7 100644
+--- a/arch/x86/include/asm/qspinlock_paravirt.h
++++ b/arch/x86/include/asm/qspinlock_paravirt.h
+@@ -22,8 +22,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
+  *
+  * void __pv_queued_spin_unlock(struct qspinlock *lock)
+  * {
+- *    struct __qspinlock *l = (void *)lock;
+- *    u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
++ *    u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
+  *
+  *    if (likely(lockval == _Q_LOCKED_VAL))
+  *            return;
+diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h
+index 034acd0c4956..0763f065b975 100644
+--- a/include/asm-generic/qspinlock_types.h
++++ b/include/asm-generic/qspinlock_types.h
+@@ -29,13 +29,41 @@
+ #endif
+ typedef struct qspinlock {
+-      atomic_t        val;
++      union {
++              atomic_t val;
++
++              /*
++               * By using the whole 2nd least significant byte for the
++               * pending bit, we can allow better optimization of the lock
++               * acquisition for the pending bit holder.
++               */
++#ifdef __LITTLE_ENDIAN
++              struct {
++                      u8      locked;
++                      u8      pending;
++              };
++              struct {
++                      u16     locked_pending;
++                      u16     tail;
++              };
++#else
++              struct {
++                      u16     tail;
++                      u16     locked_pending;
++              };
++              struct {
++                      u8      reserved[2];
++                      u8      pending;
++                      u8      locked;
++              };
++#endif
++      };
+ } arch_spinlock_t;
+ /*
+  * Initializier
+  */
+-#define       __ARCH_SPIN_LOCK_UNLOCKED       { ATOMIC_INIT(0) }
++#define       __ARCH_SPIN_LOCK_UNLOCKED       { .val = ATOMIC_INIT(0) }
+ /*
+  * Bitfields in the atomic value:
+diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
+index 18161264227a..e60e618287b4 100644
+--- a/kernel/locking/qspinlock.c
++++ b/kernel/locking/qspinlock.c
+@@ -126,40 +126,6 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
+ #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
+-/*
+- * By using the whole 2nd least significant byte for the pending bit, we
+- * can allow better optimization of the lock acquisition for the pending
+- * bit holder.
+- *
+- * This internal structure is also used by the set_locked function which
+- * is not restricted to _Q_PENDING_BITS == 8.
+- */
+-struct __qspinlock {
+-      union {
+-              atomic_t val;
+-#ifdef __LITTLE_ENDIAN
+-              struct {
+-                      u8      locked;
+-                      u8      pending;
+-              };
+-              struct {
+-                      u16     locked_pending;
+-                      u16     tail;
+-              };
+-#else
+-              struct {
+-                      u16     tail;
+-                      u16     locked_pending;
+-              };
+-              struct {
+-                      u8      reserved[2];
+-                      u8      pending;
+-                      u8      locked;
+-              };
+-#endif
+-      };
+-};
+-
+ #if _Q_PENDING_BITS == 8
+ /**
+  * clear_pending_set_locked - take ownership and clear the pending bit.
+@@ -171,9 +137,7 @@ struct __qspinlock {
+  */
+ static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
+ {
+-      struct __qspinlock *l = (void *)lock;
+-
+-      WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
++      WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
+ }
+ /*
+@@ -188,13 +152,11 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
+  */
+ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
+ {
+-      struct __qspinlock *l = (void *)lock;
+-
+       /*
+        * Use release semantics to make sure that the MCS node is properly
+        * initialized before changing the tail code.
+        */
+-      return (u32)xchg_release(&l->tail,
++      return (u32)xchg_release(&lock->tail,
+                                tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
+ }
+@@ -249,9 +211,7 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
+  */
+ static __always_inline void set_locked(struct qspinlock *lock)
+ {
+-      struct __qspinlock *l = (void *)lock;
+-
+-      WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
++      WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
+ }
+diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
+index 15b6a39366c6..1435ba7954c3 100644
+--- a/kernel/locking/qspinlock_paravirt.h
++++ b/kernel/locking/qspinlock_paravirt.h
+@@ -70,10 +70,8 @@ struct pv_node {
+ #define queued_spin_trylock(l)        pv_queued_spin_steal_lock(l)
+ static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock)
+ {
+-      struct __qspinlock *l = (void *)lock;
+-
+       if (!(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
+-          (cmpxchg_acquire(&l->locked, 0, _Q_LOCKED_VAL) == 0)) {
++          (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
+               qstat_inc(qstat_pv_lock_stealing, true);
+               return true;
+       }
+@@ -88,16 +86,12 @@ static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock)
+ #if _Q_PENDING_BITS == 8
+ static __always_inline void set_pending(struct qspinlock *lock)
+ {
+-      struct __qspinlock *l = (void *)lock;
+-
+-      WRITE_ONCE(l->pending, 1);
++      WRITE_ONCE(lock->pending, 1);
+ }
+ static __always_inline void clear_pending(struct qspinlock *lock)
+ {
+-      struct __qspinlock *l = (void *)lock;
+-
+-      WRITE_ONCE(l->pending, 0);
++      WRITE_ONCE(lock->pending, 0);
+ }
+ /*
+@@ -107,10 +101,8 @@ static __always_inline void clear_pending(struct qspinlock *lock)
+  */
+ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+ {
+-      struct __qspinlock *l = (void *)lock;
+-
+-      return !READ_ONCE(l->locked) &&
+-             (cmpxchg_acquire(&l->locked_pending, _Q_PENDING_VAL,
++      return !READ_ONCE(lock->locked) &&
++             (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
+                               _Q_LOCKED_VAL) == _Q_PENDING_VAL);
+ }
+ #else /* _Q_PENDING_BITS == 8 */
+@@ -355,7 +347,6 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
+ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
+ {
+       struct pv_node *pn = (struct pv_node *)node;
+-      struct __qspinlock *l = (void *)lock;
+       /*
+        * If the vCPU is indeed halted, advance its state to match that of
+@@ -384,7 +375,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
+        * the hash table later on at unlock time, no atomic instruction is
+        * needed.
+        */
+-      WRITE_ONCE(l->locked, _Q_SLOW_VAL);
++      WRITE_ONCE(lock->locked, _Q_SLOW_VAL);
+       (void)pv_hash(lock, pn);
+ }
+@@ -399,7 +390,6 @@ static u32
+ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
+ {
+       struct pv_node *pn = (struct pv_node *)node;
+-      struct __qspinlock *l = (void *)lock;
+       struct qspinlock **lp = NULL;
+       int waitcnt = 0;
+       int loop;
+@@ -450,13 +440,13 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
+                        *
+                        * Matches the smp_rmb() in __pv_queued_spin_unlock().
+                        */
+-                      if (xchg(&l->locked, _Q_SLOW_VAL) == 0) {
++                      if (xchg(&lock->locked, _Q_SLOW_VAL) == 0) {
+                               /*
+                                * The lock was free and now we own the lock.
+                                * Change the lock value back to _Q_LOCKED_VAL
+                                * and unhash the table.
+                                */
+-                              WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
++                              WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
+                               WRITE_ONCE(*lp, NULL);
+                               goto gotlock;
+                       }
+@@ -464,7 +454,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
+               WRITE_ONCE(pn->state, vcpu_hashed);
+               qstat_inc(qstat_pv_wait_head, true);
+               qstat_inc(qstat_pv_wait_again, waitcnt);
+-              pv_wait(&l->locked, _Q_SLOW_VAL);
++              pv_wait(&lock->locked, _Q_SLOW_VAL);
+               /*
+                * Because of lock stealing, the queue head vCPU may not be
+@@ -489,7 +479,6 @@ gotlock:
+ __visible void
+ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
+ {
+-      struct __qspinlock *l = (void *)lock;
+       struct pv_node *node;
+       if (unlikely(locked != _Q_SLOW_VAL)) {
+@@ -518,7 +507,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
+        * Now that we have a reference to the (likely) blocked pv_node,
+        * release the lock.
+        */
+-      smp_store_release(&l->locked, 0);
++      smp_store_release(&lock->locked, 0);
+       /*
+        * At this point the memory pointed at by lock can be freed/reused,
+@@ -544,7 +533,6 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
+ #ifndef __pv_queued_spin_unlock
+ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
+ {
+-      struct __qspinlock *l = (void *)lock;
+       u8 locked;
+       /*
+@@ -552,7 +540,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
+        * unhash. Otherwise it would be possible to have multiple @lock
+        * entries, which would be BAD.
+        */
+-      locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0);
++      locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
+       if (likely(locked == _Q_LOCKED_VAL))
+               return;
+-- 
+2.19.1
+
diff --git a/queue-4.14/locking-qspinlock-re-order-code.patch b/queue-4.14/locking-qspinlock-re-order-code.patch
new file mode 100644 (file)
index 0000000..a67c165
--- /dev/null
@@ -0,0 +1,98 @@
+From 96327141b36d0706e2e758c004fd1a5f4bcc4e53 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 18 Dec 2018 18:13:58 +0100
+Subject: locking/qspinlock: Re-order code
+
+commit 53bf57fab7321fb42b703056a4c80fc9d986d170 upstream.
+
+Flip the branch condition after atomic_fetch_or_acquire(_Q_PENDING_VAL)
+such that we loose the indent. This also result in a more natural code
+flow IMO.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Will Deacon <will.deacon@arm.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: andrea.parri@amarulasolutions.com
+Cc: longman@redhat.com
+Link: https://lkml.kernel.org/r/20181003130257.156322446@infradead.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/locking/qspinlock.c | 54 ++++++++++++++++++--------------------
+ 1 file changed, 26 insertions(+), 28 deletions(-)
+
+diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
+index 841550dfb7b8..9ffc2f9af8b8 100644
+--- a/kernel/locking/qspinlock.c
++++ b/kernel/locking/qspinlock.c
+@@ -324,38 +324,36 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+        * 0,0,1 -> 0,1,1 ; pending
+        */
+       val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
+-      if (!(val & ~_Q_LOCKED_MASK)) {
+-              /*
+-               * We're pending, wait for the owner to go away.
+-               *
+-               * *,1,1 -> *,1,0
+-               *
+-               * this wait loop must be a load-acquire such that we match the
+-               * store-release that clears the locked bit and create lock
+-               * sequentiality; this is because not all
+-               * clear_pending_set_locked() implementations imply full
+-               * barriers.
+-               */
+-              if (val & _Q_LOCKED_MASK) {
+-                      smp_cond_load_acquire(&lock->val.counter,
+-                                            !(VAL & _Q_LOCKED_MASK));
+-              }
+-
+-              /*
+-               * take ownership and clear the pending bit.
+-               *
+-               * *,1,0 -> *,0,1
+-               */
+-              clear_pending_set_locked(lock);
+-              return;
++      /*
++       * If we observe any contention; undo and queue.
++       */
++      if (unlikely(val & ~_Q_LOCKED_MASK)) {
++              if (!(val & _Q_PENDING_MASK))
++                      clear_pending(lock);
++              goto queue;
+       }
+       /*
+-       * If pending was clear but there are waiters in the queue, then
+-       * we need to undo our setting of pending before we queue ourselves.
++       * We're pending, wait for the owner to go away.
++       *
++       * 0,1,1 -> 0,1,0
++       *
++       * this wait loop must be a load-acquire such that we match the
++       * store-release that clears the locked bit and create lock
++       * sequentiality; this is because not all
++       * clear_pending_set_locked() implementations imply full
++       * barriers.
++       */
++      if (val & _Q_LOCKED_MASK)
++              smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));
++
++      /*
++       * take ownership and clear the pending bit.
++       *
++       * 0,1,0 -> 0,0,1
+        */
+-      if (!(val & _Q_PENDING_MASK))
+-              clear_pending(lock);
++      clear_pending_set_locked(lock);
++      return;
+       /*
+        * End of pending bit optimistic spinning and beginning of MCS
+-- 
+2.19.1
+
diff --git a/queue-4.14/locking-qspinlock-remove-duplicate-clear_pending-fun.patch b/queue-4.14/locking-qspinlock-remove-duplicate-clear_pending-fun.patch
new file mode 100644 (file)
index 0000000..7d5eb8c
--- /dev/null
@@ -0,0 +1,48 @@
+From 9c953f6477aed65dcf16c629b154dc9aa53dfd05 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Tue, 18 Dec 2018 18:13:56 +0100
+Subject: locking/qspinlock: Remove duplicate clear_pending() function from PV
+ code
+
+commit 3bea9adc96842b8a7345c7fb202c16ae9c8d5b25 upstream.
+
+The native clear_pending() function is identical to the PV version, so the
+latter can simply be removed.
+
+This fixes the build for systems with >= 16K CPUs using the PV lock implementation.
+
+Reported-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: boqun.feng@gmail.com
+Cc: linux-arm-kernel@lists.infradead.org
+Cc: paulmck@linux.vnet.ibm.com
+Link: http://lkml.kernel.org/r/20180427101619.GB21705@arm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/locking/qspinlock_paravirt.h | 5 -----
+ 1 file changed, 5 deletions(-)
+
+diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
+index 854443f7b60b..1e882dfc8b79 100644
+--- a/kernel/locking/qspinlock_paravirt.h
++++ b/kernel/locking/qspinlock_paravirt.h
+@@ -106,11 +106,6 @@ static __always_inline void set_pending(struct qspinlock *lock)
+       atomic_or(_Q_PENDING_VAL, &lock->val);
+ }
+-static __always_inline void clear_pending(struct qspinlock *lock)
+-{
+-      atomic_andnot(_Q_PENDING_VAL, &lock->val);
+-}
+-
+ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+ {
+       int val = atomic_read(&lock->val);
+-- 
+2.19.1
+
diff --git a/queue-4.14/locking-qspinlock-remove-unbounded-cmpxchg-loop-from.patch b/queue-4.14/locking-qspinlock-remove-unbounded-cmpxchg-loop-from.patch
new file mode 100644 (file)
index 0000000..fc30343
--- /dev/null
@@ -0,0 +1,233 @@
+From 76883080803df1fce620c3da6bb7ee43011b6b4b Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Tue, 18 Dec 2018 18:13:55 +0100
+Subject: locking/qspinlock: Remove unbounded cmpxchg() loop from locking
+ slowpath
+
+commit 59fb586b4a07b4e1a0ee577140ab4842ba451acd upstream.
+
+The qspinlock locking slowpath utilises a "pending" bit as a simple form
+of an embedded test-and-set lock that can avoid the overhead of explicit
+queuing in cases where the lock is held but uncontended. This bit is
+managed using a cmpxchg() loop which tries to transition the uncontended
+lock word from (0,0,0) -> (0,0,1) or (0,0,1) -> (0,1,1).
+
+Unfortunately, the cmpxchg() loop is unbounded and lockers can be starved
+indefinitely if the lock word is seen to oscillate between unlocked
+(0,0,0) and locked (0,0,1). This could happen if concurrent lockers are
+able to take the lock in the cmpxchg() loop without queuing and pass it
+around amongst themselves.
+
+This patch fixes the problem by unconditionally setting _Q_PENDING_VAL
+using atomic_fetch_or, and then inspecting the old value to see whether
+we need to spin on the current lock owner, or whether we now effectively
+hold the lock. The tricky scenario is when concurrent lockers end up
+queuing on the lock and the lock becomes available, causing us to see
+a lockword of (n,0,0). With pending now set, simply queuing could lead
+to deadlock as the head of the queue may not have observed the pending
+flag being cleared. Conversely, if the head of the queue did observe
+pending being cleared, then it could transition the lock from (n,0,0) ->
+(0,0,1) meaning that any attempt to "undo" our setting of the pending
+bit could race with a concurrent locker trying to set it.
+
+We handle this race by preserving the pending bit when taking the lock
+after reaching the head of the queue and leaving the tail entry intact
+if we saw pending set, because we know that the tail is going to be
+updated shortly.
+
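+The reworked pending path can be modelled with a single fetch_or (a C11
+userspace sketch with a simplified word layout, not the kernel code):
+
+  #include <stdatomic.h>
+  #include <stdbool.h>
+
+  #define LOCKED_VAL   0x001u
+  #define LOCKED_MASK  0x0ffu
+  #define PENDING_VAL  0x100u
+
+  /* Returns true once the lock is owned via the pending path, false if
+   * contention was observed and the caller must queue instead. */
+  static bool pending_path(atomic_uint *lock)
+  {
+          unsigned int val = atomic_fetch_or_explicit(lock, PENDING_VAL,
+                                                      memory_order_acquire);
+
+          if (val & ~LOCKED_MASK) {
+                  /* Undo the pending bit only if we were the one to set it. */
+                  if (!(val & PENDING_VAL))
+                          atomic_fetch_and_explicit(lock, ~PENDING_VAL,
+                                                    memory_order_relaxed);
+                  return false;
+          }
+
+          /* We hold pending: wait for the current owner to go away ... */
+          while (atomic_load_explicit(lock, memory_order_acquire) & LOCKED_MASK)
+                  ;
+
+          /* ... then perform the pending -> locked hand-over. */
+          atomic_fetch_xor_explicit(lock, PENDING_VAL | LOCKED_VAL,
+                                    memory_order_relaxed);
+          return true;
+  }
+
+  int main(void)
+  {
+          atomic_uint lock = 0;                /* unlocked, uncontended */
+
+          return (pending_path(&lock) &&
+                  atomic_load(&lock) == LOCKED_VAL) ? 0 : 1;
+  }
+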
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Waiman Long <longman@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: boqun.feng@gmail.com
+Cc: linux-arm-kernel@lists.infradead.org
+Cc: paulmck@linux.vnet.ibm.com
+Link: http://lkml.kernel.org/r/1524738868-31318-6-git-send-email-will.deacon@arm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/locking/qspinlock.c          | 102 ++++++++++++++++------------
+ kernel/locking/qspinlock_paravirt.h |   5 --
+ 2 files changed, 58 insertions(+), 49 deletions(-)
+
+diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
+index e60e618287b4..7bd053e528c2 100644
+--- a/kernel/locking/qspinlock.c
++++ b/kernel/locking/qspinlock.c
+@@ -127,6 +127,17 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
+ #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
+ #if _Q_PENDING_BITS == 8
++/**
++ * clear_pending - clear the pending bit.
++ * @lock: Pointer to queued spinlock structure
++ *
++ * *,1,* -> *,0,*
++ */
++static __always_inline void clear_pending(struct qspinlock *lock)
++{
++      WRITE_ONCE(lock->pending, 0);
++}
++
+ /**
+  * clear_pending_set_locked - take ownership and clear the pending bit.
+  * @lock: Pointer to queued spinlock structure
+@@ -162,6 +173,17 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
+ #else /* _Q_PENDING_BITS == 8 */
++/**
++ * clear_pending - clear the pending bit.
++ * @lock: Pointer to queued spinlock structure
++ *
++ * *,1,* -> *,0,*
++ */
++static __always_inline void clear_pending(struct qspinlock *lock)
++{
++      atomic_andnot(_Q_PENDING_VAL, &lock->val);
++}
++
+ /**
+  * clear_pending_set_locked - take ownership and clear the pending bit.
+  * @lock: Pointer to queued spinlock structure
+@@ -266,7 +288,7 @@ static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
+ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+ {
+       struct mcs_spinlock *prev, *next, *node;
+-      u32 new, old, tail;
++      u32 old, tail;
+       int idx;
+       BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
+@@ -289,59 +311,51 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+                                              (VAL != _Q_PENDING_VAL) || !cnt--);
+       }
++      /*
++       * If we observe any contention; queue.
++       */
++      if (val & ~_Q_LOCKED_MASK)
++              goto queue;
++
+       /*
+        * trylock || pending
+        *
+        * 0,0,0 -> 0,0,1 ; trylock
+        * 0,0,1 -> 0,1,1 ; pending
+        */
+-      for (;;) {
++      val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
++      if (!(val & ~_Q_LOCKED_MASK)) {
+               /*
+-               * If we observe any contention; queue.
++               * We're pending, wait for the owner to go away.
++               *
++               * *,1,1 -> *,1,0
++               *
++               * this wait loop must be a load-acquire such that we match the
++               * store-release that clears the locked bit and create lock
++               * sequentiality; this is because not all
++               * clear_pending_set_locked() implementations imply full
++               * barriers.
+                */
+-              if (val & ~_Q_LOCKED_MASK)
+-                      goto queue;
+-
+-              new = _Q_LOCKED_VAL;
+-              if (val == new)
+-                      new |= _Q_PENDING_VAL;
++              if (val & _Q_LOCKED_MASK) {
++                      smp_cond_load_acquire(&lock->val.counter,
++                                            !(VAL & _Q_LOCKED_MASK));
++              }
+               /*
+-               * Acquire semantic is required here as the function may
+-               * return immediately if the lock was free.
++               * take ownership and clear the pending bit.
++               *
++               * *,1,0 -> *,0,1
+                */
+-              old = atomic_cmpxchg_acquire(&lock->val, val, new);
+-              if (old == val)
+-                      break;
+-
+-              val = old;
+-      }
+-
+-      /*
+-       * we won the trylock
+-       */
+-      if (new == _Q_LOCKED_VAL)
++              clear_pending_set_locked(lock);
+               return;
++      }
+       /*
+-       * we're pending, wait for the owner to go away.
+-       *
+-       * *,1,1 -> *,1,0
+-       *
+-       * this wait loop must be a load-acquire such that we match the
+-       * store-release that clears the locked bit and create lock
+-       * sequentiality; this is because not all clear_pending_set_locked()
+-       * implementations imply full barriers.
+-       */
+-      smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));
+-
+-      /*
+-       * take ownership and clear the pending bit.
+-       *
+-       * *,1,0 -> *,0,1
++       * If pending was clear but there are waiters in the queue, then
++       * we need to undo our setting of pending before we queue ourselves.
+        */
+-      clear_pending_set_locked(lock);
+-      return;
++      if (!(val & _Q_PENDING_MASK))
++              clear_pending(lock);
+       /*
+        * End of pending bit optimistic spinning and beginning of MCS
+@@ -445,15 +459,15 @@ locked:
+        * claim the lock:
+        *
+        * n,0,0 -> 0,0,1 : lock, uncontended
+-       * *,0,0 -> *,0,1 : lock, contended
++       * *,*,0 -> *,*,1 : lock, contended
+        *
+-       * If the queue head is the only one in the queue (lock value == tail),
+-       * clear the tail code and grab the lock. Otherwise, we only need
+-       * to grab the lock.
++       * If the queue head is the only one in the queue (lock value == tail)
++       * and nobody is pending, clear the tail code and grab the lock.
++       * Otherwise, we only need to grab the lock.
+        */
+       for (;;) {
+               /* In the PV case we might already have _Q_LOCKED_VAL set */
+-              if ((val & _Q_TAIL_MASK) != tail) {
++              if ((val & _Q_TAIL_MASK) != tail || (val & _Q_PENDING_MASK)) {
+                       set_locked(lock);
+                       break;
+               }
+diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
+index 1435ba7954c3..854443f7b60b 100644
+--- a/kernel/locking/qspinlock_paravirt.h
++++ b/kernel/locking/qspinlock_paravirt.h
+@@ -89,11 +89,6 @@ static __always_inline void set_pending(struct qspinlock *lock)
+       WRITE_ONCE(lock->pending, 1);
+ }
+-static __always_inline void clear_pending(struct qspinlock *lock)
+-{
+-      WRITE_ONCE(lock->pending, 0);
+-}
+-
+ /*
+  * The pending bit check in pv_queued_spin_steal_lock() isn't a memory
+  * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
+-- 
+2.19.1
+
diff --git a/queue-4.14/locking-qspinlock-x86-increase-_q_pending_loops-uppe.patch b/queue-4.14/locking-qspinlock-x86-increase-_q_pending_loops-uppe.patch
new file mode 100644 (file)
index 0000000..9eb395c
--- /dev/null
@@ -0,0 +1,53 @@
+From 941072765d75949a0169fb8e4bf302de6960a28c Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Tue, 18 Dec 2018 18:13:59 +0100
+Subject: locking/qspinlock/x86: Increase _Q_PENDING_LOOPS upper bound
+
+commit b247be3fe89b6aba928bf80f4453d1c4ba8d2063 upstream.
+
+On x86, atomic_cond_read_relaxed will busy-wait with a cpu_relax() loop,
+so it is desirable to increase the number of times we spin on the qspinlock
+lockword when it is found to be transitioning from pending to locked.
+
+According to Waiman Long:
+
+ | Ideally, the spinning times should be at least a few times the typical
+ | cacheline load time from memory which I think can be down to 100ns or
+ | so for each cacheline load with the newest systems or up to several
+ | hundreds ns for older systems.
+
+which in his benchmarking corresponded to 512 iterations.
+
+Suggested-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Waiman Long <longman@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: boqun.feng@gmail.com
+Cc: linux-arm-kernel@lists.infradead.org
+Cc: paulmck@linux.vnet.ibm.com
+Link: http://lkml.kernel.org/r/1524738868-31318-5-git-send-email-will.deacon@arm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/qspinlock.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
+index cf4cdf508ef4..2cb6624acaec 100644
+--- a/arch/x86/include/asm/qspinlock.h
++++ b/arch/x86/include/asm/qspinlock.h
+@@ -6,6 +6,8 @@
+ #include <asm-generic/qspinlock_types.h>
+ #include <asm/paravirt.h>
++#define _Q_PENDING_LOOPS      (1 << 9)
++
+ #define       queued_spin_unlock queued_spin_unlock
+ /**
+  * queued_spin_unlock - release a queued spinlock
+-- 
+2.19.1
+
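For context, the bound defined above is consumed by the pending-wait added in locking-qspinlock-bound-spinning-on-pending-locked-t.patch from this same series; the slowpath there polls the lock word with roughly the shape below, so (1 << 9) = 512 cpu_relax()-backed iterations covers the few-hundred-nanosecond cacheline-load window described in the quote. Trimmed sketch for orientation:

	if (val == _Q_PENDING_VAL) {
		int cnt = _Q_PENDING_LOOPS;

		/* Wait for the pending -> locked handover, but give up after
		 * cnt polls and fall through to the queueing path.
		 */
		val = atomic_cond_read_relaxed(&lock->val,
					       (VAL != _Q_PENDING_VAL) || !cnt--);
	}
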
diff --git a/queue-4.14/locking-qspinlock-x86-provide-liveness-guarantee.patch b/queue-4.14/locking-qspinlock-x86-provide-liveness-guarantee.patch
new file mode 100644 (file)
index 0000000..97e2f5d
--- /dev/null
@@ -0,0 +1,153 @@
+From 6b6561aa13e5a0e50d58b6c22336869043d6cfe8 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 18 Dec 2018 18:14:00 +0100
+Subject: locking/qspinlock, x86: Provide liveness guarantee
+
+commit 7aa54be2976550f17c11a1c3e3630002dea39303 upstream.
+
+On x86 we cannot do fetch_or() with a single instruction and thus end up
+using a cmpxchg loop, which reduces determinism. Replace the fetch_or()
+with a composite operation: tas-pending + load.
+
+Using two instructions of course opens a window we previously did not
+have. Consider the scenario:
+
+       CPU0            CPU1            CPU2
+
+ 1)    lock
+         trylock -> (0,0,1)
+
+ 2)                    lock
+                         trylock /* fail */
+
+ 3)    unlock -> (0,0,0)
+
+ 4)                                    lock
+                                         trylock -> (0,0,1)
+
+ 5)                      tas-pending -> (0,1,1)
+                         load-val <- (0,1,0) from 3
+
+ 6)                      clear-pending-set-locked -> (0,0,1)
+
+                         FAIL: _2_ owners
+
+where 5) is our new composite operation. When we consider each part of
+the qspinlock state as a separate variable (as we can when
+_Q_PENDING_BITS == 8) then the above is entirely possible, because
+tas-pending will only RmW the pending byte, so the later load is able
+to observe prior tail and lock state (but not earlier than its own
+trylock, which operates on the whole word, due to coherence).
+
+To avoid this we need 2 things:
+
+ - the load must come after the tas-pending (obviously, otherwise it
+   can trivially observe prior state).
+
+ - the tas-pending must be a full-word RmW instruction (it cannot be an
+   XCHGB, for example), such that we cannot observe other state prior to
+   setting pending.
+
+On x86 we can realize this by using "LOCK BTS m32, r32" for
+tas-pending followed by a regular load.
+
+Note that observing later state is not a problem:
+
+ - if we fail to observe a later unlock, we'll simply spin-wait for
+   that store to become visible.
+
+ - if we observe a later xchg_tail(), there is no difference from that
+   xchg_tail() having taken place before the tas-pending.
+
+Suggested-by: Will Deacon <will.deacon@arm.com>
+Reported-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Will Deacon <will.deacon@arm.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: andrea.parri@amarulasolutions.com
+Cc: longman@redhat.com
+Fixes: 59fb586b4a07 ("locking/qspinlock: Remove unbounded cmpxchg() loop from locking slowpath")
+Link: https://lkml.kernel.org/r/20181003130957.183726335@infradead.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+[bigeasy: GEN_BINARY_RMWcc macro redo]
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/qspinlock.h | 21 +++++++++++++++++++++
+ kernel/locking/qspinlock.c       | 17 ++++++++++++++++-
+ 2 files changed, 37 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
+index 2cb6624acaec..f784b95e44df 100644
+--- a/arch/x86/include/asm/qspinlock.h
++++ b/arch/x86/include/asm/qspinlock.h
+@@ -5,9 +5,30 @@
+ #include <asm/cpufeature.h>
+ #include <asm-generic/qspinlock_types.h>
+ #include <asm/paravirt.h>
++#include <asm/rmwcc.h>
+ #define _Q_PENDING_LOOPS      (1 << 9)
++#define queued_fetch_set_pending_acquire queued_fetch_set_pending_acquire
++
++static __always_inline bool __queued_RMW_btsl(struct qspinlock *lock)
++{
++      GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter,
++                       "I", _Q_PENDING_OFFSET, "%0", c);
++}
++
++static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
++{
++      u32 val = 0;
++
++      if (__queued_RMW_btsl(lock))
++              val |= _Q_PENDING_VAL;
++
++      val |= atomic_read(&lock->val) & ~_Q_PENDING_MASK;
++
++      return val;
++}
++
+ #define       queued_spin_unlock queued_spin_unlock
+ /**
+  * queued_spin_unlock - release a queued spinlock
+diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
+index 9ffc2f9af8b8..1011a1b292ac 100644
+--- a/kernel/locking/qspinlock.c
++++ b/kernel/locking/qspinlock.c
+@@ -225,6 +225,20 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
+ }
+ #endif /* _Q_PENDING_BITS == 8 */
++/**
++ * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
++ * @lock : Pointer to queued spinlock structure
++ * Return: The previous lock value
++ *
++ * *,*,* -> *,1,*
++ */
++#ifndef queued_fetch_set_pending_acquire
++static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
++{
++      return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
++}
++#endif
++
+ /**
+  * set_locked - Set the lock bit and own the lock
+  * @lock: Pointer to queued spinlock structure
+@@ -323,7 +337,8 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+        * 0,0,0 -> 0,0,1 ; trylock
+        * 0,0,1 -> 0,1,1 ; pending
+        */
+-      val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
++      val = queued_fetch_set_pending_acquire(lock);
++
+       /*
+        * If we observe any contention; undo and queue.
+        */
+-- 
+2.19.1
+
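Worth noting how the returned value is consumed: only the pending bit has to be exact (it comes from the BTS carry flag), because the slowpath that follows, after the re-ordering done earlier in this series, only uses the rest of the word to decide whether to undo pending and queue. A trimmed sketch of that check (see locking-qspinlock-re-order-code.patch; shown here for orientation, not as part of this patch):

	val = queued_fetch_set_pending_acquire(lock);

	/*
	 * If we observe contention, there is a concurrent locker.  val is
	 * the old value: if pending was not already set there, we are the
	 * one who set it and must clear it before queueing.
	 */
	if (unlikely(val & ~_Q_LOCKED_MASK)) {
		if (!(val & _Q_PENDING_MASK))
			clear_pending(lock);

		goto queue;
	}
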
diff --git a/queue-4.14/locking-remove-smp_read_barrier_depends-from-queued_.patch b/queue-4.14/locking-remove-smp_read_barrier_depends-from-queued_.patch
new file mode 100644 (file)
index 0000000..b23f94b
--- /dev/null
@@ -0,0 +1,57 @@
+From 35d7f6de453053ded0c8ac5f199030e63dadb082 Mon Sep 17 00:00:00 2001
+From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
+Date: Tue, 18 Dec 2018 18:13:51 +0100
+Subject: locking: Remove smp_read_barrier_depends() from
+ queued_spin_lock_slowpath()
+
+commit 548095dea63ffc016d39c35b32c628d033638aca upstream.
+
+Queued spinlocks are not used by DEC Alpha, and furthermore operations
+such as READ_ONCE() and release/relaxed RMW atomics are being changed
+to imply smp_read_barrier_depends().  This commit therefore removes the
+now-redundant smp_read_barrier_depends() from queued_spin_lock_slowpath(),
+and adjusts the comments accordingly.
+
+Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ingo Molnar <mingo@redhat.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/locking/qspinlock.c | 12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
+index 50dc42aeaa56..5541acb79e15 100644
+--- a/kernel/locking/qspinlock.c
++++ b/kernel/locking/qspinlock.c
+@@ -170,7 +170,7 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
+  * @tail : The new queue tail code word
+  * Return: The previous queue tail code word
+  *
+- * xchg(lock, tail)
++ * xchg(lock, tail), which heads an address dependency
+  *
+  * p,*,* -> n,*,* ; prev = xchg(lock, node)
+  */
+@@ -417,13 +417,11 @@ queue:
+       if (old & _Q_TAIL_MASK) {
+               prev = decode_tail(old);
+               /*
+-               * The above xchg_tail() is also a load of @lock which generates,
+-               * through decode_tail(), a pointer.
+-               *
+-               * The address dependency matches the RELEASE of xchg_tail()
+-               * such that the access to @prev must happen after.
++               * The above xchg_tail() is also a load of @lock which
++               * generates, through decode_tail(), a pointer.  The address
++               * dependency matches the RELEASE of xchg_tail() such that
++               * the subsequent access to @prev happens after.
+                */
+-              smp_read_barrier_depends();
+               WRITE_ONCE(prev->next, node);
+-- 
+2.19.1
+
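The ordering the reworded comment relies on is a plain address dependency. A minimal sketch of the pattern follows; it is not taken from the patch, 'head', 'node' and do_something() are made-up names, and the point is only that the consumer's second load cannot complete before the first because its address comes from the first load's result. With READ_ONCE() now implying smp_read_barrier_depends() on Alpha, no explicit barrier is needed between them.

	/* Publisher: initialise, then publish with RELEASE semantics
	 * (the RELEASE half of xchg_tail() plays this role in qspinlock).
	 */
	node->data = 42;
	smp_store_release(&head, node);

	/* Consumer: the address dependency orders the two loads. */
	struct node *p = READ_ONCE(head);

	if (p)
		do_something(READ_ONCE(p->data));	/* made-up consumer */
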
diff --git a/queue-4.14/mac80211-don-t-warn-on-bad-wmm-parameters-from-buggy.patch b/queue-4.14/mac80211-don-t-warn-on-bad-wmm-parameters-from-buggy.patch
new file mode 100644 (file)
index 0000000..ae35db9
--- /dev/null
@@ -0,0 +1,44 @@
+From 346049c6643c3c53a5747bac57b3f3cddde98ec7 Mon Sep 17 00:00:00 2001
+From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+Date: Mon, 26 Mar 2018 16:21:04 +0300
+Subject: mac80211: don't WARN on bad WMM parameters from buggy APs
+
+[ Upstream commit c470bdc1aaf36669e04ba65faf1092b2d1c6cabe ]
+
+Apparently, some APs are buggy enough to send a zeroed
+WMM IE. Don't WARN on this since this is not caused by a bug
+on the client's system.
+
+This aligns the condition of the WARNING in drv_conf_tx
+with the validity check in ieee80211_sta_wmm_params.
+We will now pick the default values whenever we get
+a zeroed WMM IE.
+
+This has been reported here:
+https://bugzilla.kernel.org/show_bug.cgi?id=199161
+
+Fixes: f409079bb678 ("mac80211: sanity check CW_min/CW_max towards driver")
+Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/mlme.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
+index 328ac10084e4..75909a744121 100644
+--- a/net/mac80211/mlme.c
++++ b/net/mac80211/mlme.c
+@@ -1861,7 +1861,8 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
+               params[ac].acm = acm;
+               params[ac].uapsd = uapsd;
+-              if (params[ac].cw_min > params[ac].cw_max) {
++              if (params->cw_min == 0 ||
++                  params[ac].cw_min > params[ac].cw_max) {
+                       sdata_info(sdata,
+                                  "AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n",
+                                  params[ac].cw_min, params[ac].cw_max, aci);
+-- 
+2.19.1
+
diff --git a/queue-4.14/mac80211-fix-condition-validating-wmm-ie.patch b/queue-4.14/mac80211-fix-condition-validating-wmm-ie.patch
new file mode 100644 (file)
index 0000000..4398643
--- /dev/null
@@ -0,0 +1,38 @@
+From 5b0c98dd3ee1f9eeb8678a8abc9cc303116e462d Mon Sep 17 00:00:00 2001
+From: Ilan Peer <ilan.peer@intel.com>
+Date: Tue, 3 Apr 2018 11:35:22 +0300
+Subject: mac80211: Fix condition validating WMM IE
+
+[ Upstream commit 911a26484c33e10de6237228ca1d7293548e9f49 ]
+
+Commit c470bdc1aaf3 ("mac80211: don't WARN on bad WMM parameters from
+buggy APs") handled cases where an AP reports a zeroed WMM
+IE. However, the condition that checks the validity accessed the wrong
+index in the ieee80211_tx_queue_params array, thus wrongly deducing
+that the parameters are invalid. Fix it.
+
+Fixes: c470bdc1aaf3 ("mac80211: don't WARN on bad WMM parameters from buggy APs")
+Signed-off-by: Ilan Peer <ilan.peer@intel.com>
+Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/mlme.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
+index 75909a744121..4c59b5507e7a 100644
+--- a/net/mac80211/mlme.c
++++ b/net/mac80211/mlme.c
+@@ -1861,7 +1861,7 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
+               params[ac].acm = acm;
+               params[ac].uapsd = uapsd;
+-              if (params->cw_min == 0 ||
++              if (params[ac].cw_min == 0 ||
+                   params[ac].cw_min > params[ac].cw_max) {
+                       sdata_info(sdata,
+                                  "AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n",
+-- 
+2.19.1
+
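With both mac80211 patches applied, the per-AC sanity check in ieee80211_sta_wmm_params() effectively reads as below. This is a simplified sketch (the real function walks the ACI records in the IE and maps them to access categories), shown only to make the combined effect of the two one-line changes explicit:

	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
		if (params[ac].cw_min == 0 ||
		    params[ac].cw_min > params[ac].cw_max) {
			sdata_info(sdata,
				   "AP has invalid WMM params (CWmin/max=%d/%d for AC %d), using defaults\n",
				   params[ac].cw_min, params[ac].cw_max, ac);
			return false;	/* caller falls back to default WMM parameters */
		}
	}
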
diff --git a/queue-4.14/netfilter-ipset-fix-wraparound-in-hash-net-types.patch b/queue-4.14/netfilter-ipset-fix-wraparound-in-hash-net-types.patch
new file mode 100644 (file)
index 0000000..a2b9a5b
--- /dev/null
@@ -0,0 +1,321 @@
+From 415da73138f3d5a875db2d779f7fee43f5a63650 Mon Sep 17 00:00:00 2001
+From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+Date: Fri, 12 Jan 2018 11:16:50 +0100
+Subject: netfilter: ipset: Fix wraparound in hash:*net* types
+
+[ Upstream commit 0b8d9073539e217f79ec1bff65eb205ac796723d ]
+
+Fix wraparound bug which could lead to memory exhaustion when adding an
+x.x.x.x-255.255.255.255 range to any hash:*net* types.
+
+Fixes Netfilter's bugzilla id #1212, reported by Thomas Schwark.
+
+Fixes: 48596a8ddc46 ("netfilter: ipset: Fix adding an IPv4 range containing more than 2^31 addresses")
+Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/ipset/ip_set_hash_ipportnet.c  | 26 +++++++--------
+ net/netfilter/ipset/ip_set_hash_net.c        |  9 +++--
+ net/netfilter/ipset/ip_set_hash_netiface.c   |  9 +++--
+ net/netfilter/ipset/ip_set_hash_netnet.c     | 28 ++++++++--------
+ net/netfilter/ipset/ip_set_hash_netport.c    | 19 ++++++-----
+ net/netfilter/ipset/ip_set_hash_netportnet.c | 35 ++++++++++----------
+ 6 files changed, 63 insertions(+), 63 deletions(-)
+
+diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
+index a2f19b9906e9..543518384aa7 100644
+--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
++++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
+@@ -168,7 +168,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+       struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+       u32 ip = 0, ip_to = 0, p = 0, port, port_to;
+-      u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
++      u32 ip2_from = 0, ip2_to = 0, ip2;
+       bool with_ports = false;
+       u8 cidr;
+       int ret;
+@@ -269,22 +269,21 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+               ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1);
+       }
+-      if (retried)
++      if (retried) {
+               ip = ntohl(h->next.ip);
++              p = ntohs(h->next.port);
++              ip2 = ntohl(h->next.ip2);
++      } else {
++              p = port;
++              ip2 = ip2_from;
++      }
+       for (; ip <= ip_to; ip++) {
+               e.ip = htonl(ip);
+-              p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
+-                                                     : port;
+               for (; p <= port_to; p++) {
+                       e.port = htons(p);
+-                      ip2 = retried &&
+-                            ip == ntohl(h->next.ip) &&
+-                            p == ntohs(h->next.port)
+-                              ? ntohl(h->next.ip2) : ip2_from;
+-                      while (ip2 <= ip2_to) {
++                      do {
+                               e.ip2 = htonl(ip2);
+-                              ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
+-                                                              &cidr);
++                              ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
+                               e.cidr = cidr - 1;
+                               ret = adtfn(set, &e, &ext, &ext, flags);
+@@ -292,9 +291,10 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+                                       return ret;
+                               ret = 0;
+-                              ip2 = ip2_last + 1;
+-                      }
++                      } while (ip2++ < ip2_to);
++                      ip2 = ip2_from;
+               }
++              p = port;
+       }
+       return ret;
+ }
+diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
+index 1c67a1761e45..5449e23af13a 100644
+--- a/net/netfilter/ipset/ip_set_hash_net.c
++++ b/net/netfilter/ipset/ip_set_hash_net.c
+@@ -143,7 +143,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_net4_elem e = { .cidr = HOST_MASK };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip = 0, ip_to = 0, last;
++      u32 ip = 0, ip_to = 0;
+       int ret;
+       if (tb[IPSET_ATTR_LINENO])
+@@ -193,16 +193,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
+       }
+       if (retried)
+               ip = ntohl(h->next.ip);
+-      while (ip <= ip_to) {
++      do {
+               e.ip = htonl(ip);
+-              last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
++              ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
+               ret = adtfn(set, &e, &ext, &ext, flags);
+               if (ret && !ip_set_eexist(ret, flags))
+                       return ret;
+               ret = 0;
+-              ip = last + 1;
+-      }
++      } while (ip++ < ip_to);
+       return ret;
+ }
+diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
+index d417074f1c1a..f5164c1efce2 100644
+--- a/net/netfilter/ipset/ip_set_hash_netiface.c
++++ b/net/netfilter/ipset/ip_set_hash_netiface.c
+@@ -200,7 +200,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip = 0, ip_to = 0, last;
++      u32 ip = 0, ip_to = 0;
+       int ret;
+       if (tb[IPSET_ATTR_LINENO])
+@@ -255,17 +255,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
+       if (retried)
+               ip = ntohl(h->next.ip);
+-      while (ip <= ip_to) {
++      do {
+               e.ip = htonl(ip);
+-              last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
++              ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
+               ret = adtfn(set, &e, &ext, &ext, flags);
+               if (ret && !ip_set_eexist(ret, flags))
+                       return ret;
+               ret = 0;
+-              ip = last + 1;
+-      }
++      } while (ip++ < ip_to);
+       return ret;
+ }
+diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
+index 7f9ae2e9645b..5a2b923bd81f 100644
+--- a/net/netfilter/ipset/ip_set_hash_netnet.c
++++ b/net/netfilter/ipset/ip_set_hash_netnet.c
+@@ -169,8 +169,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_netnet4_elem e = { };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip = 0, ip_to = 0, last;
+-      u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2;
++      u32 ip = 0, ip_to = 0;
++      u32 ip2 = 0, ip2_from = 0, ip2_to = 0;
+       int ret;
+       if (tb[IPSET_ATTR_LINENO])
+@@ -247,27 +247,27 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+               ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+       }
+-      if (retried)
++      if (retried) {
+               ip = ntohl(h->next.ip[0]);
++              ip2 = ntohl(h->next.ip[1]);
++      } else {
++              ip2 = ip2_from;
++      }
+-      while (ip <= ip_to) {
++      do {
+               e.ip[0] = htonl(ip);
+-              last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
+-              ip2 = (retried &&
+-                     ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1])
+-                                                 : ip2_from;
+-              while (ip2 <= ip2_to) {
++              ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
++              do {
+                       e.ip[1] = htonl(ip2);
+-                      last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
++                      ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
+                       ret = adtfn(set, &e, &ext, &ext, flags);
+                       if (ret && !ip_set_eexist(ret, flags))
+                               return ret;
+                       ret = 0;
+-                      ip2 = last2 + 1;
+-              }
+-              ip = last + 1;
+-      }
++              } while (ip2++ < ip2_to);
++              ip2 = ip2_from;
++      } while (ip++ < ip_to);
+       return ret;
+ }
+diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
+index e6ef382febe4..1a187be9ebc8 100644
+--- a/net/netfilter/ipset/ip_set_hash_netport.c
++++ b/net/netfilter/ipset/ip_set_hash_netport.c
+@@ -161,7 +161,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 port, port_to, p = 0, ip = 0, ip_to = 0, last;
++      u32 port, port_to, p = 0, ip = 0, ip_to = 0;
+       bool with_ports = false;
+       u8 cidr;
+       int ret;
+@@ -239,25 +239,26 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
+               ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
+       }
+-      if (retried)
++      if (retried) {
+               ip = ntohl(h->next.ip);
+-      while (ip <= ip_to) {
++              p = ntohs(h->next.port);
++      } else {
++              p = port;
++      }
++      do {
+               e.ip = htonl(ip);
+-              last = ip_set_range_to_cidr(ip, ip_to, &cidr);
++              ip = ip_set_range_to_cidr(ip, ip_to, &cidr);
+               e.cidr = cidr - 1;
+-              p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
+-                                                     : port;
+               for (; p <= port_to; p++) {
+                       e.port = htons(p);
+                       ret = adtfn(set, &e, &ext, &ext, flags);
+-
+                       if (ret && !ip_set_eexist(ret, flags))
+                               return ret;
+                       ret = 0;
+               }
+-              ip = last + 1;
+-      }
++              p = port;
++      } while (ip++ < ip_to);
+       return ret;
+ }
+diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
+index 0e6e40c6f652..613e18e720a4 100644
+--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
++++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
+@@ -184,8 +184,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_netportnet4_elem e = { };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to;
+-      u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
++      u32 ip = 0, ip_to = 0, p = 0, port, port_to;
++      u32 ip2_from = 0, ip2_to = 0, ip2;
+       bool with_ports = false;
+       int ret;
+@@ -288,33 +288,34 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+               ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+       }
+-      if (retried)
++      if (retried) {
+               ip = ntohl(h->next.ip[0]);
++              p = ntohs(h->next.port);
++              ip2 = ntohl(h->next.ip[1]);
++      } else {
++              p = port;
++              ip2 = ip2_from;
++      }
+-      while (ip <= ip_to) {
++      do {
+               e.ip[0] = htonl(ip);
+-              ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
+-              p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port)
+-                                                        : port;
++              ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
+               for (; p <= port_to; p++) {
+                       e.port = htons(p);
+-                      ip2 = (retried && ip == ntohl(h->next.ip[0]) &&
+-                             p == ntohs(h->next.port)) ? ntohl(h->next.ip[1])
+-                                                       : ip2_from;
+-                      while (ip2 <= ip2_to) {
++                      do {
+                               e.ip[1] = htonl(ip2);
+-                              ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
+-                                                              &e.cidr[1]);
++                              ip2 = ip_set_range_to_cidr(ip2, ip2_to,
++                                                         &e.cidr[1]);
+                               ret = adtfn(set, &e, &ext, &ext, flags);
+                               if (ret && !ip_set_eexist(ret, flags))
+                                       return ret;
+                               ret = 0;
+-                              ip2 = ip2_last + 1;
+-                      }
++                      } while (ip2++ < ip2_to);
++                      ip2 = ip2_from;
+               }
+-              ip = ip_last + 1;
+-      }
++              p = port;
++      } while (ip++ < ip_to);
+       return ret;
+ }
+-- 
+2.19.1
+
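The essential change in all six files is the loop shape: the last address covered by the current CIDR block is compared against ip_to before the increment, so a range ending at 255.255.255.255 terminates instead of wrapping the 32-bit counter back to 0, which is what previously exhausted memory. Below is a self-contained demonstration of just that loop shape, with a toy block helper standing in for ip_set_range_to_cidr() and a printf standing in for adtfn():

	#include <stdint.h>
	#include <stdio.h>

	/* Toy stand-in: pretend every block covers 256 addresses; the real
	 * helper picks the largest CIDR block starting at 'start' that does
	 * not run past 'to', and returns the last address it covers.
	 */
	static uint32_t range_to_block_end(uint32_t start, uint32_t to)
	{
		uint32_t end = start | 0xff;

		return end < to ? end : to;
	}

	int main(void)
	{
		uint32_t ip = 0xfffffe00u;		/* 255.255.254.0 ... */
		const uint32_t ip_to = 0xffffffffu;	/* ... 255.255.255.255 */

		do {
			uint32_t start = ip;

			ip = range_to_block_end(start, ip_to);
			printf("add block %#x-%#x\n", start, ip);
		} while (ip++ < ip_to);	/* old value compared before ++: no wraparound */

		return 0;
	}

With the pre-patch form ("ip = end + 1; while (ip <= ip_to)") the counter wraps to 0 on the final block and the loop never ends.
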
diff --git a/queue-4.14/series b/queue-4.14/series
index d3dbe9ffb9e6962273770590f7025716e37c09d2..87780fabd89999b6cc0d0ececf35621f4acc111a 100644 (file)
--- a/queue-4.14/series
@@ -17,3 +17,19 @@ revert-drm-rockchip-allow-driver-to-be-shutdown-on-reboot-kexec.patch
 drm-i915-execlists-apply-a-full-mb-before-execution-for-braswell.patch
 drm-amdgpu-update-smc-firmware-image-for-polaris10-variants.patch
 x86-build-fix-compiler-support-check-for-config_retpoline.patch
+locking-remove-smp_read_barrier_depends-from-queued_.patch
+locking-qspinlock-ensure-node-is-initialised-before-.patch
+locking-qspinlock-bound-spinning-on-pending-locked-t.patch
+locking-qspinlock-merge-struct-__qspinlock-into-stru.patch
+locking-qspinlock-remove-unbounded-cmpxchg-loop-from.patch
+locking-qspinlock-remove-duplicate-clear_pending-fun.patch
+locking-qspinlock-kill-cmpxchg-loop-when-claiming-lo.patch
+locking-qspinlock-re-order-code.patch
+locking-qspinlock-x86-increase-_q_pending_loops-uppe.patch
+locking-qspinlock-x86-provide-liveness-guarantee.patch
+elevator-lookup-mq-vs-non-mq-elevators.patch
+netfilter-ipset-fix-wraparound-in-hash-net-types.patch
+mac80211-don-t-warn-on-bad-wmm-parameters-from-buggy.patch
+mac80211-fix-condition-validating-wmm-ie.patch
+ib-hfi1-remove-race-conditions-in-user_sdma-send-pat.patch
+locking-qspinlock-fix-build-for-anonymous-union-in-o.patch