From: Greg Kroah-Hartman
Date: Wed, 4 Mar 2015 05:44:05 +0000 (-0800)
Subject: 3.19-stable patches
X-Git-Tag: v3.10.71~13
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b6cd2e0095444ae79a96c4747d5796b9adfc3f82;p=thirdparty%2Fkernel%2Fstable-queue.git

3.19-stable patches

added patches:
      kernel-fix-sparse-warning-for-access_once.patch
      kernel-make-read_once-valid-on-const-arguments.patch
      kernel-tighten-rules-for-access-once.patch
      x86-spinlocks-paravirt-fix-memory-corruption-on-unlock.patch
---
diff --git a/queue-3.19/kernel-fix-sparse-warning-for-access_once.patch b/queue-3.19/kernel-fix-sparse-warning-for-access_once.patch
new file mode 100644
index 00000000000..9307185ddd3
--- /dev/null
+++ b/queue-3.19/kernel-fix-sparse-warning-for-access_once.patch
@@ -0,0 +1,34 @@
+From c5b19946eb76c67566aae6a84bf2b10ad59295ea Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger
+Date: Mon, 12 Jan 2015 12:13:39 +0100
+Subject: kernel: Fix sparse warning for ACCESS_ONCE
+
+From: Christian Borntraeger
+
+commit c5b19946eb76c67566aae6a84bf2b10ad59295ea upstream.
+
+Commit 927609d622a3 ("kernel: tighten rules for ACCESS ONCE") results in
+sparse warnings like "Using plain integer as NULL pointer" - Let's add a
+type cast to the dummy assignment.
+To avoid warnings like "sparse: warning: cast to restricted __hc32" we also
+use __force on that cast.
+
+Fixes: 927609d622a3 ("kernel: tighten rules for ACCESS ONCE")
+Signed-off-by: Christian Borntraeger
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/compiler.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/compiler.h
++++ b/include/linux/compiler.h
+@@ -461,7 +461,7 @@ static __always_inline void __write_once
+ * If possible use READ_ONCE/ASSIGN_ONCE instead.
+ */
+ #define __ACCESS_ONCE(x) ({ \
+- __maybe_unused typeof(x) __var = 0; \
++ __maybe_unused typeof(x) __var = (__force typeof(x)) 0; \
+ (volatile typeof(x) *)&(x); })
+ #define ACCESS_ONCE(x) (*__ACCESS_ONCE(x))
+
diff --git a/queue-3.19/kernel-make-read_once-valid-on-const-arguments.patch b/queue-3.19/kernel-make-read_once-valid-on-const-arguments.patch
new file mode 100644
index 00000000000..59a83ea6ad8
--- /dev/null
+++ b/queue-3.19/kernel-make-read_once-valid-on-const-arguments.patch
@@ -0,0 +1,56 @@
+From dd36929720f40f17685e841ae0d4c581c165ea60 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds
+Date: Fri, 20 Feb 2015 15:46:31 -0800
+Subject: kernel: make READ_ONCE() valid on const arguments
+
+From: Linus Torvalds
+
+commit dd36929720f40f17685e841ae0d4c581c165ea60 upstream.
+
+The use of READ_ONCE() causes lots of warnings with the pending paravirt
+spinlock fixes, because those end up passing a member of a
+'const' structure to READ_ONCE().
+
+There should certainly be nothing wrong with using READ_ONCE() with a
+const source, but the helper function __read_once_size() would cause
+warnings because it would drop the 'const' qualifier, but also because
+the destination would be marked 'const' too due to the use of 'typeof'.
+
+Use a union of types in READ_ONCE() to avoid this issue.
+
+Also make sure to use parentheses around the macro arguments to avoid
+possible operator precedence issues.
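
For readers following the change, a small user-space sketch of the situation
being fixed; the struct and function names below are invented for
illustration, and the helper is a simplified stand-in for the
<linux/compiler.h> version (which switches on the access size and uses
volatile loads):

  #include <string.h>

  /* Simplified stand-in: the important part is that the source pointer
   * is const volatile, matching the patched __read_once_size(). */
  static inline void __read_once_size(const volatile void *p, void *res, int size)
  {
          memcpy(res, (const void *)p, size);
  }

  /* Union-based READ_ONCE() as introduced below: the destination is the
   * union's char buffer, so typeof() no longer produces a const-qualified
   * temporary whose address would be passed as the destination. */
  #define READ_ONCE(x) \
          ({ union { __typeof__(x) __val; char __c[1]; } __u; \
             __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })

  struct ticketpair { unsigned short head, tail; };   /* hypothetical type */

  static unsigned short peek_head(const struct ticketpair *t)
  {
          return READ_ONCE(t->head);   /* clean even though *t is const */
  }

With the previous macro, typeof(t->head) would have declared a const-qualified
temporary and its address would have been used as the copy destination, which
is what produced the warnings described above.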
+ +Tested-by: Ingo Molnar +Cc: Christian Borntraeger +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/compiler.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/include/linux/compiler.h ++++ b/include/linux/compiler.h +@@ -198,7 +198,7 @@ static __always_inline void data_access_ + { + } + +-static __always_inline void __read_once_size(volatile void *p, void *res, int size) ++static __always_inline void __read_once_size(const volatile void *p, void *res, int size) + { + switch (size) { + case 1: *(__u8 *)res = *(volatile __u8 *)p; break; +@@ -255,10 +255,10 @@ static __always_inline void __write_once + */ + + #define READ_ONCE(x) \ +- ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; }) ++ ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + + #define WRITE_ONCE(x, val) \ +- ({ typeof(x) __val; __val = val; __write_once_size(&x, &__val, sizeof(__val)); __val; }) ++ ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; }) + + #endif /* __KERNEL__ */ + diff --git a/queue-3.19/kernel-tighten-rules-for-access-once.patch b/queue-3.19/kernel-tighten-rules-for-access-once.patch new file mode 100644 index 00000000000..423ed557be5 --- /dev/null +++ b/queue-3.19/kernel-tighten-rules-for-access-once.patch @@ -0,0 +1,53 @@ +From 927609d622a3773995f84bc03b4564f873cf0e22 Mon Sep 17 00:00:00 2001 +From: Christian Borntraeger +Date: Tue, 25 Nov 2014 10:16:39 +0100 +Subject: kernel: tighten rules for ACCESS ONCE + +From: Christian Borntraeger + +commit 927609d622a3773995f84bc03b4564f873cf0e22 upstream. + +Now that all non-scalar users of ACCESS_ONCE have been converted +to READ_ONCE or ASSIGN once, lets tighten ACCESS_ONCE to only +work on scalar types. +This variant was proposed by Alexei Starovoitov. + +Signed-off-by: Christian Borntraeger +Reviewed-by: Paul E. McKenney +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/compiler.h | 21 ++++++++++++++++----- + 1 file changed, 16 insertions(+), 5 deletions(-) + +--- a/include/linux/compiler.h ++++ b/include/linux/compiler.h +@@ -447,12 +447,23 @@ static __always_inline void __write_once + * to make the compiler aware of ordering is to put the two invocations of + * ACCESS_ONCE() in different C statements. + * +- * This macro does absolutely -nothing- to prevent the CPU from reordering, +- * merging, or refetching absolutely anything at any time. Its main intended +- * use is to mediate communication between process-level code and irq/NMI +- * handlers, all running on the same CPU. ++ * ACCESS_ONCE will only work on scalar types. For union types, ACCESS_ONCE ++ * on a union member will work as long as the size of the member matches the ++ * size of the union and the size is smaller than word size. ++ * ++ * The major use cases of ACCESS_ONCE used to be (1) Mediating communication ++ * between process-level code and irq/NMI handlers, all running on the same CPU, ++ * and (2) Ensuring that the compiler does not fold, spindle, or otherwise ++ * mutilate accesses that either do not require ordering or that interact ++ * with an explicit memory barrier or atomic instruction that provides the ++ * required ordering. ++ * ++ * If possible use READ_ONCE/ASSIGN_ONCE instead. 
+ */ +-#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) ++#define __ACCESS_ONCE(x) ({ \ ++ __maybe_unused typeof(x) __var = 0; \ ++ (volatile typeof(x) *)&(x); }) ++#define ACCESS_ONCE(x) (*__ACCESS_ONCE(x)) + + /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */ + #ifdef CONFIG_KPROBES diff --git a/queue-3.19/series b/queue-3.19/series index a35a3062422..87233685ea9 100644 --- a/queue-3.19/series +++ b/queue-3.19/series @@ -168,3 +168,7 @@ x86-efi-avoid-triple-faults-during-efi-mixed-mode-calls.patch x86-mm-aslr-fix-stack-randomization-on-64-bit-systems.patch x86-irq-fix-regression-caused-by-commit-b568b8601f05.patch x86-pmc-atom-assign-debugfs-node-as-soon-as-possible.patch +kernel-tighten-rules-for-access-once.patch +kernel-fix-sparse-warning-for-access_once.patch +kernel-make-read_once-valid-on-const-arguments.patch +x86-spinlocks-paravirt-fix-memory-corruption-on-unlock.patch diff --git a/queue-3.19/x86-spinlocks-paravirt-fix-memory-corruption-on-unlock.patch b/queue-3.19/x86-spinlocks-paravirt-fix-memory-corruption-on-unlock.patch new file mode 100644 index 00000000000..adfcd67322a --- /dev/null +++ b/queue-3.19/x86-spinlocks-paravirt-fix-memory-corruption-on-unlock.patch @@ -0,0 +1,372 @@ +From d6abfdb2022368d8c6c4be3f11a06656601a6cc2 Mon Sep 17 00:00:00 2001 +From: Raghavendra K T +Date: Fri, 6 Feb 2015 16:44:11 +0530 +Subject: x86/spinlocks/paravirt: Fix memory corruption on unlock +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Raghavendra K T + +commit d6abfdb2022368d8c6c4be3f11a06656601a6cc2 upstream. + +Paravirt spinlock clears slowpath flag after doing unlock. +As explained by Linus currently it does: + + prev = *lock; + add_smp(&lock->tickets.head, TICKET_LOCK_INC); + + /* add_smp() is a full mb() */ + + if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG)) + __ticket_unlock_slowpath(lock, prev); + +which is *exactly* the kind of things you cannot do with spinlocks, +because after you've done the "add_smp()" and released the spinlock +for the fast-path, you can't access the spinlock any more. Exactly +because a fast-path lock might come in, and release the whole data +structure. + +Linus suggested that we should not do any writes to lock after unlock(), +and we can move slowpath clearing to fastpath lock. + +So this patch implements the fix with: + + 1. Moving slowpath flag to head (Oleg): + Unlocked locks don't care about the slowpath flag; therefore we can keep + it set after the last unlock, and clear it again on the first (try)lock. + -- this removes the write after unlock. note that keeping slowpath flag would + result in unnecessary kicks. + By moving the slowpath flag from the tail to the head ticket we also avoid + the need to access both the head and tail tickets on unlock. + + 2. use xadd to avoid read/write after unlock that checks the need for + unlock_kick (Linus): + We further avoid the need for a read-after-release by using xadd; + the prev head value will include the slowpath flag and indicate if we + need to do PV kicking of suspended spinners -- on modern chips xadd + isn't (much) more expensive than an add + load. 
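
As a rough, self-contained illustration of point 1 above (the type and helper
names mirror the kernel's, but the 16-bit ticket width, the values and the
tiny driver are made up for this sketch):

  typedef unsigned short __ticket_t;
  #define TICKET_SLOWPATH_FLAG ((__ticket_t)1)      /* kept in the head's low bit */

  /* Same idea as the __tickets_equal() helper added by this patch:
   * compare tickets while ignoring the slowpath flag. */
  static int __tickets_equal(__ticket_t one, __ticket_t two)
  {
          return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
  }

  int main(void)
  {
          __ticket_t head = 4 | TICKET_SLOWPATH_FLAG;   /* flag left set after unlock */
          __ticket_t tail = 4;

          /* Reads as unlocked: a stale flag no longer forces a write to the
           * lock word after the unlock; the next locker clears it in
           * __ticket_check_and_clear_slowpath(). */
          return __tickets_equal(head, tail) ? 0 : 1;   /* exits 0 */
  }

Because tickets advance in steps of TICKET_LOCK_INC (2 in paravirt
configurations), the low bit of the head ticket is free to carry the flag,
which is why an unlocked lock may legitimately have head == tail except for
that bit.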
+ +Result: + setup: 16core (32 cpu +ht sandy bridge 8GB 16vcpu guest) + benchmark overcommit %improve + kernbench 1x -0.13 + kernbench 2x 0.02 + dbench 1x -1.77 + dbench 2x -0.63 + +[Jeremy: Hinted missing TICKET_LOCK_INC for kick] +[Oleg: Moved slowpath flag to head, ticket_equals idea] +[PeterZ: Added detailed changelog] + +Suggested-by: Linus Torvalds +Reported-by: Sasha Levin +Tested-by: Sasha Levin +Signed-off-by: Raghavendra K T +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Oleg Nesterov +Cc: Andrew Jones +Cc: Andrew Morton +Cc: Andy Lutomirski +Cc: Boris Ostrovsky +Cc: Christian Borntraeger +Cc: Christoph Lameter +Cc: Dave Hansen +Cc: Dave Jones +Cc: David Vrabel +Cc: Fernando Luis Vázquez Cao +Cc: Konrad Rzeszutek Wilk +Cc: Masami Hiramatsu +Cc: Paolo Bonzini +Cc: Paul E. McKenney +Cc: Ulrich Obergfell +Cc: Waiman Long +Cc: a.ryabinin@samsung.com +Cc: dave@stgolabs.net +Cc: hpa@zytor.com +Cc: jasowang@redhat.com +Cc: jeremy@goop.org +Cc: paul.gortmaker@windriver.com +Cc: riel@redhat.com +Cc: tglx@linutronix.de +Cc: waiman.long@hp.com +Cc: xen-devel@lists.xenproject.org +Link: http://lkml.kernel.org/r/20150215173043.GA7471@linux.vnet.ibm.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/spinlock.h | 94 +++++++++++++++++++--------------------- + arch/x86/kernel/kvm.c | 13 +++-- + arch/x86/xen/spinlock.c | 13 +++-- + 3 files changed, 64 insertions(+), 56 deletions(-) + +--- a/arch/x86/include/asm/spinlock.h ++++ b/arch/x86/include/asm/spinlock.h +@@ -46,7 +46,7 @@ static __always_inline bool static_key_f + + static inline void __ticket_enter_slowpath(arch_spinlock_t *lock) + { +- set_bit(0, (volatile unsigned long *)&lock->tickets.tail); ++ set_bit(0, (volatile unsigned long *)&lock->tickets.head); + } + + #else /* !CONFIG_PARAVIRT_SPINLOCKS */ +@@ -60,10 +60,30 @@ static inline void __ticket_unlock_kick( + } + + #endif /* CONFIG_PARAVIRT_SPINLOCKS */ ++static inline int __tickets_equal(__ticket_t one, __ticket_t two) ++{ ++ return !((one ^ two) & ~TICKET_SLOWPATH_FLAG); ++} ++ ++static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock, ++ __ticket_t head) ++{ ++ if (head & TICKET_SLOWPATH_FLAG) { ++ arch_spinlock_t old, new; ++ ++ old.tickets.head = head; ++ new.tickets.head = head & ~TICKET_SLOWPATH_FLAG; ++ old.tickets.tail = new.tickets.head + TICKET_LOCK_INC; ++ new.tickets.tail = old.tickets.tail; ++ ++ /* try to clear slowpath flag when there are no contenders */ ++ cmpxchg(&lock->head_tail, old.head_tail, new.head_tail); ++ } ++} + + static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) + { +- return lock.tickets.head == lock.tickets.tail; ++ return __tickets_equal(lock.tickets.head, lock.tickets.tail); + } + + /* +@@ -87,18 +107,21 @@ static __always_inline void arch_spin_lo + if (likely(inc.head == inc.tail)) + goto out; + +- inc.tail &= ~TICKET_SLOWPATH_FLAG; + for (;;) { + unsigned count = SPIN_THRESHOLD; + + do { +- if (READ_ONCE(lock->tickets.head) == inc.tail) +- goto out; ++ inc.head = READ_ONCE(lock->tickets.head); ++ if (__tickets_equal(inc.head, inc.tail)) ++ goto clear_slowpath; + cpu_relax(); + } while (--count); + __ticket_lock_spinning(lock, inc.tail); + } +-out: barrier(); /* make sure nothing creeps before the lock is taken */ ++clear_slowpath: ++ __ticket_check_and_clear_slowpath(lock, inc.head); ++out: ++ barrier(); /* make sure nothing creeps before the lock is taken */ + } + + static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) +@@ -106,56 +129,30 @@ 
static __always_inline int arch_spin_try + arch_spinlock_t old, new; + + old.tickets = READ_ONCE(lock->tickets); +- if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG)) ++ if (!__tickets_equal(old.tickets.head, old.tickets.tail)) + return 0; + + new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT); ++ new.head_tail &= ~TICKET_SLOWPATH_FLAG; + + /* cmpxchg is a full barrier, so nothing can move before it */ + return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; + } + +-static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock, +- arch_spinlock_t old) +-{ +- arch_spinlock_t new; +- +- BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS); +- +- /* Perform the unlock on the "before" copy */ +- old.tickets.head += TICKET_LOCK_INC; +- +- /* Clear the slowpath flag */ +- new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT); +- +- /* +- * If the lock is uncontended, clear the flag - use cmpxchg in +- * case it changes behind our back though. +- */ +- if (new.tickets.head != new.tickets.tail || +- cmpxchg(&lock->head_tail, old.head_tail, +- new.head_tail) != old.head_tail) { +- /* +- * Lock still has someone queued for it, so wake up an +- * appropriate waiter. +- */ +- __ticket_unlock_kick(lock, old.tickets.head); +- } +-} +- + static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) + { + if (TICKET_SLOWPATH_FLAG && +- static_key_false(¶virt_ticketlocks_enabled)) { +- arch_spinlock_t prev; ++ static_key_false(¶virt_ticketlocks_enabled)) { ++ __ticket_t head; + +- prev = *lock; +- add_smp(&lock->tickets.head, TICKET_LOCK_INC); ++ BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS); + +- /* add_smp() is a full mb() */ ++ head = xadd(&lock->tickets.head, TICKET_LOCK_INC); + +- if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG)) +- __ticket_unlock_slowpath(lock, prev); ++ if (unlikely(head & TICKET_SLOWPATH_FLAG)) { ++ head &= ~TICKET_SLOWPATH_FLAG; ++ __ticket_unlock_kick(lock, (head + TICKET_LOCK_INC)); ++ } + } else + __add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX); + } +@@ -164,14 +161,15 @@ static inline int arch_spin_is_locked(ar + { + struct __raw_tickets tmp = READ_ONCE(lock->tickets); + +- return tmp.tail != tmp.head; ++ return !__tickets_equal(tmp.tail, tmp.head); + } + + static inline int arch_spin_is_contended(arch_spinlock_t *lock) + { + struct __raw_tickets tmp = READ_ONCE(lock->tickets); + +- return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; ++ tmp.head &= ~TICKET_SLOWPATH_FLAG; ++ return (tmp.tail - tmp.head) > TICKET_LOCK_INC; + } + #define arch_spin_is_contended arch_spin_is_contended + +@@ -183,16 +181,16 @@ static __always_inline void arch_spin_lo + + static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) + { +- __ticket_t head = ACCESS_ONCE(lock->tickets.head); ++ __ticket_t head = READ_ONCE(lock->tickets.head); + + for (;;) { +- struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); ++ struct __raw_tickets tmp = READ_ONCE(lock->tickets); + /* + * We need to check "unlocked" in a loop, tmp.head == head + * can be false positive because of overflow. 
+ */ +- if (tmp.head == (tmp.tail & ~TICKET_SLOWPATH_FLAG) || +- tmp.head != head) ++ if (__tickets_equal(tmp.head, tmp.tail) || ++ !__tickets_equal(tmp.head, head)) + break; + + cpu_relax(); +--- a/arch/x86/kernel/kvm.c ++++ b/arch/x86/kernel/kvm.c +@@ -609,7 +609,7 @@ static inline void check_zero(void) + u8 ret; + u8 old; + +- old = ACCESS_ONCE(zero_stats); ++ old = READ_ONCE(zero_stats); + if (unlikely(old)) { + ret = cmpxchg(&zero_stats, old, 0); + /* This ensures only one fellow resets the stat */ +@@ -727,6 +727,7 @@ __visible void kvm_lock_spinning(struct + int cpu; + u64 start; + unsigned long flags; ++ __ticket_t head; + + if (in_nmi()) + return; +@@ -768,11 +769,15 @@ __visible void kvm_lock_spinning(struct + */ + __ticket_enter_slowpath(lock); + ++ /* make sure enter_slowpath, which is atomic does not cross the read */ ++ smp_mb__after_atomic(); ++ + /* + * check again make sure it didn't become free while + * we weren't looking. + */ +- if (ACCESS_ONCE(lock->tickets.head) == want) { ++ head = READ_ONCE(lock->tickets.head); ++ if (__tickets_equal(head, want)) { + add_stats(TAKEN_SLOW_PICKUP, 1); + goto out; + } +@@ -803,8 +808,8 @@ static void kvm_unlock_kick(struct arch_ + add_stats(RELEASED_SLOW, 1); + for_each_cpu(cpu, &waiting_cpus) { + const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu); +- if (ACCESS_ONCE(w->lock) == lock && +- ACCESS_ONCE(w->want) == ticket) { ++ if (READ_ONCE(w->lock) == lock && ++ READ_ONCE(w->want) == ticket) { + add_stats(RELEASED_SLOW_KICKED, 1); + kvm_kick_cpu(cpu); + break; +--- a/arch/x86/xen/spinlock.c ++++ b/arch/x86/xen/spinlock.c +@@ -41,7 +41,7 @@ static u8 zero_stats; + static inline void check_zero(void) + { + u8 ret; +- u8 old = ACCESS_ONCE(zero_stats); ++ u8 old = READ_ONCE(zero_stats); + if (unlikely(old)) { + ret = cmpxchg(&zero_stats, old, 0); + /* This ensures only one fellow resets the stat */ +@@ -112,6 +112,7 @@ __visible void xen_lock_spinning(struct + struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting); + int cpu = smp_processor_id(); + u64 start; ++ __ticket_t head; + unsigned long flags; + + /* If kicker interrupts not initialized yet, just spin */ +@@ -159,11 +160,15 @@ __visible void xen_lock_spinning(struct + */ + __ticket_enter_slowpath(lock); + ++ /* make sure enter_slowpath, which is atomic does not cross the read */ ++ smp_mb__after_atomic(); ++ + /* + * check again make sure it didn't become free while + * we weren't looking + */ +- if (ACCESS_ONCE(lock->tickets.head) == want) { ++ head = READ_ONCE(lock->tickets.head); ++ if (__tickets_equal(head, want)) { + add_stats(TAKEN_SLOW_PICKUP, 1); + goto out; + } +@@ -204,8 +209,8 @@ static void xen_unlock_kick(struct arch_ + const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu); + + /* Make sure we read lock before want */ +- if (ACCESS_ONCE(w->lock) == lock && +- ACCESS_ONCE(w->want) == next) { ++ if (READ_ONCE(w->lock) == lock && ++ READ_ONCE(w->want) == next) { + add_stats(RELEASED_SLOW_KICKED, 1); + xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); + break;