]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
jump_label: Fix concurrency issues in static_key_slow_dec()
authorThomas Gleixner <tglx@linutronix.de>
Mon, 10 Jun 2024 12:46:36 +0000 (14:46 +0200)
committerPeter Zijlstra <peterz@infradead.org>
Tue, 11 Jun 2024 09:25:23 +0000 (11:25 +0200)
The commit which tried to fix the concurrency issues of concurrent
static_key_slow_inc() failed to fix the equivalent issues
vs. static_key_slow_dec():

CPU0                     CPU1

static_key_slow_dec()
  static_key_slow_try_dec()

key->enabled == 1
val = atomic_fetch_add_unless(&key->enabled, -1, 1);
if (val == 1)
     return false;

  jump_label_lock();
  if (atomic_dec_and_test(&key->enabled)) {
     --> key->enabled == 0
   __jump_label_update()

 static_key_slow_dec()
   static_key_slow_try_dec()

     key->enabled == 0
     val = atomic_fetch_add_unless(&key->enabled, -1, 1);

      --> key->enabled == -1 <- FAIL

There is another bug in that code, when there is a concurrent
static_key_slow_inc() which enables the key as that sets key->enabled to -1
so on the other CPU

val = atomic_fetch_add_unless(&key->enabled, -1, 1);

will succeed and decrement to -2, which is invalid.

Cure all of this by replacing the atomic_fetch_add_unless() with a
atomic_try_cmpxchg() loop similar to static_key_fast_inc_not_disabled().

[peterz: add WARN_ON_ONCE for the -1 race]
Fixes: 4c5ea0a9cd02 ("locking/static_key: Fix concurrent static_key_slow_inc()")
Reported-by: Yue Sun <samsun1006219@gmail.com>
Reported-by: Xingwei Lee <xrivendell7@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20240610124406.422897838@linutronix.de
kernel/jump_label.c

index 3218fa5688b9394097f9c0bdcddb3683641178ba..1f05a19918f47998bf492d75da673ddf80274c5c 100644 (file)
@@ -131,7 +131,7 @@ bool static_key_fast_inc_not_disabled(struct static_key *key)
        STATIC_KEY_CHECK_USE(key);
        /*
         * Negative key->enabled has a special meaning: it sends
-        * static_key_slow_inc() down the slow path, and it is non-zero
+        * static_key_slow_inc/dec() down the slow path, and it is non-zero
         * so it counts as "enabled" in jump_label_update().  Note that
         * atomic_inc_unless_negative() checks >= 0, so roll our own.
         */
@@ -150,7 +150,7 @@ bool static_key_slow_inc_cpuslocked(struct static_key *key)
        lockdep_assert_cpus_held();
 
        /*
-        * Careful if we get concurrent static_key_slow_inc() calls;
+        * Careful if we get concurrent static_key_slow_inc/dec() calls;
         * later calls must wait for the first one to _finish_ the
         * jump_label_update() process.  At the same time, however,
         * the jump_label_update() call below wants to see
@@ -247,20 +247,32 @@ EXPORT_SYMBOL_GPL(static_key_disable);
 
 static bool static_key_slow_try_dec(struct static_key *key)
 {
-       int val;
-
-       val = atomic_fetch_add_unless(&key->enabled, -1, 1);
-       if (val == 1)
-               return false;
+       int v;
 
        /*
-        * The negative count check is valid even when a negative
-        * key->enabled is in use by static_key_slow_inc(); a
-        * __static_key_slow_dec() before the first static_key_slow_inc()
-        * returns is unbalanced, because all other static_key_slow_inc()
-        * instances block while the update is in progress.
+        * Go into the slow path if key::enabled is less than or equal than
+        * one. One is valid to shut down the key, anything less than one
+        * is an imbalance, which is handled at the call site.
+        *
+        * That includes the special case of '-1' which is set in
+        * static_key_slow_inc_cpuslocked(), but that's harmless as it is
+        * fully serialized in the slow path below. By the time this task
+        * acquires the jump label lock the value is back to one and the
+        * retry under the lock must succeed.
         */
-       WARN(val < 0, "jump label: negative count!\n");
+       v = atomic_read(&key->enabled);
+       do {
+               /*
+                * Warn about the '-1' case though; since that means a
+                * decrement is concurrent with a first (0->1) increment. IOW
+                * people are trying to disable something that wasn't yet fully
+                * enabled. This suggests an ordering problem on the user side.
+                */
+               WARN_ON_ONCE(v < 0);
+               if (v <= 1)
+                       return false;
+       } while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v - 1)));
+
        return true;
 }
 
@@ -271,10 +283,11 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key)
        if (static_key_slow_try_dec(key))
                return;
 
-       jump_label_lock();
-       if (atomic_dec_and_test(&key->enabled))
+       guard(mutex)(&jump_label_mutex);
+       if (atomic_cmpxchg(&key->enabled, 1, 0))
                jump_label_update(key);
-       jump_label_unlock();
+       else
+               WARN_ON_ONCE(!static_key_slow_try_dec(key));
 }
 
 static void __static_key_slow_dec(struct static_key *key)