When counters are rotated, there is contention between the threads, which
can slow down the thread performing the rotation. Let's apply a cpu_relax
in the CAS retry loop so that the waiting threads back off and the thread
performing the rotation can finish faster.
/* remove the bit, used for the lock */
curr_tick &= ~1;
- } while (!_HA_ATOMIC_CAS(&ctr->curr_tick, &curr_tick, curr_tick | 0x1));
+ } while (!_HA_ATOMIC_CAS(&ctr->curr_tick, &curr_tick, curr_tick | 0x1) && __ha_cpu_relax());
__ha_barrier_atomic_store();
if (now_ms_tmp - curr_tick >= period) {