1 /* Linuxthreads - a simple clone()-based implementation of Posix */
2 /* threads for Linux. */
3 /* Copyright (C) 1998 Xavier Leroy (Xavier.Leroy@inria.fr) */
5 /* This program is free software; you can redistribute it and/or */
6 /* modify it under the terms of the GNU Library General Public License */
7 /* as published by the Free Software Foundation; either version 2 */
8 /* of the License, or (at your option) any later version. */
10 /* This program is distributed in the hope that it will be useful, */
11 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
12 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
13 /* GNU Library General Public License for more details. */
23 #include "internals.h"
27 /* The status field of a spinlock is a pointer whose least significant
30 Thus the field values have the following meanings:
32 status == 0: spinlock is free
33 status == 1: spinlock is taken; no thread is waiting on it
35 (status & 1) == 1: spinlock is taken and (status & ~1L) is a
36 pointer to the first waiting thread; other
37 waiting threads are linked via the p_nextlock
39 (status & 1) == 0: same as above, but spinlock is not taken.
41 The waiting list is not sorted by priority order.
42 Actually, we always insert at top of list (sole insertion mode
43 that can be performed without locking).
44 For __pthread_unlock, we perform a linear search in the list
45 to find the highest-priority, oldest waiting thread.
46 This is safe because there are no concurrent __pthread_unlock
47 operations -- only the thread that locked the mutex can unlock it. */
/* NOTE(review): this function -- like the rest of the file -- has been
   garbled by extraction: statements are split across lines and several
   original lines (braces, else-branches, the suspend/restart calls) are
   missing, so the text below is not compilable as-is.  Restore from the
   pristine linuxthreads spinlock.c before attempting functional changes. */
/* Acquire LOCK for the calling thread.  With compare-and-swap support it
   spins briefly on SMP, then atomically either seizes the free lock or
   pushes the caller onto the lock's wait list (see the status-field
   comment at the top of the file); without CAS it falls back to the
   emulated spinlock via __pthread_acquire. */
50 void internal_function
__pthread_lock(struct _pthread_fastlock
* lock
,
53 #if defined HAS_COMPARE_AND_SWAP
54 long oldstatus
, newstatus
;
55 int successful_seizure
, spurious_wakeup_count
= 0;
59 #if defined TEST_FOR_COMPARE_AND_SWAP
60 if (!__pthread_has_cas
)
62 #if !defined HAS_COMPARE_AND_SWAP
/* No compare-and-swap available: use the spinlock-based fallback. */
64 __pthread_acquire(&lock
->__spinlock
);
69 #if defined HAS_COMPARE_AND_SWAP
72 /* On SMP, try spinning to get the lock. */
74 if (__pthread_smp_kernel
) {
/* Adaptive spin limit, seeded from the running estimate that is kept in
   lock->__spinlock and updated below. */
75 int max_count
= lock
->__spinlock
* 2 + 10;
77 for (spin_count
= 0; spin_count
< max_count
; spin_count
++) {
/* Lock looks free (low bit clear): try to grab it by setting bit 0. */
78 if (((oldstatus
= lock
->__status
) & 1) == 0) {
79 if(__compare_and_swap(&lock
->__status
, oldstatus
, oldstatus
| 1))
/* Moving-average update of the spin-count estimate (step of 1/8). */
82 lock
->__spinlock
+= (spin_count
- lock
->__spinlock
) / 8;
88 lock
->__spinlock
+= (spin_count
- lock
->__spinlock
) / 8;
91 /* No luck, try once more or suspend. */
94 oldstatus
= lock
->__status
;
95 successful_seizure
= 0;
/* Free lock: new status merely sets the "taken" bit. */
97 if ((oldstatus
& 1) == 0) {
98 newstatus
= oldstatus
| 1;
99 successful_seizure
= 1;
/* Lock is held: enqueue ourselves at the head of the wait list by making
   the status point at our descriptor, keeping the low "taken" bit set. */
102 self
= thread_self();
103 newstatus
= (long) self
| 1;
107 THREAD_SETMEM(self
, p_nextlock
, (pthread_descr
) (oldstatus
& ~1L));
108 /* Make sure the store in p_nextlock completes before performing
109 the compare-and-swap */
112 } while(! __compare_and_swap(&lock
->__status
, oldstatus
, newstatus
));
114 /* Suspend with guard against spurious wakeup.
115 This can happen in pthread_cond_timedwait_relative, when the thread
116 wakes up due to timeout and is still on the condvar queue, and then
117 locks the queue to remove itself. At that point it may still be on the
118 queue, and may be resumed by a condition signal. */
120 if (!successful_seizure
) {
/* Still linked on the wait list => this resume was not for the lock. */
123 if (self
->p_nextlock
!= NULL
) {
124 /* Count resumes that don't belong to us. */
125 spurious_wakeup_count
++;
133 /* Put back any resumes we caught that don't belong to us. */
134 while (spurious_wakeup_count
--)
/* NOTE(review): garbled by extraction -- statements are split across lines
   and several original lines (braces, loop headers, returns) are missing;
   not compilable as shown. */
/* Release LOCK.  Without CAS support, publish prior writes and clear the
   spinlock word.  With CAS: if status == 1 (taken, no waiters) release
   atomically; otherwise scan the wait list for the highest-priority
   waiter, unlink it and wake it.  Per the comment at the top of the file,
   only the owner unlocks, so the scan needs no extra locking. */
139 int __pthread_unlock(struct _pthread_fastlock
* lock
)
141 #if defined HAS_COMPARE_AND_SWAP
143 pthread_descr thr
, * ptr
, * maxptr
;
147 #if defined TEST_FOR_COMPARE_AND_SWAP
148 if (!__pthread_has_cas
)
150 #if !defined HAS_COMPARE_AND_SWAP
/* Fallback path: write barrier, then free the spinlock word. */
152 WRITE_MEMORY_BARRIER();
153 lock
->__spinlock
= 0;
158 #if defined HAS_COMPARE_AND_SWAP
160 oldstatus
= lock
->__status
;
/* Fast path: taken with no waiters (status == 1) -- release atomically. */
162 while ((oldstatus
= lock
->__status
) == 1) {
163 if (__compare_and_swap_with_release_semantics(&lock
->__status
,
168 /* Find thread in waiting queue with maximal priority */
169 ptr
= (pthread_descr
*) &lock
->__status
;
170 thr
= (pthread_descr
) (oldstatus
& ~1L);
174 if (thr
->p_priority
>= maxprio
) {
176 maxprio
= thr
->p_priority
;
178 ptr
= &(thr
->p_nextlock
);
179 /* Prevent reordering of the load of lock->__status above and the
180 load of *ptr below, as well as reordering of *ptr between
181 several iterations of the while loop. Some processors (e.g.
182 multiprocessor Alphas) could perform such reordering even though
183 the loads are dependent. */
184 READ_MEMORY_BARRIER();
187 /* Prevent reordering of the load of lock->__status above and
188 thr->p_nextlock below */
189 READ_MEMORY_BARRIER();
190 /* Remove max prio thread from waiting list. */
191 if (maxptr
== (pthread_descr
*) &lock
->__status
) {
192 /* If max prio thread is at head, remove it with compare-and-swap
193 to guard against concurrent lock operation. This removal
194 also has the side effect of marking the lock as released
195 because the new status comes from thr->p_nextlock whose
196 least significant bit is clear. */
197 thr
= (pthread_descr
) (oldstatus
& ~1L);
198 if (! __compare_and_swap_with_release_semantics
199 (&lock
->__status
, oldstatus
, (long)(thr
->p_nextlock
)))
202 /* No risk of concurrent access, remove max prio thread normally.
203 But in this case we must also flip the least significant bit
204 of the status to mark the lock as released. */
206 *maxptr
= thr
->p_nextlock
;
209 oldstatus
= lock
->__status
;
210 } while (!__compare_and_swap_with_release_semantics(&lock
->__status
,
211 oldstatus
, oldstatus
& ~1L));
213 /* Prevent reordering of store to *maxptr above and store to thr->p_nextlock
   below */
215 WRITE_MEMORY_BARRIER();
216 /* Wake up the selected waiting thread */
217 thr
->p_nextlock
= NULL
;
225 * Alternate fastlocks do not queue threads directly. Instead, they queue
226 * these wait queue node structures. When a timed wait wakes up due to
227 * a timeout, it can leave its wait node in the queue (because there
228 * is no safe way to remove from the queue). Some other thread will
229 * deallocate the abandoned node.
/* NOTE(review): the opening "struct wait_node {" line and the closing "};"
   have been lost in extraction; only the member list survives below. */
234 struct wait_node
*next
; /* Next node in null terminated linked list */
235 pthread_descr thr
; /* The thread waiting with this node */
236 int abandoned
; /* Atomic flag */
/* Head of the global free list of wait nodes, declared long so it can be
   updated via compare_and_swap, plus the spinlock that backs the CAS
   emulation for it. */
239 static long wait_node_free_list
;
240 static int wait_node_free_list_spinlock
;
242 /* Allocate a new node from the head of the free list using an atomic
243 operation, or else using malloc if that list is empty. A fundamental
244 assumption here is that we can safely access wait_node_free_list->next.
245 That's because we never free nodes once we allocate them, so a pointer to a
246 node remains valid indefinitely. */
/* NOTE(review): garbled by extraction; braces, the "do {" and the
   "if (oldvalue == 0)" guard around the malloc fallback appear to be
   missing from the visible text. */
/* Pop a node off the global free list with compare-and-swap; when the
   list is empty, fall back to heap allocation.  Per the comment above,
   nodes are never returned to the system, so reading ->next through a
   stale head pointer remains valid. */
248 static struct wait_node
*wait_node_alloc(void)
250 long oldvalue
, newvalue
;
253 oldvalue
= wait_node_free_list
;
/* Free list empty: allocate fresh.  (sizeof does not evaluate its
   operand, so this call expression does not recurse at run time.) */
256 return malloc(sizeof *wait_node_alloc());
258 newvalue
= (long) ((struct wait_node
*) oldvalue
)->next
;
259 WRITE_MEMORY_BARRIER();
260 } while (! compare_and_swap(&wait_node_free_list
, oldvalue
, newvalue
,
261 &wait_node_free_list_spinlock
));
263 return (struct wait_node
*) oldvalue
;
266 /* Return a node to the head of the free list using an atomic operation. */
/* NOTE(review): garbled by extraction; the opening brace and the "do {"
   of the retry loop are missing from the visible text. */
/* Push WN back onto the head of the global free list atomically. */
269 static void wait_node_free(struct wait_node
*wn
)
271 long oldvalue
, newvalue
;
274 oldvalue
= wait_node_free_list
;
275 wn
->next
= (struct wait_node
*) oldvalue
;
276 newvalue
= (long) wn
;
/* Publish wn->next before the CAS makes wn visible as the new head. */
277 WRITE_MEMORY_BARRIER();
278 } while (! compare_and_swap(&wait_node_free_list
, oldvalue
, newvalue
,
279 &wait_node_free_list_spinlock
));
282 /* Remove a wait node from the specified queue. It is assumed
283 that the removal takes place concurrently with only atomic insertions at the
284 head of the queue. */
/* NOTE(review): garbled by extraction; the final parameter declaration
   (the spinlock passed on to compare_and_swap) and several braces are
   missing from the visible text. */
/* Unlink P_NODE (reached through PP_NODE) from the queue headed at
   PP_HEAD.  Only the head position can race with concurrent atomic
   insertions, so only that case uses compare-and-swap; on CAS failure
   the node is no longer first and is re-found by a plain search. */
286 static void wait_node_dequeue(struct wait_node
**pp_head
,
287 struct wait_node
**pp_node
,
288 struct wait_node
*p_node
,
291 long oldvalue
, newvalue
;
293 /* If the node is being deleted from the head of the
294 list, it must be deleted using atomic compare-and-swap.
295 Otherwise it can be deleted in the straightforward way. */
297 if (pp_node
== pp_head
) {
298 oldvalue
= (long) p_node
;
299 newvalue
= (long) p_node
->next
;
301 if (compare_and_swap((long *) pp_node
, oldvalue
, newvalue
, spinlock
))
304 /* Oops! Compare and swap failed, which means the node is
305 no longer first. We delete it using the ordinary method. But we don't
306 know the identity of the node which now holds the pointer to the node
307 being deleted, so we must search from the beginning. */
309 for (pp_node
= pp_head
; *pp_node
!= p_node
; pp_node
= &(*pp_node
)->next
)
/* Non-head case: a plain pointer update suffices. */
313 *pp_node
= p_node
->next
;
/* NOTE(review): garbled by extraction; the second parameter, the enclosing
   do-loop and the final suspend call are missing from the visible text. */
/* Acquire the "alternate" fastlock.  If the lock is free (status == 0)
   seize it directly; otherwise push a stack-allocated wait_node onto the
   queue with compare-and-swap and suspend until the owner restarts us
   (no spurious wakeups possible here -- see the trailing comment). */
317 void __pthread_alt_lock(struct _pthread_fastlock
* lock
,
320 struct wait_node wait_node
;
321 long oldstatus
, newstatus
;
324 oldstatus
= lock
->__status
;
325 if (oldstatus
== 0) {
/* Lock busy: enqueue ourselves at the head of the wait-node list. */
329 wait_node
.thr
= self
= thread_self();
330 newstatus
= (long) &wait_node
;
332 wait_node
.abandoned
= 0;
333 wait_node
.next
= (struct wait_node
*) oldstatus
;
334 /* Make sure the store in wait_node.next completes before performing
335 the compare-and-swap */
337 } while(! compare_and_swap(&lock
->__status
, oldstatus
, newstatus
,
340 /* Suspend. Note that unlike in __pthread_lock, we don't worry
341 here about spurious wakeup. That's because this lock is not
342 used in situations where that can happen; the restart can
343 only come from the previous lock owner. */
349 /* Timed-out lock operation; returns 0 to indicate timeout. */
/* NOTE(review): garbled by extraction; braces, the enclosing do-loop and
   parts of the timeout path are missing from the visible text. */
/* Timed variant of __pthread_alt_lock.  Returns 0 on timeout, 1 once the
   lock is held.  The wait node is heap-allocated (wait_node_alloc) so it
   can be abandoned in the queue when we time out -- see the race comment
   in the body. */
351 int __pthread_alt_timedlock(struct _pthread_fastlock
* lock
,
352 pthread_descr self
, const struct timespec
*abstime
)
354 struct wait_node
*p_wait_node
= wait_node_alloc();
355 long oldstatus
, newstatus
;
357 /* Out of memory, just give up and do ordinary lock. */
358 if (p_wait_node
== 0) {
359 __pthread_alt_lock(lock
, self
);
364 oldstatus
= lock
->__status
;
365 if (oldstatus
== 0) {
/* Lock busy: enqueue the heap node at the head of the wait list. */
369 p_wait_node
->thr
= self
= thread_self();
370 newstatus
= (long) p_wait_node
;
372 p_wait_node
->abandoned
= 0;
373 p_wait_node
->next
= (struct wait_node
*) oldstatus
;
374 /* Make sure the store in wait_node.next completes before performing
375 the compare-and-swap */
377 } while(! compare_and_swap(&lock
->__status
, oldstatus
, newstatus
,
380 /* If we did not get the lock, do a timed suspend. If we wake up due
381 to a timeout, then there is a race; the old lock owner may try
382 to remove us from the queue. This race is resolved by us and the owner
383 doing an atomic testandset() to change the state of the wait node from 0
384 to 1. If we succeed, then it's a timeout and we abandon the node in the
385 queue. If we fail, it means the owner gave us the lock. */
387 if (oldstatus
!= 0) {
388 if (timedsuspend(self
, abstime
) == 0) {
389 if (!testandset(&p_wait_node
->abandoned
))
390 return 0; /* Timeout! */
392 /* Eat outstanding resume from owner, otherwise wait_node_free() below
393 will race with owner's wait_node_dequeue(). */
398 wait_node_free(p_wait_node
);
400 return 1; /* Got the lock! */
/* NOTE(review): garbled by extraction; the retry label, several braces,
   local declarations (oldstatus, prio, maxprio) and loop structure are
   missing from the visible text. */
/* Release the alternate fastlock.  Fast path: nobody waiting (status 0 or
   1) -- CAS the status to 0.  Slow path: walk the wait-node queue, freeing
   abandoned nodes and remembering the highest-priority live waiter; claim
   that waiter with testandset on its abandoned flag, dequeue it, and
   restart its thread.  Retries from the top when every node was abandoned
   or when the chosen waiter timed out concurrently. */
403 void __pthread_alt_unlock(struct _pthread_fastlock
*lock
)
406 struct wait_node
*p_node
, **pp_node
, *p_max_prio
, **pp_max_prio
;
407 struct wait_node
** const pp_head
= (struct wait_node
**) &lock
->__status
;
412 /* If no threads are waiting for this lock, try to just
413 atomically release it. */
415 oldstatus
= lock
->__status
;
416 if (oldstatus
== 0 || oldstatus
== 1) {
417 if (compare_and_swap_with_release_semantics (&lock
->__status
, oldstatus
,
418 0, &lock
->__spinlock
))
424 /* Process the entire queue of wait nodes. Remove all abandoned
425 wait nodes and put them into the global free queue, and
426 remember the one unabandoned node which refers to the thread
427 having the highest priority. */
429 pp_max_prio
= pp_node
= pp_head
;
430 p_max_prio
= p_node
= *pp_head
;
/* The queue is terminated by the value 1 (the "taken, no waiters"
   status), not by a null pointer. */
433 while (p_node
!= (struct wait_node
*) 1) {
436 if (p_node
->abandoned
) {
437 /* Remove abandoned node. */
438 wait_node_dequeue(pp_head
, pp_node
, p_node
, &lock
->__spinlock
);
439 wait_node_free(p_node
);
440 READ_MEMORY_BARRIER();
443 } else if ((prio
= p_node
->thr
->p_priority
) >= maxprio
) {
444 /* Otherwise remember it if its thread has a higher or equal priority
445 compared to that of any node seen thus far. */
447 pp_max_prio
= pp_node
;
451 pp_node
= &p_node
->next
;
452 READ_MEMORY_BARRIER();
456 READ_MEMORY_BARRIER();
458 /* If all threads abandoned, go back to top */
459 if (maxprio
== INT_MIN
)
462 ASSERT (p_max_prio
!= (struct wait_node
*) 1);
464 /* Now we want to remove the max priority thread's wait node from
465 the list. Before we can do this, we must atomically try to change the
466 node's abandon state from zero to nonzero. If we succeed, that means we
467 have the node that we will wake up. If we failed, then it means the
468 thread timed out and abandoned the node in which case we repeat the
469 whole unlock operation. */
471 if (!testandset(&p_max_prio
->abandoned
)) {
472 wait_node_dequeue(pp_head
, pp_max_prio
, p_max_prio
, &lock
->__spinlock
);
473 WRITE_MEMORY_BARRIER();
474 restart(p_max_prio
->thr
);
481 /* Compare-and-swap emulation with a spinlock */
483 #ifdef TEST_FOR_COMPARE_AND_SWAP
/* True iff the processor supports compare-and-swap; relevant when
   TEST_FOR_COMPARE_AND_SWAP is defined.  NOTE(review): this region is
   garbled by extraction -- braces, the declaration/initialization of
   `res`, and the spinlock-release/return tail of
   __pthread_compare_and_swap are missing from the visible text. */
484 int __pthread_has_cas
= 0;
487 #if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
489 static void __pthread_acquire(int * spinlock
);
/* Emulate compare-and-swap on *PTR under the protection of *SPINLOCK:
   take the spinlock (spinning in __pthread_acquire if the inline
   test-and-set fails), do the compare/store, then publish before the
   spinlock is released. */
491 int __pthread_compare_and_swap(long * ptr
, long oldval
, long newval
,
495 if (testandset(spinlock
)) __pthread_acquire(spinlock
);
496 if (*ptr
== oldval
) {
497 *ptr
= newval
; res
= 1;
501 /* Prevent reordering of store to *ptr above and store to *spinlock below */
502 WRITE_MEMORY_BARRIER();
507 /* This function is called if the inlined test-and-set
508 in __pthread_compare_and_swap() failed */
510 /* The retry strategy is as follows:
511 - We test and set the spinlock MAX_SPIN_COUNT times, calling
512 sched_yield() each time. This gives ample opportunity for other
513 threads with priority >= our priority to make progress and
514 release the spinlock.
515 - If a thread with priority < our priority owns the spinlock,
516 calling sched_yield() repeatedly is useless, since we're preventing
517 the owning thread from making progress and releasing the spinlock.
518 So, after MAX_SPIN_LOCK attempts, we suspend the calling thread
519 using nanosleep(). This again should give time to the owning thread
520 for releasing the spinlock.
521 Notice that the nanosleep() interval must not be too small,
522 since the kernel does busy-waiting for short intervals in a realtime
523 process (!). The smallest duration that guarantees thread
524 suspension is currently 2ms.
525 - When nanosleep() returns, we try again, doing MAX_SPIN_COUNT
526 sched_yield(), then sleeping again if needed. */
528 static void __pthread_acquire(int * spinlock
)
533 while (testandset(spinlock
)) {
534 if (cnt
< MAX_SPIN_COUNT
) {
539 tm
.tv_nsec
= SPIN_SLEEP_DURATION
;
540 nanosleep(&tm
, NULL
);