From: Emeric Brun Date: Fri, 7 Jul 2017 08:26:46 +0000 (+0200) Subject: MINOR: threads: Add atomic-ops and plock includes in import dir X-Git-Tag: v1.8-rc1~168 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7122ab31b195edb511fecf9c20904701970b195f;p=thirdparty%2Fhaproxy.git MINOR: threads: Add atomic-ops and plock includes in import dir atomic-ops header contains some low-level functions to do atomic operations. These operations are used by the progressive locks (plock). --- diff --git a/include/import/atomic-ops.h b/include/import/atomic-ops.h new file mode 100644 index 0000000000..ae2c075c1d --- /dev/null +++ b/include/import/atomic-ops.h @@ -0,0 +1,510 @@ +#ifndef PL_ATOMIC_OPS_H +#define PL_ATOMIC_OPS_H + + +/* compiler-only memory barrier, for use around locks */ +static inline void pl_barrier() +{ + asm volatile("" ::: "memory"); +} + +/* full memory barrier */ +static inline void pl_mb() +{ + __sync_synchronize(); +} + +#if defined(__i386__) || defined (__i486__) || defined (__i586__) || defined (__i686__) || defined (__x86_64__) + +/* + * Generic functions common to the x86 family + */ + +static inline void pl_cpu_relax() +{ + asm volatile("rep;nop\n"); +} + +/* increment integer value pointed to by pointer , and return non-zero if + * result is non-null. + */ +#define pl_inc(ptr) ( \ + (sizeof(long) == 8 && sizeof(*(ptr)) == 8) ? ({ \ + unsigned char ret; \ + asm volatile("lock incq %0\n" \ + "setne %1\n" \ + : "+m" (*(ptr)), "=qm" (ret) \ + : \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 4) ? ({ \ + unsigned char ret; \ + asm volatile("lock incl %0\n" \ + "setne %1\n" \ + : "+m" (*(ptr)), "=qm" (ret) \ + : \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 2) ? ({ \ + unsigned char ret; \ + asm volatile("lock incw %0\n" \ + "setne %1\n" \ + : "+m" (*(ptr)), "=qm" (ret) \ + : \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 1) ? ({ \ + unsigned char ret; \ + asm volatile("lock incb %0\n" \ + "setne %1\n" \ + : "+m" (*(ptr)), "=qm" (ret) \ + : \ + : "cc"); \ + ret; /* return value */ \ + }) : ({ \ + void __unsupported_argument_size_for_pl_inc__(char *,int); \ + __unsupported_argument_size_for_pl_inc__(__FILE__,__LINE__); \ + 0; \ + }) \ +) + +/* decrement integer value pointed to by pointer , and return non-zero if + * result is non-null. + */ +#define pl_dec(ptr) ( \ + (sizeof(long) == 8 && sizeof(*(ptr)) == 8) ? ({ \ + unsigned char ret; \ + asm volatile("lock decq %0\n" \ + "setne %1\n" \ + : "+m" (*(ptr)), "=qm" (ret) \ + : \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 4) ? ({ \ + unsigned char ret; \ + asm volatile("lock decl %0\n" \ + "setne %1\n" \ + : "+m" (*(ptr)), "=qm" (ret) \ + : \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 2) ? ({ \ + unsigned char ret; \ + asm volatile("lock decw %0\n" \ + "setne %1\n" \ + : "+m" (*(ptr)), "=qm" (ret) \ + : \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 1) ? 
({ \ + unsigned char ret; \ + asm volatile("lock decb %0\n" \ + "setne %1\n" \ + : "+m" (*(ptr)), "=qm" (ret) \ + : \ + : "cc"); \ + ret; /* return value */ \ + }) : ({ \ + void __unsupported_argument_size_for_pl_dec__(char *,int); \ + __unsupported_argument_size_for_pl_dec__(__FILE__,__LINE__); \ + 0; \ + }) \ +) + +/* increment integer value pointed to by pointer , no return */ +#define pl_inc_noret(ptr) ({ \ + if (sizeof(long) == 8 && sizeof(*(ptr)) == 8) { \ + asm volatile("lock incq %0\n" \ + : "+m" (*(ptr)) \ + : \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 4) { \ + asm volatile("lock incl %0\n" \ + : "+m" (*(ptr)) \ + : \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 2) { \ + asm volatile("lock incw %0\n" \ + : "+m" (*(ptr)) \ + : \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 1) { \ + asm volatile("lock incb %0\n" \ + : "+m" (*(ptr)) \ + : \ + : "cc"); \ + } else { \ + void __unsupported_argument_size_for_pl_inc_noret__(char *,int); \ + __unsupported_argument_size_for_pl_inc_noret__(__FILE__,__LINE__); \ + } \ +}) + +/* decrement integer value pointed to by pointer , no return */ +#define pl_dec_noret(ptr) ({ \ + if (sizeof(long) == 8 && sizeof(*(ptr)) == 8) { \ + asm volatile("lock decq %0\n" \ + : "+m" (*(ptr)) \ + : \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 4) { \ + asm volatile("lock decl %0\n" \ + : "+m" (*(ptr)) \ + : \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 2) { \ + asm volatile("lock decw %0\n" \ + : "+m" (*(ptr)) \ + : \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 1) { \ + asm volatile("lock decb %0\n" \ + : "+m" (*(ptr)) \ + : \ + : "cc"); \ + } else { \ + void __unsupported_argument_size_for_pl_dec_noret__(char *,int); \ + __unsupported_argument_size_for_pl_dec_noret__(__FILE__,__LINE__); \ + } \ +}) + +/* add integer constant to integer value pointed to by pointer , + * no return. Size of is not checked. + */ +#define pl_add(ptr, x) ({ \ + if (sizeof(long) == 8 && sizeof(*(ptr)) == 8) { \ + asm volatile("lock addq %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned long)(x)) \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 4) { \ + asm volatile("lock addl %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned int)(x)) \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 2) { \ + asm volatile("lock addw %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned short)(x)) \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 1) { \ + asm volatile("lock addb %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned char)(x)) \ + : "cc"); \ + } else { \ + void __unsupported_argument_size_for_pl_add__(char *,int); \ + __unsupported_argument_size_for_pl_add__(__FILE__,__LINE__); \ + } \ +}) + +/* subtract integer constant from integer value pointed to by pointer + * , no return. Size of is not checked. 
+ */ +#define pl_sub(ptr, x) ({ \ + if (sizeof(long) == 8 && sizeof(*(ptr)) == 8) { \ + asm volatile("lock subq %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned long)(x)) \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 4) { \ + asm volatile("lock subl %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned int)(x)) \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 2) { \ + asm volatile("lock subw %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned short)(x)) \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 1) { \ + asm volatile("lock subb %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned char)(x)) \ + : "cc"); \ + } else { \ + void __unsupported_argument_size_for_pl_sub__(char *,int); \ + __unsupported_argument_size_for_pl_sub__(__FILE__,__LINE__); \ + } \ +}) + +/* binary and integer value pointed to by pointer with constant , no + * return. Size of is not checked. + */ +#define pl_and(ptr, x) ({ \ + if (sizeof(long) == 8 && sizeof(*(ptr)) == 8) { \ + asm volatile("lock andq %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned long)(x)) \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 4) { \ + asm volatile("lock andl %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned int)(x)) \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 2) { \ + asm volatile("lock andw %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned short)(x)) \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 1) { \ + asm volatile("lock andb %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned char)(x)) \ + : "cc"); \ + } else { \ + void __unsupported_argument_size_for_pl_and__(char *,int); \ + __unsupported_argument_size_for_pl_and__(__FILE__,__LINE__); \ + } \ +}) + +/* binary or integer value pointed to by pointer with constant , no + * return. Size of is not checked. + */ +#define pl_or(ptr, x) ({ \ + if (sizeof(long) == 8 && sizeof(*(ptr)) == 8) { \ + asm volatile("lock orq %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned long)(x)) \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 4) { \ + asm volatile("lock orl %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned int)(x)) \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 2) { \ + asm volatile("lock orw %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned short)(x)) \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 1) { \ + asm volatile("lock orb %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned char)(x)) \ + : "cc"); \ + } else { \ + void __unsupported_argument_size_for_pl_or__(char *,int); \ + __unsupported_argument_size_for_pl_or__(__FILE__,__LINE__); \ + } \ +}) + +/* binary xor integer value pointed to by pointer with constant , no + * return. Size of is not checked. + */ +#define pl_xor(ptr, x) ({ \ + if (sizeof(long) == 8 && sizeof(*(ptr)) == 8) { \ + asm volatile("lock xorq %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned long)(x)) \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 4) { \ + asm volatile("lock xorl %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned int)(x)) \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 2) { \ + asm volatile("lock xorw %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned short)(x)) \ + : "cc"); \ + } else if (sizeof(*(ptr)) == 1) { \ + asm volatile("lock xorb %1, %0\n" \ + : "+m" (*(ptr)) \ + : "er" ((unsigned char)(x)) \ + : "cc"); \ + } else { \ + void __unsupported_argument_size_for_pl_xor__(char *,int); \ + __unsupported_argument_size_for_pl_xor__(__FILE__,__LINE__); \ + } \ +}) + +/* test and set bit in integer value pointed to by pointer . Returns + * 0 if the bit was not set, or ~0 of the same type as *ptr if it was set. 
Note + * that there is no 8-bit equivalent operation. + */ +#define pl_bts(ptr, bit) ( \ + (sizeof(long) == 8 && sizeof(*(ptr)) == 8) ? ({ \ + unsigned long ret; \ + asm volatile("lock btsq %2, %0\n\t" \ + "sbb %1, %1\n\t" \ + : "+m" (*(ptr)), "=r" (ret) \ + : "Ir" ((unsigned long)(bit)) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 4) ? ({ \ + unsigned int ret; \ + asm volatile("lock btsl %2, %0\n\t" \ + "sbb %1, %1\n\t" \ + : "+m" (*(ptr)), "=r" (ret) \ + : "Ir" ((unsigned int)(bit)) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 2) ? ({ \ + unsigned short ret; \ + asm volatile("lock btsw %2, %0\n\t" \ + "sbb %1, %1\n\t" \ + : "+m" (*(ptr)), "=r" (ret) \ + : "Ir" ((unsigned short)(bit)) \ + : "cc"); \ + ret; /* return value */ \ + }) : ({ \ + void __unsupported_argument_size_for_pl_bts__(char *,int); \ + __unsupported_argument_size_for_pl_bts__(__FILE__,__LINE__); \ + 0; \ + }) \ +) + +/* Note: for an unclear reason, gcc's __sync_fetch_and_add() implementation + * produces less optimal than hand-crafted asm code so let's implement here the + * operations we need for the most common archs. + */ + +/* fetch-and-add: fetch integer value pointed to by pointer , add to + * to <*ptr> and return the previous value. + */ +#define pl_xadd(ptr, x) ( \ + (sizeof(long) == 8 && sizeof(*(ptr)) == 8) ? ({ \ + unsigned long ret = (unsigned long)(x); \ + asm volatile("lock xaddq %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 4) ? ({ \ + unsigned int ret = (unsigned int)(x); \ + asm volatile("lock xaddl %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 2) ? ({ \ + unsigned short ret = (unsigned short)(x); \ + asm volatile("lock xaddw %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 1) ? ({ \ + unsigned char ret = (unsigned char)(x); \ + asm volatile("lock xaddb %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : ({ \ + void __unsupported_argument_size_for_pl_xadd__(char *,int); \ + __unsupported_argument_size_for_pl_xadd__(__FILE__,__LINE__); \ + 0; \ + }) \ +) + +/* exchage value with integer value pointed to by pointer , and return + * previous <*ptr> value. must be of the same size as <*ptr>. + */ +#define pl_xchg(ptr, x) ( \ + (sizeof(long) == 8 && sizeof(*(ptr)) == 8) ? ({ \ + unsigned long ret = (unsigned long)(x); \ + asm volatile("xchgq %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 4) ? ({ \ + unsigned int ret = (unsigned int)(x); \ + asm volatile("xchgl %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 2) ? ({ \ + unsigned short ret = (unsigned short)(x); \ + asm volatile("xchgw %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 1) ? 
({ \ + unsigned char ret = (unsigned char)(x); \ + asm volatile("xchgb %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : ({ \ + void __unsupported_argument_size_for_pl_xchg__(char *,int); \ + __unsupported_argument_size_for_pl_xchg__(__FILE__,__LINE__); \ + 0; \ + }) \ +) + +/* compare integer value <*ptr> with and exchange it with if + * it matches, and return . and must be of the same size as + * <*ptr>. + */ +#define pl_cmpxchg(ptr, old, new) ( \ + (sizeof(long) == 8 && sizeof(*(ptr)) == 8) ? ({ \ + unsigned long ret; \ + asm volatile("lock cmpxchgq %2,%1" \ + : "=a" (ret), "+m" (*(ptr)) \ + : "r" ((unsigned long)(new)), \ + "0" ((unsigned long)(old)) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 4) ? ({ \ + unsigned int ret; \ + asm volatile("lock cmpxchgl %2,%1" \ + : "=a" (ret), "+m" (*(ptr)) \ + : "r" ((unsigned int)(new)), \ + "0" ((unsigned int)(old)) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 2) ? ({ \ + unsigned short ret; \ + asm volatile("lock cmpxchgw %2,%1" \ + : "=a" (ret), "+m" (*(ptr)) \ + : "r" ((unsigned short)(new)), \ + "0" ((unsigned short)(old)) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 1) ? ({ \ + unsigned char ret; \ + asm volatile("lock cmpxchgb %2,%1" \ + : "=a" (ret), "+m" (*(ptr)) \ + : "r" ((unsigned char)(new)), \ + "0" ((unsigned char)(old)) \ + : "cc"); \ + ret; /* return value */ \ + }) : ({ \ + void __unsupported_argument_size_for_pl_cmpxchg__(char *,int); \ + __unsupported_argument_size_for_pl_cmpxchg__(__FILE__,__LINE__); \ + 0; \ + }) \ +) + +#else +/* generic implementations */ + +static inline void pl_cpu_relax() +{ + asm volatile(""); +} + +#define pl_inc_noret(ptr) ({ __sync_add_and_fetch((ptr), 1); }) +#define pl_dec_noret(ptr) ({ __sync_sub_and_fetch((ptr), 1); }) +#define pl_inc(ptr) ({ __sync_add_and_fetch((ptr), 1); }) +#define pl_dec(ptr) ({ __sync_sub_and_fetch((ptr), 1); }) +#define pl_add(ptr, x) ({ __sync_add_and_fetch((ptr), (x)); }) +#define pl_and(ptr, x) ({ __sync_and_and_fetch((ptr), (x)); }) +#define pl_or(ptr, x) ({ __sync_or_and_fetch((ptr), (x)); }) +#define pl_xor(ptr, x) ({ __sync_xor_and_fetch((ptr), (x)); }) +#define pl_sub(ptr, x) ({ __sync_sub_and_fetch((ptr), (x)); }) +#define pl_xadd(ptr, x) ({ __sync_fetch_and_add((ptr), (x)); }) +#define pl_cmpxchg(ptr, o, n) ({ __sync_val_compare_and_swap((ptr), (o), (n)); }) +#define pl_xchg(ptr, x) ({ typeof(*(ptr)) t; \ + do { t = *(ptr); \ + } while (!__sync_bool_compare_and_swap((ptr), t, (x))); \ + t; \ + }) + +#endif + +#endif /* PL_ATOMIC_OPS_H */ diff --git a/include/import/plock.h b/include/import/plock.h new file mode 100644 index 0000000000..5f23f223c2 --- /dev/null +++ b/include/import/plock.h @@ -0,0 +1,427 @@ +/* plock - progressive locks + * + * Copyright (C) 2012-2017 Willy Tarreau + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "atomic-ops.h" + +/* 64 bit */ +#define PLOCK64_RL_1 0x0000000000000004ULL +#define PLOCK64_RL_ANY 0x00000000FFFFFFFCULL +#define PLOCK64_SL_1 0x0000000100000000ULL +#define PLOCK64_SL_ANY 0x0000000300000000ULL +#define PLOCK64_WL_1 0x0000000400000000ULL +#define PLOCK64_WL_ANY 0xFFFFFFFC00000000ULL + +/* 32 bit */ +#define PLOCK32_RL_1 0x00000004 +#define PLOCK32_RL_ANY 0x0000FFFC +#define PLOCK32_SL_1 0x00010000 +#define PLOCK32_SL_ANY 0x00030000 +#define PLOCK32_WL_1 0x00040000 +#define PLOCK32_WL_ANY 0xFFFC0000 + +/* dereferences <*p> as unsigned long without causing aliasing issues */ +#define pl_deref_long(p) ({ volatile unsigned long *__plock_l = (void *)(p); *__plock_l; }) + +/* dereferences <*p> as unsigned int without causing aliasing issues */ +#define pl_deref_int(p) ({ volatile unsigned int *__plock_i = (void *)(p); *__plock_i; }) + +/* request shared read access (R), return non-zero on success, otherwise 0 */ +#define pl_try_r(lock) ( \ + (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ + unsigned long ret = pl_deref_long(lock) & PLOCK64_WL_ANY; \ + pl_barrier(); \ + if (!__builtin_expect(ret, 0)) { \ + ret = pl_xadd((lock), PLOCK64_RL_1) & PLOCK64_WL_ANY; \ + if (__builtin_expect(ret, 0)) \ + pl_sub((lock), PLOCK64_RL_1); \ + } \ + !ret; /* return value */ \ + }) : (sizeof(*(lock)) == 4) ? ({ \ + unsigned int ret = pl_deref_int(lock) & PLOCK32_WL_ANY; \ + pl_barrier(); \ + if (!__builtin_expect(ret, 0)) { \ + ret = pl_xadd((lock), PLOCK32_RL_1) & PLOCK32_WL_ANY; \ + if (__builtin_expect(ret, 0)) \ + pl_sub((lock), PLOCK32_RL_1); \ + } \ + !ret; /* return value */ \ + }) : ({ \ + void __unsupported_argument_size_for_pl_try_r__(char *,int); \ + __unsupported_argument_size_for_pl_try_r__(__FILE__,__LINE__); \ + 0; \ + }) \ +) + +/* request shared read access (R) and wait for it */ +#define pl_take_r(lock) \ + do { \ + while (__builtin_expect(pl_try_r(lock), 1) == 0) \ + pl_cpu_relax(); \ + } while (0) + +/* release the read access (R) lock */ +#define pl_drop_r(lock) ( \ + (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ + pl_sub(lock, PLOCK64_RL_1); \ + }) : (sizeof(*(lock)) == 4) ? ({ \ + pl_sub(lock, PLOCK32_RL_1); \ + }) : ({ \ + void __unsupported_argument_size_for_pl_drop_r__(char *,int); \ + __unsupported_argument_size_for_pl_drop_r__(__FILE__,__LINE__); \ + }) \ +) + +/* request a seek access (S), return non-zero on success, otherwise 0 */ +#define pl_try_s(lock) ( \ + (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ + unsigned long ret = pl_deref_long(lock); \ + pl_barrier(); \ + if (!__builtin_expect(ret & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \ + ret = pl_xadd((lock), PLOCK64_SL_1 | PLOCK64_RL_1) & \ + (PLOCK64_WL_ANY | PLOCK64_SL_ANY); \ + if (__builtin_expect(ret, 0)) \ + pl_sub((lock), PLOCK64_SL_1 | PLOCK64_RL_1); \ + } \ + !ret; /* return value */ \ + }) : (sizeof(*(lock)) == 4) ? 
({ \ + unsigned int ret = pl_deref_int(lock); \ + pl_barrier(); \ + if (!__builtin_expect(ret & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \ + ret = pl_xadd((lock), PLOCK32_SL_1 | PLOCK32_RL_1) & \ + (PLOCK32_WL_ANY | PLOCK32_SL_ANY); \ + if (__builtin_expect(ret, 0)) \ + pl_sub((lock), PLOCK32_SL_1 | PLOCK32_RL_1); \ + } \ + !ret; /* return value */ \ + }) : ({ \ + void __unsupported_argument_size_for_pl_try_s__(char *,int); \ + __unsupported_argument_size_for_pl_try_s__(__FILE__,__LINE__); \ + 0; \ + }) \ +) + +/* request a seek access (S) and wait for it */ +#define pl_take_s(lock) \ + do { \ + while (__builtin_expect(pl_try_s(lock), 0) == 0) \ + pl_cpu_relax(); \ + } while (0) + +/* release the seek access (S) lock */ +#define pl_drop_s(lock) ( \ + (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ + pl_sub(lock, PLOCK64_SL_1 + PLOCK64_RL_1); \ + }) : (sizeof(*(lock)) == 4) ? ({ \ + pl_sub(lock, PLOCK32_SL_1 + PLOCK32_RL_1); \ + }) : ({ \ + void __unsupported_argument_size_for_pl_drop_s__(char *,int); \ + __unsupported_argument_size_for_pl_drop_s__(__FILE__,__LINE__); \ + }) \ +) + +/* drop the S lock and go back to the R lock */ +#define pl_stor(lock) ( \ + (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ + pl_sub(lock, PLOCK64_SL_1); \ + }) : (sizeof(*(lock)) == 4) ? ({ \ + pl_sub(lock, PLOCK32_SL_1); \ + }) : ({ \ + void __unsupported_argument_size_for_pl_stor__(char *,int); \ + __unsupported_argument_size_for_pl_stor__(__FILE__,__LINE__); \ + }) \ +) + +/* take the W lock under the S lock */ +#define pl_stow(lock) ( \ + (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ + unsigned long ret = pl_xadd((lock), PLOCK64_WL_1); \ + pl_barrier(); \ + while ((ret & PLOCK64_RL_ANY) != PLOCK64_RL_1) \ + ret = pl_deref_long(lock); \ + }) : (sizeof(*(lock)) == 4) ? ({ \ + unsigned int ret = pl_xadd((lock), PLOCK32_WL_1); \ + pl_barrier(); \ + while ((ret & PLOCK32_RL_ANY) != PLOCK32_RL_1) \ + ret = pl_deref_int(lock); \ + }) : ({ \ + void __unsupported_argument_size_for_pl_stow__(char *,int); \ + __unsupported_argument_size_for_pl_stow__(__FILE__,__LINE__); \ + }) \ +) + +/* drop the W lock and go back to the S lock */ +#define pl_wtos(lock) ( \ + (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ + pl_sub(lock, PLOCK64_WL_1); \ + }) : (sizeof(*(lock)) == 4) ? ({ \ + pl_sub(lock, PLOCK32_WL_1); \ + }) : ({ \ + void __unsupported_argument_size_for_pl_wtos__(char *,int); \ + __unsupported_argument_size_for_pl_wtos__(__FILE__,__LINE__); \ + }) \ +) + +/* drop the W lock and go back to the R lock */ +#define pl_wtor(lock) ( \ + (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ + pl_sub(lock, PLOCK64_WL_1 | PLOCK64_SL_1); \ + }) : (sizeof(*(lock)) == 4) ? ({ \ + pl_sub(lock, PLOCK32_WL_1 | PLOCK32_SL_1); \ + }) : ({ \ + void __unsupported_argument_size_for_pl_wtor__(char *,int); \ + __unsupported_argument_size_for_pl_wtor__(__FILE__,__LINE__); \ + }) \ +) + +/* request a write access (W), return non-zero on success, otherwise 0. + * + * Below there is something important : by taking both W and S, we will cause + * an overflow of W at 4/5 of the maximum value that can be stored into W due + * to the fact that S is 2 bits, so we're effectively adding 5 to the word + * composed by W:S. But for all words multiple of 4 bits, the maximum value is + * multiple of 15 thus of 5. So the largest value we can store with all bits + * set to one will be met by adding 5, and then adding 5 again will place value + * 1 in W and value 0 in S, so we never leave W with 0. 
Also, even upon such an + * overflow, there's no risk to confuse it with an atomic lock because R is not + * null since it will not have overflown. For 32-bit locks, this situation + * happens when exactly 13108 threads try to grab the lock at once, W=1, S=0 + * and R=13108. For 64-bit locks, it happens at 858993460 concurrent writers + * where W=1, S=0 and R=858993460. + */ +#define pl_try_w(lock) ( \ + (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ + unsigned long ret = pl_deref_long(lock); \ + pl_barrier(); \ + if (!__builtin_expect(ret & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \ + ret = pl_xadd((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \ + if (__builtin_expect(ret & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \ + /* a writer, seeker or atomic is present, let's leave */ \ + pl_sub((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \ + ret &= (PLOCK64_WL_ANY | PLOCK64_SL_ANY); /* return value */ \ + } else { \ + /* wait for all other readers to leave */ \ + while (ret) \ + ret = pl_deref_long(lock) - \ + (PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \ + ret = 0; \ + } \ + } \ + !ret; /* return value */ \ + }) : (sizeof(*(lock)) == 4) ? ({ \ + unsigned int ret = pl_deref_int(lock); \ + pl_barrier(); \ + if (!__builtin_expect(ret & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \ + ret = pl_xadd((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \ + if (__builtin_expect(ret & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \ + /* a writer, seeker or atomic is present, let's leave */ \ + pl_sub((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \ + ret &= (PLOCK32_WL_ANY | PLOCK32_SL_ANY); /* return value */ \ + } else { \ + /* wait for all other readers to leave */ \ + while (ret) \ + ret = pl_deref_int(lock) - \ + (PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \ + ret = 0; \ + } \ + } \ + !ret; /* return value */ \ + }) : ({ \ + void __unsupported_argument_size_for_pl_try_w__(char *,int); \ + __unsupported_argument_size_for_pl_try_w__(__FILE__,__LINE__); \ + 0; \ + }) \ +) + +/* request a seek access (W) and wait for it */ +#define pl_take_w(lock) \ + do { \ + while (__builtin_expect(pl_try_w(lock), 0) == 0) \ + pl_cpu_relax(); \ + } while (0) + +/* drop the write (W) lock entirely */ +#define pl_drop_w(lock) ( \ + (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ + pl_sub(lock, PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \ + }) : (sizeof(*(lock)) == 4) ? ({ \ + pl_sub(lock, PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \ + }) : ({ \ + void __unsupported_argument_size_for_pl_drop_w__(char *,int); \ + __unsupported_argument_size_for_pl_drop_w__(__FILE__,__LINE__); \ + }) \ +) + +/* Try to upgrade from R to S, return non-zero on success, otherwise 0. + * This lock will fail if S or W are already held. In case of failure to grab + * the lock, it MUST NOT be retried without first dropping R, or it may never + * complete due to S waiting for R to leave before upgrading to W. + */ +#define pl_try_rtos(lock) ( \ + (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ + unsigned long ret = pl_deref_long(lock); \ + pl_barrier(); \ + if (!__builtin_expect(ret & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \ + ret = pl_xadd((lock), PLOCK64_SL_1) & \ + (PLOCK64_WL_ANY | PLOCK64_SL_ANY); \ + if (__builtin_expect(ret, 0)) \ + pl_sub((lock), PLOCK64_SL_1); \ + } \ + !ret; /* return value */ \ + }) : (sizeof(*(lock)) == 4) ? 
({ \ + unsigned int ret = pl_deref_int(lock); \ + pl_barrier(); \ + if (!__builtin_expect(ret & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \ + ret = pl_xadd((lock), PLOCK32_SL_1) & \ + (PLOCK32_WL_ANY | PLOCK32_SL_ANY); \ + if (__builtin_expect(ret, 0)) \ + pl_sub((lock), PLOCK32_SL_1); \ + } \ + !ret; /* return value */ \ + }) : ({ \ + void __unsupported_argument_size_for_pl_try_rtos__(char *,int); \ + __unsupported_argument_size_for_pl_try_rtos__(__FILE__,__LINE__); \ + 0; \ + }) \ +) + + +/* request atomic write access (A), return non-zero on success, otherwise 0. + * It's a bit tricky as we only use the W bits for this and want to distinguish + * between other atomic users and regular lock users. We have to give up if an + * S lock appears. It's possible that such a lock stays hidden in the W bits + * after an overflow, but in this case R is still held, ensuring we stay in the + * loop until we discover the conflict. The lock only return successfully if all + * readers are gone (or converted to A). + */ +#define pl_try_a(lock) ( \ + (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ + unsigned long ret = pl_deref_long(lock) & PLOCK64_SL_ANY; \ + pl_barrier(); \ + if (!__builtin_expect(ret, 0)) { \ + ret = pl_xadd((lock), PLOCK64_WL_1); \ + while (1) { \ + if (__builtin_expect(ret & PLOCK64_SL_ANY, 0)) { \ + pl_sub((lock), PLOCK64_WL_1); \ + break; /* return !ret */ \ + } \ + ret &= PLOCK64_RL_ANY; \ + if (!__builtin_expect(ret, 0)) \ + break; /* return !ret */ \ + ret = pl_deref_long(lock); \ + } \ + } \ + !ret; /* return value */ \ + }) : (sizeof(*(lock)) == 4) ? ({ \ + unsigned int ret = pl_deref_int(lock) & PLOCK32_SL_ANY; \ + pl_barrier(); \ + if (!__builtin_expect(ret, 0)) { \ + ret = pl_xadd((lock), PLOCK32_WL_1); \ + while (1) { \ + if (__builtin_expect(ret & PLOCK32_SL_ANY, 0)) { \ + pl_sub((lock), PLOCK32_WL_1); \ + break; /* return !ret */ \ + } \ + ret &= PLOCK32_RL_ANY; \ + if (!__builtin_expect(ret, 0)) \ + break; /* return !ret */ \ + ret = pl_deref_int(lock); \ + } \ + } \ + !ret; /* return value */ \ + }) : ({ \ + void __unsupported_argument_size_for_pl_try_a__(char *,int); \ + __unsupported_argument_size_for_pl_try_a__(__FILE__,__LINE__); \ + 0; \ + }) \ +) + +/* request atomic write access (A) and wait for it */ +#define pl_take_a(lock) \ + do { \ + while (__builtin_expect(pl_try_a(lock), 1) == 0) \ + pl_cpu_relax(); \ + } while (0) + +/* release atomic write access (A) lock */ +#define pl_drop_a(lock) ( \ + (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ + pl_sub(lock, PLOCK64_WL_1); \ + }) : (sizeof(*(lock)) == 4) ? ({ \ + pl_sub(lock, PLOCK32_WL_1); \ + }) : ({ \ + void __unsupported_argument_size_for_pl_drop_a__(char *,int); \ + __unsupported_argument_size_for_pl_drop_a__(__FILE__,__LINE__); \ + }) \ +) + +/* Try to upgrade from R to A, return non-zero on success, otherwise 0. + * This lock will fail if S is held or appears while waiting (typically due to + * a previous grab that was disguised as a W due to an overflow). In case of + * failure to grab the lock, it MUST NOT be retried without first dropping R, + * or it may never complete due to S waiting for R to leave before upgrading + * to W. The lock succeeds once there's no more R (ie all of them have either + * completed or were turned to A). + */ +#define pl_try_rtoa(lock) ( \ + (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? 
({ \ + unsigned long ret = pl_deref_long(lock) & PLOCK64_SL_ANY; \ + pl_barrier(); \ + if (!__builtin_expect(ret, 0)) { \ + ret = pl_xadd((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \ + while (1) { \ + if (__builtin_expect(ret & PLOCK64_SL_ANY, 0)) { \ + pl_sub((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \ + break; /* return !ret */ \ + } \ + ret &= PLOCK64_RL_ANY; \ + if (!__builtin_expect(ret, 0)) \ + break; /* return !ret */ \ + ret = pl_deref_long(lock); \ + } \ + } \ + !ret; /* return value */ \ + }) : (sizeof(*(lock)) == 4) ? ({ \ + unsigned int ret = pl_deref_int(lock) & PLOCK32_SL_ANY; \ + pl_barrier(); \ + if (!__builtin_expect(ret, 0)) { \ + ret = pl_xadd((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \ + while (1) { \ + if (__builtin_expect(ret & PLOCK32_SL_ANY, 0)) { \ + pl_sub((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \ + break; /* return !ret */ \ + } \ + ret &= PLOCK32_RL_ANY; \ + if (!__builtin_expect(ret, 0)) \ + break; /* return !ret */ \ + ret = pl_deref_int(lock); \ + } \ + } \ + !ret; /* return value */ \ + }) : ({ \ + void __unsupported_argument_size_for_pl_try_rtoa__(char *,int); \ + __unsupported_argument_size_for_pl_try_rtoa__(__FILE__,__LINE__); \ + 0; \ + }) \ +)
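---

Usage note (not part of the patch): the two headers above only define macros, so a short sketch may help show the intended calling pattern. The example below assumes the headers are reachable as "plock.h" (which includes "atomic-ops.h"), and the names rwlock, shared_counter, reader() and writer() are illustrative only. A plock is simply a zero-initialized unsigned integer word; readers take R, a writer takes S to inspect and upgrades to W with pl_stow() only when it decides to modify, and lock-free updaters take A around a pl_xadd().

    #include <pthread.h>
    #include <stdio.h>
    #include "plock.h"   /* also pulls in atomic-ops.h */

    static unsigned long rwlock = 0;         /* a plock is just a zeroed integer */
    static unsigned long shared_counter = 0; /* data protected by rwlock */

    static void *reader(void *arg)
    {
            unsigned long seen;

            (void)arg;
            pl_take_r(&rwlock);              /* shared read access (R) */
            seen = shared_counter;           /* any number of readers may be here */
            pl_drop_r(&rwlock);
            (void)seen;
            return NULL;
    }

    static void *writer(void *arg)
    {
            (void)arg;
            pl_take_s(&rwlock);              /* seek access (S): readers still allowed */
            if (shared_counter < 1000) {
                    pl_stow(&rwlock);        /* upgrade S to W, waits for readers to leave */
                    shared_counter++;        /* exclusive write access (W) */
                    pl_drop_w(&rwlock);      /* drop the whole W lock */
            } else {
                    pl_drop_s(&rwlock);
            }

            /* atomic write access (A): coexists with other A users, excludes S/W */
            pl_take_a(&rwlock);
            pl_xadd(&shared_counter, 1);
            pl_drop_a(&rwlock);
            return NULL;
    }

    int main(void)
    {
            pthread_t th[4];
            int i;

            for (i = 0; i < 4; i++)
                    pthread_create(&th[i], NULL, (i & 1) ? writer : reader, NULL);
            for (i = 0; i < 4; i++)
                    pthread_join(th[i], NULL);

            printf("counter=%lu\n", shared_counter);
            return 0;
    }

The S level is what makes the locks "progressive": a writer can hold S while readers keep flowing, and only blocks them for the short window between pl_stow() and pl_drop_w().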