From 9e63275633b27377ff402f5bdde64584b7461a01 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 1 Jun 2006 23:18:11 +0000 Subject: [PATCH] Back-port Postgres 7.4 spinlock code into 7.3 branch. This adds previously-missing spinlock code for x86_64 and ppc64 architectures, converts the ppc/ppc64 code into gcc inlines, and provides a better spinlock backoff algorithm on all architectures. Aside from being almost identical to the community 7.4 source code, this exact patch has been in use for awhile in Red Hat's RHEL3 RPMs, so I have pretty good confidence in it. Why bother, you ask? I'm taking pity on a couple of buildfarm members that have been vainly trying to build 7.3 on these 64-bit architectures. --- src/backend/storage/lmgr/s_lock.c | 174 +++++++++++++++--------------- src/include/port/linux.h | 12 ++- src/include/storage/s_lock.h | 57 ++++++++-- 3 files changed, 143 insertions(+), 100 deletions(-) diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c index a362737514e..2f4d0fc9ce0 100644 --- a/src/backend/storage/lmgr/s_lock.c +++ b/src/backend/storage/lmgr/s_lock.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/s_lock.c,v 1.9.2.1 2006/05/11 22:00:12 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/s_lock.c,v 1.9.2.2 2006/06/01 23:18:11 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -27,13 +27,15 @@ static void s_lock_stuck(volatile slock_t *lock, const char *file, int line) { +#if defined(S_LOCK_TEST) fprintf(stderr, - "\nFATAL: s_lock(%p) at %s:%d, stuck spinlock. Aborting.\n", - lock, file, line); - fprintf(stdout, - "\nFATAL: s_lock(%p) at %s:%d, stuck spinlock. Aborting.\n", + "\nStuck spinlock (%p) detected at %s:%d.\n", lock, file, line); - abort(); + exit(1); +#else + elog(PANIC, "stuck spinlock (%p) detected at %s:%d", + lock, file, line); +#endif } @@ -43,35 +45,70 @@ s_lock_stuck(volatile slock_t *lock, const char *file, int line) void s_lock(volatile slock_t *lock, const char *file, int line) { - unsigned spins = 0; - unsigned delays = 0; - struct timeval delay; - /* * We loop tightly for awhile, then delay using select() and try * again. Preferably, "awhile" should be a small multiple of the * maximum time we expect a spinlock to be held. 100 iterations seems - * about right. + * about right. In most multi-CPU scenarios, the spinlock is probably + * held by a process on another CPU and will be released before we + * finish 100 iterations. However, on a uniprocessor, the tight loop + * is just a waste of cycles, so don't iterate thousands of times. + * + * Once we do decide to block, we use randomly increasing select() + * delays. The first delay is 10 msec, then the delay randomly + * increases to about one second, after which we reset to 10 msec and + * start again. The idea here is that in the presence of heavy + * contention we need to increase the delay, else the spinlock holder + * may never get to run and release the lock. (Consider situation + * where spinlock holder has been nice'd down in priority by the + * scheduler --- it will not get scheduled until all would-be + * acquirers are sleeping, so if we always use a 10-msec sleep, there + * is a real possibility of starvation.) But we can't just clamp the + * delay to an upper bound, else it would take a long time to make a + * reasonable number of tries. + * + * We time out and declare error after NUM_DELAYS delays (thus, exactly + * that many tries). 
With the given settings, this will usually take + * 3 or so minutes. It seems better to fix the total number of tries + * (and thus the probability of unintended failure) than to fix the + * total time spent. * - * We use a 10 millisec select delay because that is the lower limit on - * many platforms. The timeout is figured on this delay only, and so - * the nominal 1 minute is a lower bound. + * The select() delays are measured in centiseconds (0.01 sec) because 10 + * msec is a common resolution limit at the OS level. */ #define SPINS_PER_DELAY 100 -#define DELAY_MSEC 10 -#define TIMEOUT_MSEC (60 * 1000) +#define NUM_DELAYS 1000 +#define MIN_DELAY_CSEC 1 +#define MAX_DELAY_CSEC 100 + + int spins = 0; + int delays = 0; + int cur_delay = MIN_DELAY_CSEC; + struct timeval delay; while (TAS(lock)) { if (++spins > SPINS_PER_DELAY) { - if (++delays > (TIMEOUT_MSEC / DELAY_MSEC)) + if (++delays > NUM_DELAYS) s_lock_stuck(lock, file, line); - delay.tv_sec = 0; - delay.tv_usec = DELAY_MSEC * 1000; + delay.tv_sec = cur_delay / 100; + delay.tv_usec = (cur_delay % 100) * 10000; (void) select(0, NULL, NULL, NULL, &delay); +#if defined(S_LOCK_TEST) + fprintf(stdout, "*"); + fflush(stdout); +#endif + + /* increase delay by a random fraction between 1X and 2X */ + cur_delay += (int) (cur_delay * + (((double) random()) / ((double) MAX_RANDOM_VALUE)) + 0.5); + /* wrap back to minimum delay when max is exceeded */ + if (cur_delay > MAX_DELAY_CSEC) + cur_delay = MIN_DELAY_CSEC; + spins = 0; } } @@ -111,64 +148,6 @@ _success: \n\ } #endif /* __m68k__ */ -#if defined(__APPLE__) && defined(__ppc__) -/* used in darwin. */ -/* We key off __APPLE__ here because this function differs from - * the LinuxPPC implementation only in compiler syntax. - * - * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002, - * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop. - */ -static void -tas_dummy() -{ - __asm__ __volatile__( - "\ - .globl tas \n\ - .globl _tas \n\ -_tas: \n\ -tas: \n\ - lwarx r5,0,r3 \n\ - cmpwi r5,0 \n\ - bne fail \n\ - addi r5,r5,1 \n\ - stwcx. r5,0,r3 \n\ - beq success \n\ -fail: li r3,1 \n\ - blr \n\ -success: \n\ - isync \n\ - li r3,0 \n\ - blr \n\ -"); -} -#endif /* __APPLE__ && __ppc__ */ - -#if defined(__powerpc__) -/* Note: need a nice gcc constrained asm version so it can be inlined */ -static void -tas_dummy() -{ - __asm__ __volatile__( - "\ -.global tas \n\ -tas: \n\ - lwarx 5,0,3 \n\ - cmpwi 5,0 \n\ - bne fail \n\ - addi 5,5,1 \n\ - stwcx. 5,0,3 \n\ - beq success \n\ -fail: li 3,1 \n\ - blr \n\ -success: \n\ - isync \n\ - li 3,0 \n\ - blr \n\ -"); -} -#endif /* __powerpc__ */ - #if defined(__mips__) && !defined(__sgi) static void tas_dummy() @@ -263,20 +242,22 @@ tas_dummy() /* really means: extern int tas(slock_t #if defined(S_LOCK_TEST) /* - * test program for verifying a port. + * test program for verifying a port's spinlock support. 
*/ volatile slock_t test_lock; -void +int main() { + srandom((unsigned int) time(NULL)); + S_INIT_LOCK(&test_lock); if (!S_LOCK_FREE(&test_lock)) { - printf("S_LOCK_TEST: failed, lock not initialized.\n"); - exit(1); + printf("S_LOCK_TEST: failed, lock not initialized\n"); + return 1; } S_LOCK(&test_lock); @@ -284,17 +265,34 @@ main() if (S_LOCK_FREE(&test_lock)) { printf("S_LOCK_TEST: failed, lock not locked\n"); - exit(2); + return 1; } - printf("S_LOCK_TEST: this will hang for a few minutes and then abort\n"); - printf(" with a 'stuck spinlock' message if S_LOCK()\n"); - printf(" and TAS() are working.\n"); - s_lock(&test_lock, __FILE__, __LINE__); + S_UNLOCK(&test_lock); - printf("S_LOCK_TEST: failed, lock not locked~\n"); - exit(3); + if (!S_LOCK_FREE(&test_lock)) + { + printf("S_LOCK_TEST: failed, lock not unlocked\n"); + return 1; + } + + S_LOCK(&test_lock); + + if (S_LOCK_FREE(&test_lock)) + { + printf("S_LOCK_TEST: failed, lock not re-locked\n"); + return 1; + } + + printf("S_LOCK_TEST: this will print %d stars and then\n", NUM_DELAYS); + printf(" exit with a 'stuck spinlock' message\n"); + printf(" if S_LOCK() and TAS() are working.\n"); + fflush(stdout); + + s_lock(&test_lock, __FILE__, __LINE__); + printf("S_LOCK_TEST: failed, lock not locked\n"); + return 1; } #endif /* S_LOCK_TEST */ diff --git a/src/include/port/linux.h b/src/include/port/linux.h index d4da17da45c..3d00c0aecdc 100644 --- a/src/include/port/linux.h +++ b/src/include/port/linux.h @@ -4,7 +4,7 @@ #endif -#if defined(__i386__) +#if defined(__i386__) || defined(__x86_64__) typedef unsigned char slock_t; #define HAS_TEST_AND_SET @@ -14,6 +14,11 @@ typedef unsigned char slock_t; #define HAS_TEST_AND_SET +#elif defined(__powerpc64__) +typedef unsigned long slock_t; + +#define HAS_TEST_AND_SET + #elif defined(__powerpc__) typedef unsigned int slock_t; @@ -44,4 +49,9 @@ typedef unsigned int slock_t; #define HAS_TEST_AND_SET +#elif defined(__mc68000__) +typedef unsigned char slock_t; + +#define HAS_TEST_AND_SET + #endif diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h index d412243e50f..b068645d1f3 100644 --- a/src/include/storage/s_lock.h +++ b/src/include/storage/s_lock.h @@ -63,7 +63,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: s_lock.h,v 1.101.2.1 2002/11/22 01:13:28 tgl Exp $ + * $Id: s_lock.h,v 1.101.2.2 2006/06/01 23:18:11 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -76,7 +76,7 @@ #if defined(HAS_TEST_AND_SET) -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__ICC) /************************************************************************* * All the gcc inlines */ @@ -94,7 +94,7 @@ */ -#if defined(__i386__) +#if defined(__i386__) || defined(__x86_64__) /* AMD Opteron */ #define TAS(lock) tas(lock) static __inline__ int @@ -110,10 +110,11 @@ tas(volatile slock_t *lock) return (int) _res; } -#endif /* __i386__ */ +#endif /* __i386__ || __x86_64__ */ -#ifdef __ia64__ +/* Intel Itanium */ +#if defined(__ia64__) || defined(__ia64) #define TAS(lock) tas(lock) static __inline__ int @@ -130,10 +131,10 @@ tas(volatile slock_t *lock) return (int) ret; } -#endif /* __ia64__ */ +#endif /* __ia64__ || __ia64 */ -#if defined(__arm__) || defined(__arm__) +#if defined(__arm__) || defined(__arm) #define TAS(lock) tas(lock) static __inline__ int @@ -221,6 +222,41 @@ tas(volatile slock_t *lock) #endif /* __sparc__ */ +#if defined(__ppc__) 
|| defined(__powerpc__) || defined(__powerpc64__) +#define TAS(lock) tas(lock) +/* + * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002, + * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop. + */ +static __inline__ int +tas(volatile slock_t *lock) +{ + slock_t _t; + int _res; + + __asm__ __volatile__( +" lwarx %0,0,%2 \n" +" cmpwi %0,0 \n" +" bne 1f \n" +" addi %0,%0,1 \n" +" stwcx. %0,0,%2 \n" +" beq 2f \n" +"1: li %1,1 \n" +" b 3f \n" +"2: \n" +" isync \n" +" li %1,0 \n" +"3: \n" + +: "=&r" (_t), "=r" (_res) +: "r" (lock) +: "cc", "memory" + ); + return _res; +} + +#endif /* powerpc */ + #if defined(__mc68000__) && defined(__linux__) #define TAS(lock) tas(lock) @@ -244,10 +280,9 @@ tas(volatile slock_t *lock) #endif /* defined(__mc68000__) && defined(__linux__) */ -#if defined(__ppc__) || defined(__powerpc__) +#if defined(__ppc__) || defined(__powerpc__) || defined(__powerpc64__) /* - * We currently use out-of-line assembler for TAS on PowerPC; see s_lock.c. - * S_UNLOCK is almost standard but requires a "sync" instruction. + * PowerPC S_UNLOCK is almost standard but requires a "sync" instruction. */ #define S_UNLOCK(lock) \ do \ @@ -256,7 +291,7 @@ do \ *((volatile slock_t *) (lock)) = 0; \ } while (0) -#endif /* defined(__ppc__) || defined(__powerpc__) */ +#endif /* powerpc */ #if defined(NEED_VAX_TAS_ASM) -- 2.39.5
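
For anyone studying this patch in isolation, below is a minimal, self-contained sketch of the spin-then-randomized-backoff loop it installs in s_lock(). It is an illustration under stated assumptions, not the patched code: the tas() stand-in uses GCC's __sync_lock_test_and_set builtin instead of the per-architecture inline assembly in s_lock.h, MAX_RANDOM_VALUE is assumed to be 0x7FFFFFFF (PostgreSQL defines it in its own headers), spin_acquire() is a hypothetical wrapper name, and the stuck-lock path mirrors the S_LOCK_TEST branch (fprintf + exit) rather than elog(PANIC).

/*
 * Sketch of the spin-then-backoff acquire loop added by this patch.
 * Illustration only; see the assumptions listed above.
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include <sys/select.h>

#define SPINS_PER_DELAY  100        /* tight-loop iterations before sleeping */
#define NUM_DELAYS       1000       /* give up ("stuck spinlock") after this many sleeps */
#define MIN_DELAY_CSEC   1          /* first sleep is 10 msec */
#define MAX_DELAY_CSEC   100        /* wrap back to minimum past ~1 second */
#define MAX_RANDOM_VALUE 0x7FFFFFFF /* assumed; range of random() on common platforms */

typedef unsigned char slock_t;

/* Stand-in for the patch's TAS(): returns 0 if the lock was acquired. */
static int
tas(volatile slock_t *lock)
{
    return __sync_lock_test_and_set(lock, 1) != 0;
}

static void
spin_acquire(volatile slock_t *lock)
{
    int         spins = 0;
    int         delays = 0;
    int         cur_delay = MIN_DELAY_CSEC;
    struct timeval delay;

    while (tas(lock))
    {
        if (++spins > SPINS_PER_DELAY)
        {
            if (++delays > NUM_DELAYS)
            {
                fprintf(stderr, "stuck spinlock (%p) detected\n", (void *) lock);
                exit(1);
            }

            /* sleep cur_delay centiseconds */
            delay.tv_sec = cur_delay / 100;
            delay.tv_usec = (cur_delay % 100) * 10000;
            (void) select(0, NULL, NULL, NULL, &delay);

            /* increase delay by a random fraction between 1X and 2X */
            cur_delay += (int) (cur_delay *
                  (((double) random()) / ((double) MAX_RANDOM_VALUE)) + 0.5);
            /* wrap back to minimum delay when max is exceeded */
            if (cur_delay > MAX_DELAY_CSEC)
                cur_delay = MIN_DELAY_CSEC;

            spins = 0;
        }
    }
}

int
main(void)
{
    volatile slock_t lock = 0;

    srandom((unsigned int) time(NULL));

    spin_acquire(&lock);        /* free lock: acquired on the first TAS */
    printf("acquired free lock\n");

    /* Lock is still held; trying again exercises the backoff path. */
    printf("re-acquiring held lock; expect roughly 3 minutes of sleeps, then exit\n");
    spin_acquire(&lock);
    return 0;
}

The design point the sketch makes visible is that the sleep grows by a random factor between 1X and 2X each time and wraps back to 10 msec once it exceeds about a second, so a de-prioritized lock holder eventually gets scheduled while the total number of tries (and hence the probability of a spurious "stuck spinlock" failure) stays fixed at NUM_DELAYS.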
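To exercise the reworked S_LOCK_TEST harness itself on a new port, s_lock.c has to be compiled standalone with the S_LOCK_TEST macro defined; the exact command depends on the tree and compiler, but it is along the lines of cc -DS_LOCK_TEST -I src/include src/backend/storage/lmgr/s_lock.c -o s_lock_test (paths and flags here are illustrative, not taken from the patch). A working port prints the informational banner, then NUM_DELAYS stars as the backoff sleeps elapse, and finally the "stuck spinlock" message; any earlier "failed" line points at a broken TAS() or S_UNLOCK() for that platform.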