From: Dave Hart Date: Sat, 7 Feb 2009 21:05:32 +0000 (+0000) Subject: nt_clockstuff.c, ChangeLog, ntp_iocompletionport.c: X-Git-Tag: NTP_4_2_5P160~10^2~3^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=482c409212bb23c1c4787ddf0910451d688ae4f7;p=thirdparty%2Fntp.git nt_clockstuff.c, ChangeLog, ntp_iocompletionport.c: [BUG 1124] Lock QueryPerformanceCounter() client threads to same CPU bk: 498df79cji6uRQWq_hUVte8zp7Z6dw --- diff --git a/ChangeLog b/ChangeLog index f26fece5e..21b5603ac 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,5 @@ +[BUG 1124] Lock QueryPerformanceCounter() client threads to same CPU + --- (4.2.4p6) 2009/01/08 Released by Harlan Stenn diff --git a/ports/winnt/ntpd/nt_clockstuff.c b/ports/winnt/ntpd/nt_clockstuff.c index bbb4c2036..874a78a9e 100644 --- a/ports/winnt/ntpd/nt_clockstuff.c +++ b/ports/winnt/ntpd/nt_clockstuff.c @@ -92,6 +92,7 @@ static LONGLONG ls_elapsed; static void StartClockThread(void); static void StopClockThread(void); +void lock_thread_to_processor(HANDLE); static CRITICAL_SECTION TimerCritialSection; /* lock for LastTimerCount & LastTimerTime */ @@ -507,18 +508,6 @@ DWORD WINAPI ClockThread(void *arg) (void) arg; /* not used */ - /*++++ Gerhard Junker - * see Platform SDK for QueryPerformanceCounter - * On a multiprocessor machine, it should not matter which processor is called. - * However, you can get different results on different processors due to bugs in the BIOS or the HAL. - * To specify processor affinity for a thread, use the SetThreadAffinityMask function. - * ... 
we will hope, the apc routine will run on the same processor - */ - - SetThreadAffinityMask(GetCurrentThread(), 1L); - - /*---- Gerhard Junker */ - if (WaitableTimerHandle != NULL) { DueTime.QuadPart = 0i64; if (SetWaitableTimer(WaitableTimerHandle, &DueTime, 1L /* ms */, TimerApcFunction, &WaitableTimerHandle, FALSE) != NO_ERROR) { @@ -577,21 +566,99 @@ static void StartClockThread(void) InitializeCriticalSection(&TimerCritialSection); TimerThreadExitRequest = CreateEvent(NULL, FALSE, FALSE, "TimerThreadExitRequest"); - ClockThreadHandle = CreateThread(NULL, 0, ClockThread, NULL, 0, &tid); - if (ClockThreadHandle != NULL) - { + ClockThreadHandle = CreateThread(NULL, 0, ClockThread, NULL, + CREATE_SUSPENDED, &tid); + + if (ClockThreadHandle != NULL) { /* remember the thread priority is only within the process class */ - if (!SetThreadPriority(ClockThreadHandle, THREAD_PRIORITY_TIME_CRITICAL)) - { -#ifdef DEBUG - printf("Error setting thread priority\n"); -#endif - } + if (!SetThreadPriority(ClockThreadHandle, THREAD_PRIORITY_TIME_CRITICAL)) { + DPRINTF(1, ("Error setting thread priority\n")); + } + + lock_thread_to_processor(ClockThreadHandle); + ResumeThread(ClockThreadHandle); + + lock_thread_to_processor(GetCurrentThread()); + + atexit( StopClockThread ); + } +} + + +void +lock_thread_to_processor(HANDLE thread) +{ + static DWORD_PTR ProcessAffinityMask; + static DWORD_PTR ThreadAffinityMask; + DWORD_PTR SystemAffinityMask; + char *cputext; + unsigned int cpu; + + if ( ! ProcessAffinityMask) { + /* + * Choose which processor to nail the main and clock threads to. + * If we have more than one, we simply choose the 2nd. + * Randomly choosing from 2 to n would be better, but in + * either case with clock and network interrupts more likely + * to be serviced by the first processor, let's stay away + * from it. 
QueryPerformanceCounter is not necessarily + * consistent across CPUs, hence the need to nail the two + * threads involved in QPC-based interpolation to the same + * CPU. + */ + + GetProcessAffinityMask( + GetCurrentProcess(), + &ProcessAffinityMask, + &SystemAffinityMask); + + /* + * respect NTPD_CPU environment variable if present + * for testing. NTPD_CPU=0 means use all CPUs, 1-64 + * means lock threads involved in interpolation to + * that CPU. Default to 2nd if more than 1. + */ + + cpu = 2; + cputext = getenv("NTPD_CPU"); + if (cputext) { + cpu = (unsigned int) atoi(cputext); + cpu = min((8 * sizeof(DWORD_PTR)), cpu); + } + + /* + * Clear all bits except the 2nd. If we have only one proc + * that leaves ThreadAffinityMask zeroed and we won't bother + * with SetThreadAffinityMask. + */ + + ThreadAffinityMask = (0 == cpu) ? 0 : (1 << (cpu - 1)); + + if (ThreadAffinityMask && + !(ThreadAffinityMask & ProcessAffinityMask)) { + + DPRINTF(1, ("Selected CPU %u (mask %x) is outside " + "process mask %x, using all CPUs.\n", + cpu, ThreadAffinityMask, + ProcessAffinityMask)); + } else { + DPRINTF(1, ("Wiring to processor %u (0 means all) " + "affinity mask %x\n", + cpu, ThreadAffinityMask)); + } + + ThreadAffinityMask &= ProcessAffinityMask; } - atexit( StopClockThread ); + if (ThreadAffinityMask && + !SetThreadAffinityMask(thread, ThreadAffinityMask)) { + + DPRINTF(1, ("Unable to wire thread to mask %x: %s\n", + ThreadAffinityMask, strerror(GetLastError()))); + } } + static void StopClockThread(void) { if ( wTimerRes ) /* if not 0 then the MM timer has been modified at startup */ diff --git a/ports/winnt/ntpd/ntp_iocompletionport.c b/ports/winnt/ntpd/ntp_iocompletionport.c index 099623879..737aa1ea1 100644 --- a/ports/winnt/ntpd/ntp_iocompletionport.c +++ b/ports/winnt/ntpd/ntp_iocompletionport.c @@ -42,6 +42,9 @@ typedef struct IoCompletionInfo { #define recv_buf buff_space.rbuf #define trans_buf buff_space.tbuf +/* in nt_clockstuff.c */ +extern void 
lock_thread_to_processor(HANDLE); + /* * local function definitions */ @@ -147,6 +150,15 @@ iocompletionthread(void *NotUsed) DWORD Key = 0; IoCompletionInfo * lpo = NULL; + /* + * socket and refclock receive call gettimeofday() + * so the I/O thread needs to be on the same + * processor as the main and timing threads + * to ensure consistent QueryPerformanceCounter() + * results. + */ + lock_thread_to_processor(GetCurrentThread()); + /* Set the thread priority high enough so I/O will * preempt normal recv packet processing, but not * higher than the timer sync thread.