From: Juergen Perlinger Date: Fri, 8 Aug 2014 20:47:50 +0000 (+0200) Subject: implement new logic to define difference limit and limit check with SHM clock (driver28) X-Git-Tag: NTP_4_2_7P457~2^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6ad2a3c093557e1b9ec9c8cea0183b1571cfecfd;p=thirdparty%2Fntp.git implement new logic to define difference limit and limit check with SHM clock (driver28) bk: 53e537765xlDn_f8ftAQqwGv1D2eow --- diff --git a/html/drivers/driver28.html b/html/drivers/driver28.html index bcca4b9c1..8c7fd802e 100644 --- a/html/drivers/driver28.html +++ b/html/drivers/driver28.html @@ -12,7 +12,7 @@

Shared Memory Driver

Last update: - 14-Sep-2012 18:48 + 8-Aug-2014 19:17 UTC

Synopsis

@@ -56,19 +56,33 @@

If set, the count field of the record is remembered, and the values in the record (clockTimeStampSec, clockTimeStampUSec, receiveTimeStampSec, receiveTimeStampUSec, leap, precision) are read. Then, the remembered count is compared to the current value of count in the record. If both are equal, the values read from the record are passed to ntp. If they differ, another process has modified the record while it was being read (the author was not able to produce this case), and a failure is reported to ntp. The valid flag is cleared and count is bumped.

If not set, count is bumped.

Mode-independent postprocessing

+After the time stamps have been successfully plucked from the SHM +segment, some sanity checks take place: +

The receive time stamp of the SHM data must lie within the last 5 + seconds before the time the data is processed. This helps to weed + out stale data. +
If the absolute difference between the remote and the local clock + exceeds the limit (either time2 or the default of 4 hours), then + the sample is discarded. This check is disabled when flag1 is + set to 1. +

gpsd

gpsd knows how to talk to many GPS devices. -It works with ntpd through the SHM driver. +It can work with ntpd through the SHM driver.

The gpsd man page suggests setting minpoll and maxpoll to 4. That was an attempt to reduce jitter. The SHM driver was fixed (ntp-4.2.5p138) to collect data each second rather than once per polling interval so that suggestion is no longer reasonable.

- + Note: The GPSD client driver (type 46) uses the gpsd + client protocol to connect and talk to gpsd; using the + SHM driver is the older, traditional way to have gpsd talk to ntpd.

Clockstats

If flag4 is set when the driver is polled, a clockstats record is written. @@ -103,13 +117,19 @@ Here is a sample showing the GPS reception fading out:

time1 time

Specifies the time offset calibration factor, in seconds and fraction, with default 0.0.

time2 time -

Not used by this driver. +

Maximum allowed difference between remote and local + clock, in seconds. Values <1.0 or >86400.0 are ignored, and the + default value of 4 hours (14400 s) is used instead. See also flag1.

stratum number

Specifies the driver stratum, in decimal from 0 to 15, with default 0.

refid string

Specifies the driver reference identifier, an ASCII string from one to four characters, with default SHM.

flag1 0 | 1 -

Not used by this driver. +

Skip the difference limit check if set. Useful + for systems where the RTC backup cannot keep the time over + long periods without power and the SHM clock must be able + to force long-distance initial jumps. Check the + difference limit if cleared (default).

flag2 0 | 1

Not used by this driver.

flag3 0 | 1 diff --git a/ntpd/refclock_shm.c b/ntpd/refclock_shm.c index 1a5eb8ff1..5c32b81aa 100644 --- a/ntpd/refclock_shm.c +++ b/ntpd/refclock_shm.c @@ -41,9 +41,6 @@ * This driver supports a reference clock attached thru shared memory */ -/* Temp hack to simplify testing of the old mode. */ -#define OLDWAY 0 - /* * SHM interface definitions */ @@ -60,8 +57,8 @@ static int shm_start (int unit, struct peer *peer); static void shm_shutdown (int unit, struct peer *peer); static void shm_poll (int unit, struct peer *peer); static void shm_timer (int unit, struct peer *peer); - int shm_peek (int unit, struct peer *peer); - void shm_clockstats (int unit, struct peer *peer); +static void shm_peek (int unit, struct peer *peer); +static void shm_clockstats (int unit, struct peer *peer); /* * Transfer vector @@ -108,6 +105,9 @@ struct shmunit { int notready; /* number of peeks without data ready */ int bad; /* number of invalid samples */ int clash; /* number of access clashes while reading */ + + time_t max_delta; /* difference limit */ + time_t max_delay; /* age/stale limit */ }; @@ -214,6 +214,15 @@ shm_start( up->shm->valid = 0; up->shm->nsamples = NSAMPLES; pp->clockdesc = DESCRIPTION; + + up->max_delay = 5; + if (pp->sloppyclockflag & CLK_FLAG1) + up->max_delta = 0; + else if (pp->fudgetime2 < 1. || pp->fudgetime2 > 86400.) 
+ up->max_delta = 4*3600; + else + up->max_delta = (time_t)floor(pp->fudgetime2 + 0.5); + return 1; } else { free(up); @@ -255,9 +264,6 @@ shm_shutdown( static void shm_timer(int unit, struct peer *peer) { - if (OLDWAY) - return; - shm_peek(unit, peer); } @@ -272,52 +278,76 @@ shm_poll( ) { struct refclockproc *pp; - int ok; + struct shmunit *up; + int major_error; pp = peer->procptr; + up = pp->unitptr; - if (OLDWAY) { - ok = shm_peek(unit, peer); - if (!ok) - return; - } + pp->polls++; + + /* get dominant reason if we have no samples at all */ + major_error = max(up->notready, up->bad); + major_error = max(major_error, up->clash); /* - * Process median filter samples. If none received, declare a - * timeout and keep going. + * Process median filter samples. If none received, see what + * happened, tell the core and keep going. */ - if (pp->coderecv == pp->codeproc) { + if (pp->coderecv != pp->codeproc) { + /* have some samples, everything OK */ + pp->lastref = pp->lastrec; + refclock_receive(peer); + } else if (NULL == up->shm) { /* is this possible at all? */ + /* we're out of business without SHM access */ + refclock_report(peer, CEVNT_FAULT); + } else if (major_error == up->clash) { + /* too many collisions is like a bad signal */ + refclock_report(peer, CEVNT_PROP); + } else if (major_error == up->bad) { + /* too much stale/bad/garbled data */ + refclock_report(peer, CEVNT_BADREPLY); + } else { + /* in any other case assume it's just a timeout */ refclock_report(peer, CEVNT_TIMEOUT); - shm_clockstats(unit, peer); - return; } - pp->lastref = pp->lastrec; - refclock_receive(peer); + /* shm_clockstats() clears the tallies, so it must be last... 
*/ shm_clockstats(unit, peer); } /* * shm_peek - try to grab a sample */ -int shm_peek( +static void +shm_peek( int unit, struct peer *peer ) { struct refclockproc *pp; struct shmunit *up; - struct shmTime *shm; + + /* access order is important for lock-free SHM access; we + ** enforce order by treating the whole structure volatile. + ** + ** IMPORTANT NOTE: This does not protect from reordering on CPU + ** level, and it does nothing for cache consistency and + ** visibility of changes by other cores. We need atomic and/or + ** fence instructions for that. + */ + volatile struct shmTime *shm; struct timespec tvr; struct timespec tvt; + l_fp tsrcv; + l_fp tsref; unsigned int c; - int ok = 1; - unsigned cns_new, rns_new; + unsigned int cns_new, rns_new; int cnt; /* for formatting 'a_lastcode': */ struct calendar cd; - time_t tt; + time_t tt, now; vint64 ts; /* @@ -334,12 +364,15 @@ int shm_peek( } shm = up->shm; if (shm == 0) { - refclock_report(peer, CEVNT_FAULT); - return 0; + DPRINTF(1, ("%s: no SHM segment\n", + refnumtoa(&peer->srcadr))); + return; } if ( ! shm->valid) { + DPRINTF(1, ("%s: SHM not ready\n", + refnumtoa(&peer->srcadr))); up->notready++; - return 0; + return; } switch (shm->mode) { @@ -371,6 +404,8 @@ int shm_peek( ** timestamps, possibly generated by extending the old ** us-level timestamps */ + DPRINTF(2, ("%s: SHM type 0 sample\n", + refnumtoa(&peer->srcadr))); break; case 1: @@ -382,7 +417,13 @@ int shm_peek( tvt.tv_sec = shm->clockTimeStampSec; tvt.tv_nsec = shm->clockTimeStampUSec * 1000; cns_new = shm->clockTimeStampNSec; - ok = (cnt == shm->count); + if (cnt != shm->count) { + DPRINTF(1, ("%s: type 1 access clash\n", + refnumtoa(&peer->srcadr))); + msyslog (LOG_NOTICE, "SHM: access clash in shared memory"); + up->clash++; + return; + } /* See the case above for an explanation of the ** following test. 
@@ -396,20 +437,25 @@ int shm_peek( ** timestamps, possibly generated by extending the old ** us-level timestamps */ + DPRINTF(2, ("%s: SHM type 1 sample\n", + refnumtoa(&peer->srcadr))); break; default: + DPRINTF(1, ("%s: SHM type blooper, mode=%d\n", + refnumtoa(&peer->srcadr), shm->mode)); up->bad++; msyslog (LOG_ERR, "SHM: bad mode found in shared memory: %d", shm->mode); - return 0; + return; } + shm->valid = 0; /* format the last time code in human-readable form into * 'pp->a_lastcode'. Someone claimed: "NetBSD has incompatible * tv_sec". I can't find a base for this claim, but we can work * around that potential problem. BTW, simply casting a pointer - * is a receipe for desaster on some architectures. + * is a receipe for disaster on some architectures. */ tt = (time_t)tvt.tv_sec; ts = time_to_vint64(&tt); @@ -423,46 +469,70 @@ int shm_peek( (long)tvt.tv_nsec); pp->lencode = (c < sizeof(pp->a_lastcode)) ? c : 0; - shm->valid = 0; - if (ok) { - l_fp tsrcv = tspec_stamp_to_lfp(tvr); - l_fp tsref = tspec_stamp_to_lfp(tvt); - pp->polls++; - pp->leap = shm->leap; - peer->precision = shm->precision; - refclock_process_offset(pp, tsref, tsrcv, pp->fudgetime1); - } else { - refclock_report(peer, CEVNT_FAULT); - msyslog (LOG_NOTICE, "SHM: access clash in shared memory"); - up->clash++; - return 0; + /* check 1: age control of local time stamp */ + time(&now); + tt = now - tvr.tv_sec; + if (tt < 0 || tt > up->max_delay) { + DPRINTF(1, ("%s:SHM stale/bad receive time, delay=%llds\n", + refnumtoa(&peer->srcadr), (long long)tt)); + up->bad++; + msyslog (LOG_ERR, "SHM: stale/bad receive time, delay=%llds", + (long long)tt); + return; } + + /* check 2: delta check */ + tt = tvr.tv_sec - tvt.tv_sec - (tvr.tv_nsec < tvt.tv_nsec); + if (tt < 0) + tt = -tt; + if (up->max_delta > 0 && tt > up->max_delta) { + DPRINTF(1, ("%s: SHM diff limit exceeded, delta=%llds\n", + refnumtoa(&peer->srcadr), (long long)tt)); + up->bad++; + msyslog (LOG_ERR, "SHM: difference limit exceeded, 
delta=%llds\n", + (long long)tt); + return; + } + + /* if we really made it to this point... we're winners! */ + DPRINTF(2, ("%s: SHM feeding data\n", + refnumtoa(&peer->srcadr))); + tsrcv = tspec_stamp_to_lfp(tvr); + tsref = tspec_stamp_to_lfp(tvt); + pp->leap = shm->leap; + peer->precision = shm->precision; + refclock_process_offset(pp, tsref, tsrcv, pp->fudgetime1); up->good++; - return 1; } /* * shm_clockstats - dump and reset counters */ -void shm_clockstats( +static void shm_clockstats( int unit, struct peer *peer ) { struct refclockproc *pp; struct shmunit *up; - char logbuf[256]; + char logbuf[64]; + unsigned int llen; pp = peer->procptr; up = pp->unitptr; - if (!(pp->sloppyclockflag & CLK_FLAG4)) - return; - - snprintf(logbuf, sizeof(logbuf), "%3d %3d %3d %3d %3d", - up->ticks, up->good, up->notready, up->bad, up->clash); - record_clock_stats(&peer->srcadr, logbuf); - + if (pp->sloppyclockflag & CLK_FLAG4) { + /* if snprintf() returns a negative values on errors + ** (some older ones do) make sure we are NUL + ** terminated. Using an unsigned result does the trick. + */ + llen = snprintf(logbuf, sizeof(logbuf), + "%3d %3d %3d %3d %3d", + up->ticks, up->good, up->notready, + up->bad, up->clash); + logbuf[min(llen, sizeof(logbuf)-1)] = '\0'; + record_clock_stats(&peer->srcadr, logbuf); + } up->ticks = up->good = up->notready = up->bad = up->clash = 0; }