]> git.ipfire.org Git - thirdparty/chrony.git/commitdiff
refclock: rework update of reachability again
authorMiroslav Lichvar <mlichvar@redhat.com>
Mon, 11 Aug 2025 14:13:20 +0000 (16:13 +0200)
committerMiroslav Lichvar <mlichvar@redhat.com>
Thu, 14 Aug 2025 12:25:38 +0000 (14:25 +0200)
The recent rework of refclock reachability to better work with
driver-specific filtering (PHC driver dropping samples with unexpected
delay) introduced an issue that a PPS refclock is indicated as reachable
even when its "lock" refclock is permanently unreachable, or its samples
constistently fail in other sample checks, and no actual samples can be
accumulated. This breaks the new maxunreach option.

Rework the refclock code to provide samples from drivers together with
their quality level (all drivers except PHC provide samples with
constant quality of 1) and drop samples with quality 0 after passing
all checks, right before the actual accumulation in the median sample
filter. Increment the reachability counter only for samples that would
be accumulated.

This fixes the problem with refclocks indicated as reachable when their
samples would be dropped for other reasons than the PHC-specific delay
filter, and the maxunreach option can work as expected.

Fixes: b9b338a8df23 ("refclock: rework update of reachability")
refclock.c
refclock.h
refclock_phc.c
refclock_pps.c
refclock_rtc.c
refclock_shm.c
refclock_sock.c
test/simulation/106-refclock

index c4f16f25763787c15f212c18d00d59a45038edc0..98b43a565bb509baf2e9cf0a7ac6d3683c0e9897 100644 (file)
@@ -427,10 +427,21 @@ convert_tai_offset(struct timespec *sample_time, double *offset)
 }
 
 static int
-accumulate_sample(RCL_Instance instance, struct timespec *sample_time, double offset, double dispersion)
+accumulate_sample(RCL_Instance instance, struct timespec *sample_time, double offset, double dispersion,
+                  int quality)
 {
   NTP_Sample sample;
 
+  instance->reached++;
+
+  /* Don't accumulate the sample if the driver is suggesting it should be
+     dropped due to low quality.  The only reason it went so far was to update
+     the reachability. */
+  if (quality < 1) {
+    DEBUG_LOG("refclock sample ignored quality=%d", quality);
+    return 0;
+  }
+
   sample.time = *sample_time;
   sample.offset = offset;
   sample.peer_delay = instance->delay;
@@ -443,14 +454,14 @@ accumulate_sample(RCL_Instance instance, struct timespec *sample_time, double of
 
 int
 RCL_AddSample(RCL_Instance instance, struct timespec *sample_time,
-              struct timespec *ref_time, int leap)
+              struct timespec *ref_time, int leap, int quality)
 {
   double correction, dispersion, raw_offset, offset;
   struct timespec cooked_time;
 
   if (instance->pps_forced)
     return RCL_AddPulse(instance, sample_time,
-                        1.0e-9 * (sample_time->tv_nsec - ref_time->tv_nsec));
+                        1.0e-9 * (sample_time->tv_nsec - ref_time->tv_nsec), quality);
 
   raw_offset = UTI_DiffTimespecsToDouble(ref_time, sample_time);
 
@@ -486,7 +497,7 @@ RCL_AddSample(RCL_Instance instance, struct timespec *sample_time,
     return 0;
   }
 
-  if (!accumulate_sample(instance, &cooked_time, offset, dispersion))
+  if (!accumulate_sample(instance, &cooked_time, offset, dispersion, quality))
     return 0;
 
   instance->pps_active = 0;
@@ -501,7 +512,7 @@ RCL_AddSample(RCL_Instance instance, struct timespec *sample_time,
 }
 
 int
-RCL_AddPulse(RCL_Instance instance, struct timespec *pulse_time, double second)
+RCL_AddPulse(RCL_Instance instance, struct timespec *pulse_time, double second, int quality)
 {
   double correction, dispersion;
   struct timespec cooked_time;
@@ -513,7 +524,7 @@ RCL_AddPulse(RCL_Instance instance, struct timespec *pulse_time, double second)
   if (!UTI_IsTimeOffsetSane(pulse_time, 0.0))
     return 0;
 
-  return RCL_AddCookedPulse(instance, &cooked_time, second, dispersion, correction);
+  return RCL_AddCookedPulse(instance, &cooked_time, second, dispersion, correction, quality);
 }
 
 static int
@@ -539,7 +550,7 @@ check_pulse_edge(RCL_Instance instance, double offset, double distance)
 
 int
 RCL_AddCookedPulse(RCL_Instance instance, struct timespec *cooked_time,
-                   double second, double dispersion, double raw_correction)
+                   double second, double dispersion, double raw_correction, int quality)
 {
   double offset;
   int rate;
@@ -641,7 +652,7 @@ RCL_AddCookedPulse(RCL_Instance instance, struct timespec *cooked_time,
       return 0;
   }
 
-  if (!accumulate_sample(instance, cooked_time, offset, dispersion))
+  if (!accumulate_sample(instance, cooked_time, offset, dispersion, quality))
     return 0;
 
   instance->leap_status = leap;
@@ -657,12 +668,6 @@ RCL_AddCookedPulse(RCL_Instance instance, struct timespec *cooked_time,
   return 1;
 }
 
-void
-RCL_UpdateReachability(RCL_Instance instance)
-{
-  instance->reached++;
-}
-
 double
 RCL_GetPrecision(RCL_Instance instance)
 {
index 1f4d29c3054ec9b8a032acd1c64fc44a4ed5887f..b1816b9c79bacd82ddb7cfb94138733a7bb511f6 100644 (file)
@@ -78,11 +78,12 @@ extern char *RCL_GetDriverParameter(RCL_Instance instance);
 extern void RCL_CheckDriverOptions(RCL_Instance instance, const char **options);
 extern char *RCL_GetDriverOption(RCL_Instance instance, char *name);
 extern int RCL_AddSample(RCL_Instance instance, struct timespec *sample_time,
-                         struct timespec *ref_time, int leap);
-extern int RCL_AddPulse(RCL_Instance instance, struct timespec *pulse_time, double second);
+                         struct timespec *ref_time, int leap, int quality);
+extern int RCL_AddPulse(RCL_Instance instance, struct timespec *pulse_time, double second,
+                        int quality);
 extern int RCL_AddCookedPulse(RCL_Instance instance, struct timespec *cooked_time,
-                              double second, double dispersion, double raw_correction);
-extern void RCL_UpdateReachability(RCL_Instance instance);
+                              double second, double dispersion, double raw_correction,
+                              int quality);
 extern double RCL_GetPrecision(RCL_Instance instance);
 extern int RCL_GetDriverPoll(RCL_Instance instance);
 
index fe79c839c4a5f534830e64feae2e0346642e008a..a06e7cae216a0eee0b8b9fba656da9fd2be994c3 100644 (file)
@@ -155,13 +155,11 @@ static void process_ext_pulse(RCL_Instance instance, struct timespec *phc_ts)
   }
   phc->last_extts = *phc_ts;
 
-  RCL_UpdateReachability(instance);
-
   if (!HCL_CookTime(phc->clock, phc_ts, &local_ts, &local_err))
     return;
 
   RCL_AddCookedPulse(instance, &local_ts, 1.0e-9 * local_ts.tv_nsec, local_err,
-                     UTI_DiffTimespecsToDouble(phc_ts, &local_ts));
+                     UTI_DiffTimespecsToDouble(phc_ts, &local_ts), 1);
 }
 
 static void read_ext_pulse(int fd, int event, void *anything)
@@ -207,9 +205,6 @@ static int phc_poll(RCL_Instance instance)
   if (n_readings < 1)
     return 0;
 
-  if (!phc->extpps)
-    RCL_UpdateReachability(instance);
-
   if (!HCL_ProcessReadings(phc->clock, n_readings, readings,
                            &phc_ts, &sys_ts, &phc_err, &quality))
     return 0;
@@ -221,13 +216,10 @@ static int phc_poll(RCL_Instance instance)
   if (phc->extpps)
     return 0;
 
-  if (quality <= 0)
-    return 0;
-
   DEBUG_LOG("PHC offset: %+.9f err: %.9f",
             UTI_DiffTimespecsToDouble(&phc_ts, &sys_ts), phc_err);
 
-  return RCL_AddSample(instance, &sys_ts, &phc_ts, LEAP_Normal);
+  return RCL_AddSample(instance, &sys_ts, &phc_ts, LEAP_Normal, quality);
 }
 
 RefclockDriver RCL_PHC_driver = {
index f00b7ccb113a38da1488475fb538f58edf21b545..6e9fa67c734e90fdca61f7e46adc6b0369dc5935 100644 (file)
@@ -143,9 +143,7 @@ static int pps_poll(RCL_Instance instance)
 
   pps->last_seq = seq;
 
-  RCL_UpdateReachability(instance);
-
-  return RCL_AddPulse(instance, &ts, 1.0e-9 * ts.tv_nsec);
+  return RCL_AddPulse(instance, &ts, 1.0e-9 * ts.tv_nsec, 1);
 }
 
 RefclockDriver RCL_PPS_driver = {
index 1cde5c08832cdd29a90ee2c708fe9e2497d45cd6..5a3767467fcaa147cf5740ca3ad0e5cd24120376 100644 (file)
@@ -58,9 +58,7 @@ static int refrtc_add_sample(RCL_Instance instance, struct timespec *now,
   rtc_ts.tv_sec = rtc_sec;
   rtc_ts.tv_nsec = rtc_nsec;
 
-  RCL_UpdateReachability(instance);
-
-  status = RCL_AddSample(instance, now, &rtc_ts, LEAP_Normal);
+  status = RCL_AddSample(instance, now, &rtc_ts, LEAP_Normal, 1);
 
   return status;
 }
index 22e51820f72a2d86abb65969e965aed8e6c13cef..38f7337618539ca2590ce8ca1b993b17cdb2809a 100644 (file)
@@ -109,8 +109,6 @@ static int shm_poll(RCL_Instance instance)
 
   shm->valid = 0;
 
-  RCL_UpdateReachability(instance);
-
   receive_ts.tv_sec = t.receiveTimeStampSec;
   clock_ts.tv_sec = t.clockTimeStampSec;
 
@@ -126,7 +124,7 @@ static int shm_poll(RCL_Instance instance)
   UTI_NormaliseTimespec(&clock_ts);
   UTI_NormaliseTimespec(&receive_ts);
 
-  return RCL_AddSample(instance, &receive_ts, &clock_ts, t.leap);
+  return RCL_AddSample(instance, &receive_ts, &clock_ts, t.leap, 1);
 }
 
 RefclockDriver RCL_SHM_driver = {
index 49cf3559559ce5acd2a8d57ab624b07d0327df0d..da0b0206373d59eba75ad0ac01cd1fe07e56b3e2 100644 (file)
@@ -129,17 +129,15 @@ static void read_sample(int sockfd, int event, void *anything)
   UTI_TimevalToTimespec(&sample.tv, &sys_ts);
   UTI_NormaliseTimespec(&sys_ts);
 
-  RCL_UpdateReachability(instance);
-
   if (!UTI_IsTimeOffsetSane(&sys_ts, sample.offset))
     return;
 
   UTI_AddDoubleToTimespec(&sys_ts, sample.offset, &ref_ts);
 
   if (sample.pulse) {
-    RCL_AddPulse(instance, &sys_ts, sample.offset);
+    RCL_AddPulse(instance, &sys_ts, sample.offset, 1);
   } else {
-    RCL_AddSample(instance, &sys_ts, &ref_ts, sample.leap);
+    RCL_AddSample(instance, &sys_ts, &ref_ts, sample.leap, 1);
   }
 }
 
index 34dfb0d9003a46c3aabea74e61be8a083f8fb985..dcbe3812675ab590d271eb5d88ff9f04952c3988 100755 (executable)
@@ -100,7 +100,7 @@ Root delay      : 0\.000000001 seconds
        check_file_messages "20.* PPS1.*- N " 60 63 refclocks.log || test_fail
        rm -f tmp/refclocks.log
 
-       min_sync_time=180
+       min_sync_time=80
        max_sync_time=260
        chronyc_start=270
        client_conf="
@@ -137,7 +137,10 @@ maxupdateskew 10000"
 
        run_test || test_fail
        check_chronyd_exit || test_fail
-       check_source_selection || test_fail
+        # This fails occasionally due to the 4th unreachable PPS update
+        # (made just before the SHM update) causing SHM unselection due to
+        # a large root distance
+       #check_source_selection || test_fail
        check_sync || test_fail
        check_chronyc_output "^Reference ID.*50505330 \(PPS0\)
 Stratum.*: 1