From: Mike Brady <4265913+mikebrady@users.noreply.github.com>
Date: Fri, 9 Apr 2021 19:24:30 +0000 (+0100)
Subject: try to add compensation for short timing disturbances, clamp the slew rate
X-Git-Tag: 1.1-dev~38
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b741e81b5c0cb638828af86881d773de05c7aab8;p=thirdparty%2Fnqptp.git

try to add compensation for short timing disturbances, clamp the slew rate
---

diff --git a/nqptp-clock-sources.h b/nqptp-clock-sources.h
index a35d3e1..793b86c 100644
--- a/nqptp-clock-sources.h
+++ b/nqptp-clock-sources.h
@@ -28,8 +28,9 @@ enum stage {
   sync_seen,
 };
 
-#define MAX_TIMING_SAMPLES 3
+#define MAX_TIMING_SAMPLES 11
 typedef struct {
+  uint16_t sequence_number;
   uint64_t local, local_to_remote_offset;
 } timing_samples;
 
@@ -50,7 +51,6 @@ typedef struct {
   int vacant_samples; // the number of elements in the timing_samples array that are not yet used
   int next_sample_goes_here; // point to where in the timing samples array the next entries should
                              // go
-  uint32_t sample_number;
 } clock_source_private_data;
 
 int find_clock_source_record(char *sender_string, clock_source *clocks_shared_info,
diff --git a/nqptp-message-handlers.c b/nqptp-message-handlers.c
index ab11293..ad6a6d0 100644
--- a/nqptp-message-handlers.c
+++ b/nqptp-message-handlers.c
@@ -47,10 +47,11 @@ void handle_control_port_messages(char *buf, ssize_t recv_len, clock_source *clo
           if (t == -1)
             t = create_clock_source_record(new_ip, clock_info, clock_private_info,
                                            0); // don't use the mutex
+
           // if it is just about to become a timing peer, reset its sample count
           clock_private_info[t].vacant_samples = MAX_TIMING_SAMPLES;
           clock_private_info[t].next_sample_goes_here = 0;
-          clock_private_info[t].sample_number = 0;
+
           clock_info[t].flags |= (1 << clock_is_a_timing_peer);
         }
       }
@@ -119,22 +120,22 @@ void handle_announce(char *buf, ssize_t recv_len, clock_source *clock_info,
           grandmaster_clock_id = grandmaster_clock_id << 32;
           grandmaster_clock_id = grandmaster_clock_id + grandmaster_clock_id_low;
 
-          debug(2,
+          debug(1,
                 "clock_id %" PRIx64 " at:    %s, \"Announce\" message is Qualified -- See 9.3.2.5.",
                 clock_info->clock_id, clock_info->ip);
           uint32_t clockQuality = msg->announce.grandmasterClockQuality;
           uint8_t clockClass = (clockQuality >> 24) & 0xff;
           uint8_t clockAccuracy = (clockQuality >> 16) & 0xff;
           uint16_t offsetScaledLogVariance = clockQuality & 0xffff;
-          debug(2, "    grandmasterIdentity:         %" PRIx64 ".", grandmaster_clock_id);
-          debug(2, "    grandmasterPriority1:        %u.", msg->announce.grandmasterPriority1);
-          debug(2, "    grandmasterClockQuality:     0x%x.", msg->announce.grandmasterClockQuality);
-          debug(2, "        clockClass:              %u.", clockClass); // See 7.6.2.4 clockClass
-          debug(2, "        clockAccuracy:           0x%x.",
+          debug(1, "    grandmasterIdentity:         %" PRIx64 ".", grandmaster_clock_id);
+          debug(1, "    grandmasterPriority1:        %u.", msg->announce.grandmasterPriority1);
+          debug(1, "    grandmasterClockQuality:     0x%x.", msg->announce.grandmasterClockQuality);
+          debug(1, "        clockClass:              %u.", clockClass); // See 7.6.2.4 clockClass
+          debug(1, "        clockAccuracy:           0x%x.",
                 clockAccuracy); // See 7.6.2.5 clockAccuracy
-          debug(2, "        offsetScaledLogVariance: 0x%x.",
+          debug(1, "        offsetScaledLogVariance: 0x%x.",
                 offsetScaledLogVariance); // See 7.6.3 PTP variance
-          debug(2, "    grandmasterPriority2:        %u.", msg->announce.grandmasterPriority2);
+          debug(1, "    grandmasterPriority2:        %u.", msg->announce.grandmasterPriority2);
         }
         if (pthread_mutex_lock(&shared_memory->shm_mutex) != 0)
           warn("Can't acquire mutex to set_timing_peers!");
@@ -158,6 +159,75 @@ void handle_announce(char *buf, ssize_t recv_len, clock_source *clock_info,
   }
 }
 
+void handle_sync(char *buf, ssize_t recv_len, clock_source *clock_info,
+                 clock_source_private_data *clock_private_info, uint64_t reception_time) {
+
+  struct ptp_sync_message *msg = (struct ptp_sync_message *)buf;
+  // this is just to see if anything interesting comes in the SYNC package
+  // a non-zero origin timestamp
+  // or correction field would be interesting....
+  int ck;
+  int non_empty_origin_timestamp = 0;
+  for (ck = 0; ck < 10; ck++) {
+    if (msg->sync.originTimestamp[ck] != 0) {
+      non_empty_origin_timestamp = (non_empty_origin_timestamp | 1);
+    }
+  }
+  if (non_empty_origin_timestamp != 0)
+    debug(2, "Sync Origin Timestamp!");
+  if (msg->header.correctionField != 0)
+    debug(3, "correctionField: %" PRIx64 ".", msg->header.correctionField);
+
+  int discard_sync = 0;
+
+  // check if we should discard this SYNC
+  if (clock_private_info->current_stage != waiting_for_sync) {
+
+    // here, we have an unexpected SYNC. It could be because the
+    // previous transaction sequence failed for some reason
+    // But, if that is so, the SYNC will have a newer sequence number
+    // so, ignore it if it's a little older.
+
+    // If it seems a lot older in sequence number terms, then it might
+    // be the start of a completely new sequence, so if the
+    // difference is more than 40 (WAG), accept it
+
+    uint16_t new_sync_sequence_number = ntohs(msg->header.sequenceId);
+    int16_t sequence_number_difference =
+        (clock_private_info->sequence_number - new_sync_sequence_number);
+
+    if ((sequence_number_difference > 0) && (sequence_number_difference < 40))
+      discard_sync = 1;
+  }
+
+  if (discard_sync == 0) {
+    /*
+        // just check how long since the last sync, if there was one
+        clock_private_info->reception_interval = 0;
+        if (clock_private_info->t2 != 0) {
+          int16_t seq_diff = ntohs(msg->header.sequenceId) - clock_private_info->sequence_number;
+          if (seq_diff == 1) {
+            uint64_t delta = reception_time - clock_private_info->t2;
+            clock_private_info->reception_interval = delta;
+            debug(1," reception interval: %f", delta * 0.000001);
+
+          }
+        }
+        */
+    clock_private_info->sequence_number = ntohs(msg->header.sequenceId);
+    clock_private_info->t2 = reception_time;
+
+    // it turns out that we don't really need to send a Delay_Req
+    // as a Follow_Up message always comes through
+
+    // If we had hardware assisted network timing, then maybe
+    // Even then, AP2 devices don't seem to send an accurate
+    // Delay_Resp time -- it contains the same information as the Follow_Up
+
+    clock_private_info->current_stage = sync_seen;
+  }
+}
+
 void handle_follow_up(char *buf, ssize_t recv_len, clock_source *clock_info,
                       clock_source_private_data *clock_private_info, uint64_t reception_time,
                       pthread_mutex_t *shm_mutex) {
@@ -191,32 +261,54 @@ void handle_follow_up(char *buf, ssize_t recv_len, clock_source *clock_info,
     // now, if there was a valid offset previously,
     // check if the offset should be clamped
 
-    // don't clamp for the first two seconds?
-    if ((clock_info->flags & (1 << clock_is_valid)) && (clock_private_info->sample_number > 17) &&
+    if ((clock_info->flags & (1 << clock_is_valid)) &&
         (clock_private_info->vacant_samples != MAX_TIMING_SAMPLES)) {
 
-      const int64_t clamp = 1 * 1000 * 1000;
-      int64_t jitter = offset - clock_private_info->previous_estimated_offset;
-      if (jitter > clamp)
-        offset = clock_private_info->previous_estimated_offset + clamp;
-      else if (jitter < (-clamp))
-        offset = clock_private_info->previous_estimated_offset - clamp;
-      int64_t clamped_jitter = offset - clock_private_info->previous_estimated_offset;
-      debug(1, "clock: %" PRIx64 ", jitter: %+f ms, clamped_jitter: %+f ms.", clock_info->clock_id,
-            jitter * 0.000001, clamped_jitter * 0.000001);
+      /*
+      if (clock_private_info->previous_estimated_offset != 0) {
+          int64_t jitter = offset - clock_private_info->previous_estimated_offset;
+          int64_t skew = 0;
+              debug(1," reception interval is: %f", clock_private_info->reception_interval *
+      0.000001); if (clock_private_info->reception_interval != 0) { uint64_t nominal_interval =
+      125000000; // 125 ms skew = clock_private_info->reception_interval - nominal_interval; skew =
+      skew / 2; offset = offset + skew; // try to compensate is a packet arrives too early or too
+      late
+          }
+
+          int64_t compensated_jitter = offset - clock_private_info->previous_estimated_offset;
+          debug(1," jitter: %f, skew: %f, compensated_jitter: %f.", jitter * 0.000001, skew *
+      0.000001, compensated_jitter * 0.000001);
+      }
+      */
+
+      /*
+      // only clamp if in the timing peer list
+        if ((clock_info->flags & (1 << clock_is_a_timing_peer)) != 0) {
+            const int64_t clamp = 1 * 1000 * 1000; //
+            int64_t jitter = offset - clock_private_info->previous_estimated_offset;
+            if (jitter > clamp)
+              offset = clock_private_info->previous_estimated_offset + clamp;
+            else if (jitter < (-clamp))
+              offset = clock_private_info->previous_estimated_offset - clamp;
+            int64_t clamped_jitter = offset - clock_private_info->previous_estimated_offset;
+            debug(1, "clock: %" PRIx64 ", jitter: %+f ms, clamped_jitter: %+f ms.",
+      clock_info->clock_id, jitter * 0.000001, clamped_jitter * 0.000001);
+        }
+          // okay, so the offset may now have been clamped to be close to the estimated previous
+      offset
+      */
     }
 
-    // okay, so the offset may now have been clamped to be close to the estimated previous offset
-    // track our samples
+    // update our sample information
 
     clock_private_info->samples[clock_private_info->next_sample_goes_here].local =
-        clock_private_info->t2;
+        clock_private_info->t2; // this is when the Sync message arrived.
     clock_private_info->samples[clock_private_info->next_sample_goes_here].local_to_remote_offset =
         offset;
-    clock_private_info->next_sample_goes_here++;
-    clock_private_info->sample_number++;
-    if (clock_private_info->next_sample_goes_here == MAX_TIMING_SAMPLES)
-      clock_private_info->next_sample_goes_here = 0;
+    clock_private_info->samples[clock_private_info->next_sample_goes_here].sequence_number =
+        clock_private_info->sequence_number;
+
+    // if this is the very first...
     if (clock_private_info->vacant_samples == MAX_TIMING_SAMPLES) {
       clock_private_info->previous_offset = offset;
       clock_private_info->previous_estimated_offset = offset;
@@ -225,30 +317,76 @@ void handle_follow_up(char *buf, ssize_t recv_len, clock_source *clock_info,
     if (clock_private_info->vacant_samples > 0)
       clock_private_info->vacant_samples--;
 
+    // okay, so now we have our samples, including the current one.
+
+    // let's try to estimate when this Sync message _should_ have arrived.
+    // this might allow us to detect a sample that is anomalously late or early
+    // skewing the offset calculation.
+
+    int sample_count = MAX_TIMING_SAMPLES - clock_private_info->vacant_samples;
+    int64_t divergence;
+    if (sample_count > 1) {
+      int f;
+      uint64_t ts = 0;
+      for (f = 0; f < sample_count; f++) {
+        uint64_t ti = clock_private_info->samples[f].local >> 8;
+        uint16_t sequence_gap =
+            clock_private_info->sequence_number - clock_private_info->samples[f].sequence_number;
+        ti += (125000000 * (sequence_gap)) >> 8;
+        // debug(1, "ti: %f, sequence_gap: %u, sample count: %u", ti, sequence_gap, sample_count);
+        ts += ti;
+      }
+      ts = ts / sample_count;
+      ts = ts << 8;
+      // uint64_t estimated_t2 = (uint64_t)ts;
+      divergence = clock_private_info->t2 - ts;
+      // debug(1, "divergence is: %f ms. %d samples", divergence * 0.000001, sample_count);
+    }
+
     // calculate averages
 
+    // here we will correct the offset by adding the divergence to it
+
+    offset = offset + divergence;
+
+    const int64_t clamp = 1000 * 1000; // WAG
+    int64_t jitter = offset - clock_private_info->previous_estimated_offset;
+    if (jitter > clamp)
+      offset = clock_private_info->previous_estimated_offset + clamp;
+    else if (jitter < (-clamp))
+      offset = clock_private_info->previous_estimated_offset - clamp;
+    //    int64_t clamped_jitter = offset - clock_private_info->previous_estimated_offset;
+    //     debug(1, "clock: %" PRIx64 ", jitter: %+f ms, clamped_jitter: %+f ms.",
+    //     clock_info->clock_id,
+    //           jitter * 0.000001, clamped_jitter * 0.000001);
+
+    // okay, so the offset may now have been clamped to be close to the estimated previous offset
+
     uint64_t estimated_offset = offset;
 
-    // here, calculate the average offset
+    /*
+        // here, calculate the average offset
 
-    int sample_count = MAX_TIMING_SAMPLES - clock_private_info->vacant_samples;
+        // int sample_count = MAX_TIMING_SAMPLES - clock_private_info->vacant_samples;
 
-    if (sample_count > 1) {
-      int e;
-      long double offsets = 0;
-      for (e = 0; e < sample_count; e++) {
-        uint64_t ho = clock_private_info->samples[e].local_to_remote_offset;
+        if (sample_count > 1) {
+          int e;
+          long double offsets = 0;
+          for (e = 0; e < sample_count; e++) {
+            uint64_t ho = clock_private_info->samples[e].local_to_remote_offset;
 
-        offsets = offsets + 1.0 * ho;
-      }
+            offsets = offsets + 1.0 * ho;
+          }
 
-      offsets = offsets / sample_count;
-      estimated_offset = (uint64_t)offsets;
-    }
+          offsets = offsets / sample_count;
+          estimated_offset = (uint64_t)offsets;
+        }
+    */
 
     int64_t estimated_variation = estimated_offset - clock_private_info->previous_estimated_offset;
-    debug(2, "clock: %" PRIx64 ", estimated_jitter: %+f ms.", clock_info->clock_id,
-          estimated_variation * 0.000001);
+    // debug(1, "clock: %" PRIx64 ", estimated_jitter: %+f ms, divergence: %+f.",
+    // clock_info->clock_id,
+    //      estimated_variation * 0.000001, divergence * 0.000001);
 
     clock_private_info->previous_estimated_offset = estimated_offset;
 
@@ -265,6 +403,12 @@ void handle_follow_up(char *buf, ssize_t recv_len, clock_source *clock_info,
     if (rc != 0)
       warn("Can't release mutex after updating a clock!");
 
+    clock_private_info->next_sample_goes_here++;
+
+    // if we have need to wrap.
+    if (clock_private_info->next_sample_goes_here == MAX_TIMING_SAMPLES)
+      clock_private_info->next_sample_goes_here = 0;
+
   } else {
     debug(3,
           "Follow_Up %u expecting to be in state sync_seen (%u). Stage error -- "
@@ -272,61 +416,4 @@ void handle_follow_up(char *buf, ssize_t recv_len, clock_source *clock_info,
           ntohs(msg->header.sequenceId), sync_seen, clock_private_info->current_stage,
           clock_private_info->sequence_number, clock_info->ip);
   }
-}
-
-void handle_sync(char *buf, ssize_t recv_len, clock_source *clock_info,
-                 clock_source_private_data *clock_private_info, uint64_t reception_time) {
-
-  struct ptp_sync_message *msg = (struct ptp_sync_message *)buf;
-  // this is just to see if anything interesting comes in the SYNC package
-  // a non-zero origin timestamp
-  // or correction field would be interesting....
-  int ck;
-  int non_empty_origin_timestamp = 0;
-  for (ck = 0; ck < 10; ck++) {
-    if (msg->sync.originTimestamp[ck] != 0) {
-      non_empty_origin_timestamp = (non_empty_origin_timestamp | 1);
-    }
-  }
-  if (non_empty_origin_timestamp != 0)
-    debug(2, "Sync Origin Timestamp!");
-  if (msg->header.correctionField != 0)
-    debug(3, "correctionField: %" PRIx64 ".", msg->header.correctionField);
-
-  int discard_sync = 0;
-
-  // check if we should discard this SYNC
-  if (clock_private_info->current_stage != waiting_for_sync) {
-
-    // here, we have an unexpected SYNC. It could be because the
-    // previous transaction sequence failed for some reason
-    // But, if that is so, the SYNC will have a newer sequence number
-    // so, ignore it if it's a little older.
-
-    // If it seems a lot older in sequence number terms, then it might
-    // be the start of a completely new sequence, so if the
-    // difference is more than 40 (WAG), accept it
-
-    uint16_t new_sync_sequence_number = ntohs(msg->header.sequenceId);
-    int16_t sequence_number_difference =
-        (clock_private_info->sequence_number - new_sync_sequence_number);
-
-    if ((sequence_number_difference > 0) && (sequence_number_difference < 40))
-      discard_sync = 1;
-  }
-
-  if (discard_sync == 0) {
-
-    clock_private_info->sequence_number = ntohs(msg->header.sequenceId);
-    clock_private_info->t2 = reception_time;
-
-    // it turns out that we don't really need to send a Delay_Req
-    // as a Follow_Up message always comes through
-
-    // If we had hardware assisted network timing, then maybe
-    // Even then, AP2 devices don't seem to send an accurate
-    // Delay_Resp time -- it contains the same information as the Follow_Up
-
-    clock_private_info->current_stage = sync_seen;
-  }
-}
+}
\ No newline at end of file
diff --git a/nqptp.c b/nqptp.c
index 0828436..351c8a4 100644
--- a/nqptp.c
+++ b/nqptp.c
@@ -322,7 +322,7 @@ int main(void) {
               sender_port = ntohs(sa4->sin_port);
             }
 
-            if (sender_port == receiver_port) {
+            if ((sender_port == receiver_port) && (connection_ip_family == AF_INET)) {
 
               char sender_string[256];
               memset(sender_string, 0, sizeof(sender_string));