]> git.ipfire.org Git - thirdparty/kea.git/commitdiff
[#1205] Improved terminated restart procedure
authorMarcin Siodelski <marcin@isc.org>
Wed, 20 May 2020 13:21:33 +0000 (15:21 +0200)
committerMarcin Siodelski <marcin@isc.org>
Thu, 21 May 2020 18:02:03 +0000 (20:02 +0200)
The server being restarted will not transition to the terminated state if
the clocks are in sync. This prevents the servers from returning to the
terminated state when restarting the servers sequentially.

src/hooks/dhcp/high_availability/ha_service.cc
src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc

index 1dbed9dfafdf3cc0424ff5d22fb36faea0bf04d9..4e5e10bfb277743977c221371eca6ded0cb6e084 100644 (file)
@@ -603,7 +603,13 @@ HAService::waitingStateHandler() {
         break;
 
     case HA_TERMINATED_ST:
-        verboseTransition(HA_TERMINATED_ST);
+        // We have checked above whether the clock skew is exceeding the threshold
+        // and we should terminate. If we're here, it means that the clock skew
+        // is acceptable. The partner may be still in the terminated state because
+        // it hasn't been restarted yet. Probably, this server is the first one
+        // being restarted after syncing the clocks. Let's just sit in the waiting
+        // state until the partner gets restarted.
+        postNextEvent(NOP_EVT);
         break;
 
     case HA_WAITING_ST:
index 8b2aa09d6303b58b4d7c17bb528595b8f275f0b6..fa0d6e2deba71b119abdd33833bd2bdad8c6fb08 100644 (file)
@@ -4175,7 +4175,7 @@ TEST_F(HAServiceStateMachineTest, stateTransitionsLoadBalancingPrimary) {
                        FinalState(HA_SYNCING_ST));
 
         testTransition(MyState(HA_WAITING_ST), PartnerState(HA_TERMINATED_ST),
-                       FinalState(HA_TERMINATED_ST));
+                       FinalState(HA_WAITING_ST));
 
         testTransition(MyState(HA_WAITING_ST), PartnerState(HA_SYNCING_ST),
                        FinalState(HA_WAITING_ST));
@@ -4220,6 +4220,45 @@ TEST_F(HAServiceStateMachineTest, terminateTransitionsLoadBalancingPrimary) {
     testTerminateTransition(MyState(HA_WAITING_ST));
 }
 
+// This test checks that the server does not transition out of the waiting state
+// to the terminated state when the server is restarted but the clock skew has
+// been corrected.
+TEST_F(HAServiceStateMachineTest, terminateNoTransitionOnRestart) {
+    partner_->startup();
+    startService(createValidConfiguration());
+
+    // Set partner's time to the current time. This guarantees that the clock
+    // skew is below 60s and there is no reason for the server to transition
+    // to the terminated state.
+    partner_->setDateTime(HttpDateTime().rfc1123Format());
+    // The partner is in the terminated state to simulate sequential restart
+    // of the two servers from the terminated state.
+    partner_->transition("terminated");
+    // This server is in the waiting state which simulates the restart case.
+    service_->transition(HA_WAITING_ST, HAService::NOP_EVT);
+    // Run the heartbeat.
+    waitForEvent(HAService::HA_HEARTBEAT_COMPLETE_EVT);
+    // The server should remain in the waiting state because the clock skew
+    // is low.
+    EXPECT_EQ(HA_WAITING_ST, service_->getCurrState())
+        << "expected that the server remains in 'waiting' state"
+        << "', but transitioned to the '"
+        << service_->getStateLabel(service_->getCurrState())
+        << "' state";
+
+    // Now, let's set the partner's time way to the past to verify that this
+    // server transitions to the 'terminated' state if the administrator
+    // failed to sync the clocks prior to the restart.
+    partner_->setDateTime("Sun, 06 Nov 1994 08:49:37 GMT");
+    // Run the heartbeat.
+    waitForEvent(HAService::HA_HEARTBEAT_COMPLETE_EVT);
+    EXPECT_EQ(HA_WAITING_ST, service_->getCurrState())
+        << "expected that the server transitions to the 'terminated' state"
+        << "', but transitioned to the '"
+        << service_->getStateLabel(service_->getCurrState())
+        << "' state";
+}
+
 // This test checks all combinations of server and partner states and the
 // resulting state to which the server transitions. This server is secondary.
 // There is another test which validates state transitions from the
@@ -4417,7 +4456,7 @@ TEST_F(HAServiceStateMachineTest, stateTransitionsLoadBalancingSecondary) {
                        FinalState(HA_WAITING_ST));
 
         testTransition(MyState(HA_WAITING_ST), PartnerState(HA_TERMINATED_ST),
-                       FinalState(HA_TERMINATED_ST));
+                       FinalState(HA_WAITING_ST));
 
         testTransition(MyState(HA_WAITING_ST), PartnerState(HA_WAITING_ST),
                        FinalState(HA_WAITING_ST));
@@ -4968,7 +5007,7 @@ TEST_F(HAServiceStateMachineTest, stateTransitionsHotStandbyPrimary) {
                        FinalState(HA_WAITING_ST));
 
         testTransition(MyState(HA_WAITING_ST), PartnerState(HA_TERMINATED_ST),
-                       FinalState(HA_TERMINATED_ST));
+                       FinalState(HA_WAITING_ST));
 
         testTransition(MyState(HA_WAITING_ST), PartnerState(HA_WAITING_ST),
                        FinalState(HA_SYNCING_ST));
@@ -5191,7 +5230,7 @@ TEST_F(HAServiceStateMachineTest, stateTransitionsHotStandbyStandby) {
                        FinalState(HA_WAITING_ST));
 
         testTransition(MyState(HA_WAITING_ST), PartnerState(HA_TERMINATED_ST),
-                       FinalState(HA_TERMINATED_ST));
+                       FinalState(HA_WAITING_ST));
 
         testTransition(MyState(HA_WAITING_ST), PartnerState(HA_WAITING_ST),
                        FinalState(HA_WAITING_ST));