extern const isc::log::MessageID HA_LEASE_UPDATES_DISABLED = "HA_LEASE_UPDATES_DISABLED";
extern const isc::log::MessageID HA_LEASE_UPDATES_ENABLED = "HA_LEASE_UPDATES_ENABLED";
extern const isc::log::MessageID HA_LEASE_UPDATE_COMMUNICATIONS_FAILED = "HA_LEASE_UPDATE_COMMUNICATIONS_FAILED";
+extern const isc::log::MessageID HA_LEASE_UPDATE_CONFLICT = "HA_LEASE_UPDATE_CONFLICT";
extern const isc::log::MessageID HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER = "HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER";
extern const isc::log::MessageID HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER = "HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER";
extern const isc::log::MessageID HA_LEASE_UPDATE_FAILED = "HA_LEASE_UPDATE_FAILED";
"HA_LEASE_UPDATES_DISABLED", "lease updates will not be sent to the partner while in %1 state",
"HA_LEASE_UPDATES_ENABLED", "lease updates will be sent to the partner while in %1 state",
"HA_LEASE_UPDATE_COMMUNICATIONS_FAILED", "%1: failed to communicate with %2: %3",
+ "HA_LEASE_UPDATE_CONFLICT", "%1: lease update to %2 returned conflict status code: %3",
"HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER", "%1: failed to create or update the lease having type %2 for address %3, reason: %4",
"HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER", "%1: failed to delete the lease having type %2 for address %3, reason: %4",
"HA_LEASE_UPDATE_FAILED", "%1: lease update to %2 failed: %3",
extern const isc::log::MessageID HA_LEASE_UPDATES_DISABLED;
extern const isc::log::MessageID HA_LEASE_UPDATES_ENABLED;
extern const isc::log::MessageID HA_LEASE_UPDATE_COMMUNICATIONS_FAILED;
+extern const isc::log::MessageID HA_LEASE_UPDATE_CONFLICT;
extern const isc::log::MessageID HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER;
extern const isc::log::MessageID HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER;
extern const isc::log::MessageID HA_LEASE_UPDATE_FAILED;
HA peer while processing a DHCP client query and sending lease update. The
client's DHCP message will be dropped.
+% HA_LEASE_UPDATE_CONFLICT %1: lease update to %2 returned conflict status code: %3
+This warning message indicates that partner returned a conflict status code
+in response to a lease update. The client's DHCP message will be dropped.
+If the server is configured to track conflicting lease updates, it may
+eventually transition to the terminated state when the configured threshold
+is exceeded.
+
% HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER %1: failed to create or update the lease having type %2 for address %3, reason: %4
This informational message is issued when one of the leases failed to be
created or updated on the HA peer while processing the lease updates sent
CtrlChannelError(file, line, what) {}
};
+/// @brief Exception thrown when conflict status code has been returned.
+class ConflictError : public CtrlChannelError {
+public:
+ ConflictError(const char* file, size_t line, const char* what) :
+ CtrlChannelError(file, line, what) {}
+};
+
}
namespace isc {
query_filter_.serveDefaultScopes();
}
adjustNetworkState();
+ communication_state_->clearRejectedLeases();
// Log if the state machine is paused.
conditionalLogPausedState();
if (doOnEntry()) {
query_filter_.serveNoScopes();
adjustNetworkState();
+ communication_state_->clearRejectedLeases();
// Log if the state machine is paused.
conditionalLogPausedState();
if (doOnEntry()) {
query_filter_.serveNoScopes();
adjustNetworkState();
+ communication_state_->clearRejectedLeases();
// Log if the state machine is paused.
conditionalLogPausedState();
if (doOnEntry()) {
query_filter_.serveDefaultScopes();
adjustNetworkState();
+ communication_state_->clearRejectedLeases();
// In the terminated state we don't send heartbeat.
communication_state_->stopHeartbeat();
if (doOnEntry()) {
query_filter_.serveNoScopes();
adjustNetworkState();
+ communication_state_->clearRejectedLeases();
// Log if the state machine is paused.
conditionalLogPausedState();
// If not issue a warning if it's getting large.
if (!should_terminate) {
communication_state_->clockSkewShouldWarn();
+ // Check if we should terminate because the number of rejected leases
+ // has been exceeded.
+ should_terminate =
+ config_->getMaxRejectedClients() &&
+ (config_->getMaxRejectedClients() <= communication_state_->getRejectedLeasesCount());
}
return (should_terminate);
" HA peer. This is programmatic error");
}
- // There are three possible groups of errors during the lease update.
+ // There are four possible groups of errors during the lease update.
// One is the IO error causing issues in communication with the peer.
- // Another one is an HTTP parsing error. The last type of error is
- // when non-success error code is returned in the response carried
- // in the HTTP message or if the JSON response is otherwise broken.
+ // Another one is an HTTP parsing error. The third type occurs when
+ // the partner receives the command but it is invalid or there is
+ // an internal processing error. Finally, the forth type is when the
+ // conflict status code is returned in the response indicating that
+ // the lease update does not match the partner's configuration.
bool lease_update_success = true;
+ bool lease_update_conflict = false;
// Handle first two groups of errors.
if (ec || !error_str.empty()) {
} else {
- // Handle third group of errors.
try {
int rcode = 0;
auto args = verifyAsyncResponse(response, rcode);
// updates and we should log them.
logFailedLeaseUpdates(query, args);
+ } catch (const ConflictError& ex) {
+ // Handle forth group of errors.
+ lease_update_conflict = true;
+ lease_update_success = false;
+ communication_state_->reportRejectedLease(query);
+
+ LOG_WARN(ha_logger, HA_LEASE_UPDATE_CONFLICT)
+ .arg(query->getLabel())
+ .arg(config->getLogLabel())
+ .arg(ex.what());
+
} catch (const std::exception& ex) {
+ // Handle third group of errors.
LOG_WARN(ha_logger, HA_LEASE_UPDATE_FAILED)
.arg(query->getLabel())
.arg(config->getLogLabel())
// We don't care about the result of the lease update to the backup server.
// It is a best effort update.
- if ((config->getRole() != HAConfig::PeerConfig::BACKUP) && !lease_update_success) {
+ if ((config->getRole() != HAConfig::PeerConfig::BACKUP) && !lease_update_conflict && !lease_update_success) {
// If we were unable to communicate with the partner we set partner's
// state as unavailable.
communication_state_->setPartnerState("unavailable");
// Include an error code.
s << "error code " << rcode;
- if (rcode == CONTROL_RESULT_COMMAND_UNSUPPORTED) {
+ switch (rcode) {
+ case CONTROL_RESULT_COMMAND_UNSUPPORTED:
isc_throw(CommandUnsupportedError, s.str());
- } else {
+ case CONTROL_RESULT_CONFLICT:
+ isc_throw(ConflictError, s.str());
+ default:
isc_throw(CtrlChannelError, s.str());
}
}
bool shouldPartnerDown() const;
/// @brief Indicates if the server should transition to the terminated
- /// state as a result of high clock skew.
+ /// state.
+ ///
+ /// There are two reasons for the server to transition to the terminated
+ /// state. First, when the clock skew being too high. Second, when the
+ /// server monitors rejected lease updates and the maximum configured
+ /// rejected updates have been exceeded.
+ ///
+ /// If the clock skew is is higher than 30 seconds but lower than 60
+ /// seconds this method only logs a warning. In case, the clock skew
+ /// exceeds 60 seconds, this method logs a warning and returns true.
///
- /// It indicates that the server should transition to the terminated
- /// state because of the clock skew being too high. If the clock skew is
- /// is higher than 30 seconds but lower than 60 seconds this method
- /// only logs a warning. In case, the clock skew exceeds 60 seconds, this
- /// method logs a warning and returns true.
+ /// If the clock skew is acceptable the function can cause the transition
+ /// to the terminated state when the number of recorded rejected lease
+ /// updates exceeded the configured threshold.
///
/// @return true if the server should transition to the terminated state,
/// false otherwise.
if (should_fail) {
EXPECT_EQ(HA_UNAVAILABLE_ST, state->getPartnerState());
+ } else {
+ EXPECT_NE(state->getPartnerState(), HA_UNAVAILABLE_ST);
}
}
/// @brief Tests scenarios when one of the servers to which a
/// lease update is sent returns an error.
- void testSndUpdatesControlResultError() {
+ void testSendUpdatesControlResultError() {
// Instruct the server 2 to return an error as a result of receiving a command.
factory2_->getResponseCreator()->setControlResult(CONTROL_RESULT_ERROR);
/// @brief Tests scenarios when one of the servers to which a
/// lease update is sent does not authorize the local server.
- void testSndUpdatesControlResultUnauthorized() {
+ void testSendUpdatesControlResultUnauthorized() {
// Instruct the server 2 to require authentication.
factory2_->getResponseCreator()->addBasicAuth("foo", "bar");
EXPECT_FALSE(delete_request3);
}
+ /// @brief Tests scenarios when one of the servers to which a
+ /// lease update is sent returns an error.
+ void testSendUpdatesControlResultConflict() {
+ // Instruct the server 2 to return an error as a result of receiving a command.
+ factory2_->getResponseCreator()->setControlResult(CONTROL_RESULT_CONFLICT);
+ factory3_->getResponseCreator()->setControlResult(CONTROL_RESULT_CONFLICT);
+
+ // Start HTTP servers.
+ ASSERT_NO_THROW({
+ listener_->start();
+ listener2_->start();
+ listener3_->start();
+ });
+
+ testSendLeaseUpdates([] {
+ ADD_FAILURE() << "unpark function called but expected that "
+ "the packet is dropped";
+ }, false, 1);
+
+ // Ensure that the server has recorded a lease update conflict. The conflict
+ // reported by the backup server should not count.
+ EXPECT_EQ(1, service_->communication_state_->getRejectedLeasesCount());
+ }
+
/// @brief Tests scenarios when all lease updates are sent successfully.
void testSendSuccessfulUpdates6() {
// Start HTTP servers.
// Test scenario when one of the servers to which a lease update is sent
// returns an error.
TEST_F(HAServiceTest, sendUpdatesControlResultError) {
- testSndUpdatesControlResultError();
+ testSendUpdatesControlResultError();
}
// Test scenario when one of the servers to which a lease update is sent
// returns an error.
TEST_F(HAServiceTest, sendUpdatesControlResultErrorMultiThreading) {
MultiThreadingMgr::instance().setMode(true);
- testSndUpdatesControlResultError();
+ testSendUpdatesControlResultError();
}
// Test scenario when one of the servers to which a lease update is sent
// requires not provided authentication.
TEST_F(HAServiceTest, sendUpdatesControlResultUnauthorized) {
- testSndUpdatesControlResultUnauthorized();
+ testSendUpdatesControlResultUnauthorized();
}
// Test scenario when one of the servers to which a lease update is sent
// requires not provided authentication.
TEST_F(HAServiceTest, sendUpdatesControlResultUnauthorizedMultiThreading) {
MultiThreadingMgr::instance().setMode(true);
- testSndUpdatesControlResultUnauthorized();
+ testSendUpdatesControlResultUnauthorized();
+}
+
+// Test scenario when one of the servers to which a lease update is sent
+// returns a conflict status code.
+TEST_F(HAServiceTest, sendUpdatesControlResultConflict) {
+ testSendUpdatesControlResultConflict();
+}
+
+// Test scenario when one of the servers to which a lease update is sent
+// returns a conflict status code.
+TEST_F(HAServiceTest, sendUpdatesControlResultConflictMultiThreading) {
+ MultiThreadingMgr::instance().setMode(true);
+ testSendUpdatesControlResultConflict();
}
// Test scenario when all lease updates are sent successfully.
service_->runModel(HAService::NOP_EVT);
}
+ /// @brief Simulates rejected lease updates by the partner.
+ ///
+ /// Too many rejected lease updates should transition the server to the
+ /// terminated state.
+ /// @param leases_num number of simulated rejected leases.
+ void simulateRejectedLeaseUpdates(const unsigned leases_num = 100) {
+ // If the state machine is about to transition to another state,
+ // let's make sure it performs this transition.
+ service_->runModel(HAService::NOP_EVT);
+ // Simulate the rejected lease updates.
+ for (auto i = 0; i < leases_num; ++i) {
+ // Create query with random HW address.
+ Pkt4Ptr query4 = createQuery4(randomKey(HWAddr::ETHERNET_HWADDR_LEN));
+ static_cast<void>(state_->reportRejectedLease(query4));
+ }
+ // The state machine needs to react to the rejected leases.
+ service_->runModel(HAService::NOP_EVT);
+ }
+
/// @brief Checks transitions dependent on the partner's state.
///
/// This method uses @c partner_ object to control the state of the partner.
return (isDoingHeartbeat());
}
+ /// @brief Transitions the server to the specified state and checks that it
+ /// clears the rejected leases counter.
+ ///
+ /// @param my_state this server's state
+ /// @return true if the rejected leases counter has been cleared.
+ bool expectClearRejectedLeases(const MyState& my_state) {
+ simulateRejectedLeaseUpdates(1);
+ service_->verboseTransition(my_state.state_);
+ service_->runModel(TestHAService::NOP_EVT);
+ return (state_->getRejectedLeasesCount() == 0);
+ }
+
/// @brief Pointer to the communication state used in the tests.
NakedCommunicationState4Ptr state_;
/// @brief Pointer to the partner used in some tests.
EXPECT_TRUE(expectLeaseUpdates(MyState(HA_WAITING_ST), peer_config));
}
+// Verifies that the server transitions to the terminated state as a result
+// of too many rejected leases.
+TEST_F(HAServiceStateMachineTest, shouldTerminateDueToRejectedLeases) {
+ startService(createValidConfiguration());
+ service_->verboseTransition(HA_LOAD_BALANCING_ST);
+ service_->runModel(HAService::NOP_EVT);
+
+ // Simulate many rejected leases. It should cause the server to transition
+ // to the terminated state.
+ simulateRejectedLeaseUpdates(100);
+ EXPECT_EQ(HA_TERMINATED_ST, service_->getCurrState());
+}
+
+// Verifies that rejected leases are cleared upon entering certain states and
+// are not cleared upon entering other states in the load balancing mode.
+TEST_F(HAServiceStateMachineTest, clearRejectedLeaseUpdatesLoadBalancing) {
+ startService(createValidConfiguration());
+
+ EXPECT_FALSE(expectClearRejectedLeases(MyState(HA_COMMUNICATION_RECOVERY_ST)));
+ EXPECT_FALSE(expectClearRejectedLeases(MyState(HA_LOAD_BALANCING_ST)));
+ EXPECT_FALSE(expectClearRejectedLeases(MyState(HA_IN_MAINTENANCE_ST)));
+ EXPECT_TRUE(expectClearRejectedLeases(MyState(HA_PARTNER_DOWN_ST)));
+ EXPECT_FALSE(expectClearRejectedLeases(MyState(HA_PARTNER_IN_MAINTENANCE_ST)));
+ EXPECT_TRUE(expectClearRejectedLeases(MyState(HA_READY_ST)));
+ EXPECT_TRUE(expectClearRejectedLeases(MyState(HA_TERMINATED_ST)));
+ EXPECT_TRUE(expectClearRejectedLeases(MyState(HA_WAITING_ST)));
+}
+
+// Verifies that rejected leases are cleared upon entering certain states and
+// are not cleared upon entering other states in the hot standby mode.
+TEST_F(HAServiceStateMachineTest, clearRejectedLeaseUpdatesHotStandby) {
+ HAConfigPtr config_storage = createValidConfiguration(HAConfig::HOT_STANDBY);
+ config_storage->getPeerConfig("server2")->setRole("standby");
+ startService(config_storage);
+
+ EXPECT_FALSE(expectClearRejectedLeases(MyState(HA_HOT_STANDBY_ST)));
+ EXPECT_FALSE(expectClearRejectedLeases(MyState(HA_IN_MAINTENANCE_ST)));
+ EXPECT_TRUE(expectClearRejectedLeases(MyState(HA_PARTNER_DOWN_ST)));
+ EXPECT_FALSE(expectClearRejectedLeases(MyState(HA_PARTNER_IN_MAINTENANCE_ST)));
+ EXPECT_TRUE(expectClearRejectedLeases(MyState(HA_READY_ST)));
+ EXPECT_TRUE(expectClearRejectedLeases(MyState(HA_TERMINATED_ST)));
+ EXPECT_TRUE(expectClearRejectedLeases(MyState(HA_WAITING_ST)));
+}
+
+// Verifies that rejected leases are cleared upon entering certain states and
+// are not cleared upon entering other states in the passive backup mode.
+TEST_F(HAServiceStateMachineTest, clearRejectedLeaseUpdatesPassiveBackup) {
+ HAConfigPtr config_storage = createValidPassiveBackupConfiguration();
+ config_storage->getPeerConfig("server2")->setRole("backup");
+ startService(config_storage);
+
+ EXPECT_FALSE(expectClearRejectedLeases(MyState(HA_PASSIVE_BACKUP_ST)));
+}
+
} // end of anonymous namespace
" \"max-response-delay\": 1000,"
" \"max-ack-delay\": 10000,"
" \"max-unacked-clients\": 10,"
+ " \"max-rejected-clients\": 10,"
" \"wait-backup-ack\": false,"
" \"peers\": ["
" {"