From: Thomas Markwalder Date: Thu, 6 Feb 2020 19:57:07 +0000 (-0500) Subject: [#1114] Servers execute shutdown on unrecoverable DBs X-Git-Tag: Kea-1.6.2~6 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=675f6d3e4f11370e1f5a7c5cd5bee29716da2133;p=thirdparty%2Fkea.git [#1114] Servers execute shutdown on unrecoverable DBs Backprot #1108 changes to v1_6_0. Added ChangeLog entry src/bin/dhcp4/ctrl_dhcp4_srv.* ControlledDhcpv4Srv::dbLostCallback() - schedules a shutdown once retries have been exhausted/disableld src/bin/dhcp6/ctrl_dhcp6_srv.* ControlledDhcpv6Srv::dbLostCallback() - schedules a shutdown once retries have been exhausted/disableld src/lib/database/database_connection.h class DbUnrecoverableError - new exception src/lib/mysql/mysql_connection.h MySqlConnection::check_error() - throws DbUnrecoverableError instead of calling exit() src/lib/pgsql/pgsql_connection.* PgSqlConnection::checkStatementError() - throws DbUnrecoverableError instead of calling exit() --- diff --git a/ChangeLog b/ChangeLog index d4c83456b0..2190483d8b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +1662. [bug] tmark + kea-dhcp4 and kea-dhcp6 now shutdown gracefully by executing + the shutdown command, if connectivity with a backend database + has been lost and retries are either disabled or have been + exhausted. Prior to this they simply invoked exit() which + could orphan control socket files or cause segfaults unloading + the CB Cmds hook library. + (Gitlab #1114,#1108) + 1661. [bug] tmark Kea servers now detect and remove orphaned control channel sockets. This corrects a failure of the servers to restart diff --git a/src/bin/dhcp4/ctrl_dhcp4_srv.cc b/src/bin/dhcp4/ctrl_dhcp4_srv.cc index 4987613534..621d155706 100644 --- a/src/bin/dhcp4/ctrl_dhcp4_srv.cc +++ b/src/bin/dhcp4/ctrl_dhcp4_srv.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2014-2019 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2014-2020 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -1017,12 +1017,14 @@ ControlledDhcpv4Srv::dbLostCallback(ReconnectCtlPtr db_reconnect_ctl) { return (false); } - // If reconnect isn't enabled, log it and return false + // If reconnect isn't enabled or we're out of retries, + // log it, schedule a shutdown, and return false if (!db_reconnect_ctl->retriesLeft() || !db_reconnect_ctl->retryInterval()) { LOG_INFO(dhcp4_logger, DHCP4_DB_RECONNECT_DISABLED) .arg(db_reconnect_ctl->retriesLeft()) .arg(db_reconnect_ctl->retryInterval()); + ControlledDhcpv4Srv::processCommand("shutdown", ConstElementPtr()); return(false); } diff --git a/src/bin/dhcp4/ctrl_dhcp4_srv.h b/src/bin/dhcp4/ctrl_dhcp4_srv.h index ae8095c2a7..8b872a5ae4 100644 --- a/src/bin/dhcp4/ctrl_dhcp4_srv.h +++ b/src/bin/dhcp4/ctrl_dhcp4_srv.h @@ -362,8 +362,8 @@ private: /// between retry attempts. /// /// If either value is zero, reconnect is presumed to be disabled and - /// the function will returns false. This instructs the DB backend - /// layer (the caller) to treat the connectivity loss as fatal. + /// the function will schedule a shutdown and return false. This instructs + /// the DB backend layer (the caller) to treat the connectivity loss as fatal. /// /// Otherwise, the function saves db_reconnect_ctl and invokes /// dbReconnect to initiate the reconnect process. diff --git a/src/bin/dhcp6/ctrl_dhcp6_srv.cc b/src/bin/dhcp6/ctrl_dhcp6_srv.cc index 9e5947606c..947410e429 100644 --- a/src/bin/dhcp6/ctrl_dhcp6_srv.cc +++ b/src/bin/dhcp6/ctrl_dhcp6_srv.cc @@ -1046,6 +1046,7 @@ ControlledDhcpv6Srv::dbLostCallback(ReconnectCtlPtr db_reconnect_ctl) { LOG_INFO(dhcp6_logger, DHCP6_DB_RECONNECT_DISABLED) .arg(db_reconnect_ctl->retriesLeft()) .arg(db_reconnect_ctl->retryInterval()); + ControlledDhcpv6Srv::processCommand("shutdown", ConstElementPtr()); return(false); } diff --git a/src/bin/dhcp6/ctrl_dhcp6_srv.h b/src/bin/dhcp6/ctrl_dhcp6_srv.h index 83a2d29ab4..3ff7e0c592 100644 --- a/src/bin/dhcp6/ctrl_dhcp6_srv.h +++ b/src/bin/dhcp6/ctrl_dhcp6_srv.h @@ -361,8 +361,8 @@ private: /// between retry attempts. /// /// If either value is zero, reconnect is presumed to be disabled and - /// the function will returns false. This instructs the DB backend - /// layer (the caller) to treat the connectivity loss as fatal. + /// the function will schedule a shutdown and return false. This instructs + /// the DB backend layer (the caller) to treat the connectivity loss as fatal. /// /// Otherwise, the function saves db_reconnect_ctl and invokes /// dbReconnect to initiate the reconnect process. diff --git a/src/lib/database/database_connection.h b/src/lib/database/database_connection.h index 22336b66bd..a4164f3341 100644 --- a/src/lib/database/database_connection.h +++ b/src/lib/database/database_connection.h @@ -1,4 +1,4 @@ -// Copyright (C) 2015-2019 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2015-2020 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -39,6 +39,14 @@ public: isc::Exception(file, line, what) {} }; +/// @brief Exception thrown when connectivity has been lost and +/// cannot be recovered. +class DbUnrecoverableError : public Exception { +public: + DbUnrecoverableError(const char* file, size_t line, const char* what) : + isc::Exception(file, line, what) {} +}; + /// @brief Invalid type exception /// /// Thrown when the factory doesn't recognize the type of the backend. diff --git a/src/lib/mysql/mysql_connection.h b/src/lib/mysql/mysql_connection.h index 929fbc16e5..5e7821d19d 100644 --- a/src/lib/mysql/mysql_connection.h +++ b/src/lib/mysql/mysql_connection.h @@ -536,8 +536,8 @@ public: /// If the error is deemed unrecoverable, such as a loss of connectivity /// with the server, the function will call invokeDbLostCallback(). If the /// invocation returns false then either there is no callback registered - /// or the callback has elected not to attempt to reconnect, and exit(-1) - /// is called; + /// or the callback has elected not to attempt to reconnect, and a + /// DbUnrecoverableError is thrown. /// /// If the invocation returns true, this indicates the calling layer will /// attempt recovery, and the function throws a DbOperationError to allow @@ -574,9 +574,10 @@ public: .arg(mysql_errno(mysql_)); // If there's no lost db callback or it returns false, - // then we're not attempting to recover so we're done + // then we're not attempting to recover so we're done. if (!invokeDbLostCallback()) { - exit (-1); + isc_throw(db::DbUnrecoverableError, + "database connectivity cannot be recovered"); } // We still need to throw so caller can error out of the current diff --git a/src/lib/pgsql/pgsql_connection.cc b/src/lib/pgsql/pgsql_connection.cc index af98f098cb..a77d6ce91c 100644 --- a/src/lib/pgsql/pgsql_connection.cc +++ b/src/lib/pgsql/pgsql_connection.cc @@ -309,9 +309,10 @@ PgSqlConnection::checkStatementError(const PgSqlResult& r, .arg(sqlstate ? sqlstate : ""); // If there's no lost db callback or it returns false, - // then we're not attempting to recover so we're done + // then we're not attempting to recover so we're done. if (!invokeDbLostCallback()) { - exit (-1); + isc_throw(db::DbUnrecoverableError, + "database connectivity cannot be recovered"); } // We still need to throw so caller can error out of the current diff --git a/src/lib/pgsql/pgsql_connection.h b/src/lib/pgsql/pgsql_connection.h index 63afe01fd0..69c1576e42 100644 --- a/src/lib/pgsql/pgsql_connection.h +++ b/src/lib/pgsql/pgsql_connection.h @@ -386,8 +386,8 @@ public: /// If the error is deemed unrecoverable, such as a loss of connectivity /// with the server, the function will call invokeDbLostCallback(). If the /// invocation returns false then either there is no callback registered - /// or the callback has elected not to attempt to reconnect, and exit(-1) - /// is called; + /// or the callback has elected not to attempt to reconnect, and a + /// DbUnrecoverableError is thrown. /// /// If the invocation returns true, this indicates the calling layer will /// attempt recovery, and the function throws a DbOperationError to allow