From: Alberto Leiva Popper Date: Tue, 30 Apr 2019 15:07:49 +0000 (-0500) Subject: Catch lock errors X-Git-Tag: v0.0.2~46^2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f901aeddfdffd2badfaacbd0c48010161ba0c6f3;p=thirdparty%2FFORT-validator.git Catch lock errors Though sometimes we can't do anything meaningful with the errors, but panicking is probably better than ignoring them and subtly doing the wrong thing. --- diff --git a/src/clients.c b/src/clients.c index 8604ba15..5169f2d7 100644 --- a/src/clients.c +++ b/src/clients.c @@ -1,6 +1,6 @@ #include "clients.h" -#include +#include #include "common.h" #include "log.h" #include "data_structure/uthash.h" @@ -8,11 +8,10 @@ /* * TODO uthash panics on memory allocations... * http://troydhanson.github.io/uthash/userguide.html#_out_of_memory - * TODO sem_wait(), sem_post(), sem_init() and sem_destroy() return error. */ -#define SADDR_IN(addr) ((struct sockaddr_in *)addr) -#define SADDR_IN6(addr) ((struct sockaddr_in6 *)addr) +#define SADDR_IN(addr) ((struct sockaddr_in *) addr) +#define SADDR_IN6(addr) ((struct sockaddr_in6 *) addr) struct hashable_client { struct client meat; @@ -20,19 +19,21 @@ struct hashable_client { }; /** Hash table of clients */ -struct hashable_client *table; -/** Read and Write locks */ -static sem_t rlock, wlock; -/** Readers counter */ -static unsigned int rcounter; +static struct hashable_client *table; +/** Read/write lock, which protects @table and its inhabitants. */ +static pthread_rwlock_t lock; -void +int clients_db_init(void) { + int error; + table = NULL; - sem_init(&rlock, 0, 1); - sem_init(&wlock, 0, 1); - rcounter = 0; + error = pthread_rwlock_init(&lock, NULL); + if (error) + return pr_errno(error, "pthread_rwlock_init() errored"); + + return 0; } static int @@ -83,7 +84,7 @@ clients_add(int fd, struct sockaddr_storage *addr, uint8_t rtr_version) if (error) return error; - sem_wait(&wlock); + rwlock_write_lock(&lock); HASH_FIND_INT(table, &fd, old_client); if (old_client == NULL) { @@ -99,7 +100,7 @@ clients_add(int fd, struct sockaddr_storage *addr, uint8_t rtr_version) error = -ERTR_VERSION_MISMATCH; } - sem_post(&wlock); + rwlock_unlock(&lock); if (new_client != NULL) free(new_client); @@ -112,22 +113,24 @@ clients_forget(int fd) { struct hashable_client *client; - sem_wait(&wlock); + rwlock_write_lock(&lock); HASH_FIND_INT(table, &fd, client); if (client != NULL) HASH_DEL(table, client); - sem_post(&wlock); + rwlock_unlock(&lock); } int clients_foreach(clients_foreach_cb cb, void *arg) { struct hashable_client *client; - int error = 0; + int error; - read_lock(&rlock, &wlock, &rcounter); + error = rwlock_read_lock(&lock); + if (error) + return error; for (client = table; client != NULL; client = client->hh.next) { error = cb(&client->meat, arg); @@ -135,7 +138,7 @@ clients_foreach(clients_foreach_cb cb, void *arg) break; } - read_unlock(&rlock, &wlock, &rcounter); + rwlock_unlock(&lock); return error; } @@ -145,15 +148,10 @@ clients_db_destroy(void) { struct hashable_client *node, *tmp; - sem_wait(&wlock); - HASH_ITER(hh, table, node, tmp) { HASH_DEL(table, node); free(node); } - sem_post(&wlock); - - sem_destroy(&wlock); - sem_destroy(&rlock); + pthread_rwlock_destroy(&lock); /* Nothing to do with error code */ } diff --git a/src/clients.h b/src/clients.h index d60f0bc7..9286a61a 100644 --- a/src/clients.h +++ b/src/clients.h @@ -16,7 +16,7 @@ struct client { uint8_t rtr_version; }; -void clients_db_init(void); +int clients_db_init(void); int clients_add(int, struct sockaddr_storage *, uint8_t); void clients_forget(int); diff --git a/src/common.c b/src/common.c index b41387a7..29a8913a 100644 --- a/src/common.c +++ b/src/common.c @@ -1,26 +1,66 @@ #include "common.h" +#include +#include +#include +#include "log.h" + +int +rwlock_read_lock(pthread_rwlock_t *lock) +{ + int error; + + error = pthread_rwlock_rdlock(lock); + switch (error) { + case 0: + return error; + case EAGAIN: + pr_err("There are too many threads; I can't modify the database."); + return error; + } + + /* + * EINVAL, EDEADLK and unknown nonstandard error codes. + * EINVAL, EDEADLK indicate serious programming errors. And it's + * probably safest to handle the rest the same. + * pthread_rwlock_rdlock() failing like this is akin to `if` failing; + * we're screwed badly, so let's just pull the trigger. + */ + pr_err("pthread_rwlock_rdlock() returned error code %d. This is too critical for a graceful recovery; I must die now.", + error); + exit(error); +} + void -read_lock(sem_t *read, sem_t *write, unsigned int *reader_count) +rwlock_write_lock(pthread_rwlock_t *lock) { - sem_wait(read); - (*reader_count)++; - if (*reader_count == 1) - sem_wait(write); - sem_post(read); + int error; + + /* + * POSIX says that the only available errors are EINVAL and EDEADLK. + * Both of them indicate serious programming errors. + */ + error = pthread_rwlock_wrlock(lock); + if (error) { + pr_err("pthread_rwlock_wrlock() returned error code %d. This is too critical for a graceful recovery; I must die now.", + error); + exit(error); + } } -/* - * MUST NOT be called without previously called 'read_lock' or done the same - * things that such function does. - */ void -read_unlock(sem_t *read, sem_t *write, unsigned int *reader_count) +rwlock_unlock(pthread_rwlock_t *lock) { - sem_wait(read); - (*reader_count)--; - if (*reader_count == 0) { - sem_post(write); + int error; + + /* + * POSIX says that the only available errors are EINVAL and EPERM. + * Both of them indicate serious programming errors. + */ + error = pthread_rwlock_unlock(lock); + if (error) { + pr_err("pthread_rwlock_unlock() returned error code %d. This is too critical for a graceful recovery; I must die now.", + error); + exit(error); } - sem_post(read); } diff --git a/src/common.h b/src/common.h index 0c5c367e..a5c5c596 100644 --- a/src/common.h +++ b/src/common.h @@ -24,10 +24,11 @@ #define ARRAY_LEN(array) (sizeof(array) / sizeof((array)[0])) /* - * TODO (whatever) question: why are we not using pthread_rwlock_rdlock() and - * pthread_rwlock_unlock()? + * rwlock wrappers. They are just a bunch of boilerplate, and removal of + * unrecoverable resulting error codes. */ -void read_lock(sem_t *, sem_t *, unsigned int *); -void read_unlock(sem_t *, sem_t *, unsigned int *); +int rwlock_read_lock(pthread_rwlock_t *); +void rwlock_write_lock(pthread_rwlock_t *); +void rwlock_unlock(pthread_rwlock_t *); #endif /* SRC_RTR_COMMON_H_ */ diff --git a/src/config.c b/src/config.c index f4c7aaa3..43fdded4 100644 --- a/src/config.c +++ b/src/config.c @@ -206,8 +206,10 @@ static const struct option_field options[] = { .doc = "Interval used to look for updates at VRPs location", /* * RFC 6810 and 8210: - * The cache MUST rate-limit Serial Notifies to no more frequently than - * one per minute. + * "The cache MUST rate-limit Serial Notifies to no more + * frequently than one per minute." + * We do this by not getting new information more than once per + * minute. */ .min = 60, .max = 7200, diff --git a/src/main.c b/src/main.c index 73cd3a5f..77ce3aed 100644 --- a/src/main.c +++ b/src/main.c @@ -14,13 +14,18 @@ start_rtr_server(void) { int error; - vrps_init(); - clients_db_init(); + error = vrps_init(); + if (error) + return error; + error = clients_db_init(); + if (error) + goto revert_vrps; error = rtr_listen(); rtr_cleanup(); /* TODO shouldn't this only happen on !error? */ clients_db_destroy(); +revert_vrps: vrps_destroy(); return error; } diff --git a/src/notify.c b/src/notify.c index 0d813e39..5db92afa 100644 --- a/src/notify.c +++ b/src/notify.c @@ -26,9 +26,15 @@ send_notify(struct client const *client, void *arg) return 0; /* Do not interrupt notify to other clients */ } -void +int notify_clients(void) { - uint32_t serial = get_last_serial_number(); - clients_foreach(send_notify, &serial); + uint32_t serial; + int error; + + error = get_last_serial_number(&serial); + if (error) + return error; + + return clients_foreach(send_notify, &serial); } diff --git a/src/notify.h b/src/notify.h index 3f1273e7..44710abf 100644 --- a/src/notify.h +++ b/src/notify.h @@ -1,6 +1,6 @@ #ifndef SRC_NOTIFY_H_ #define SRC_NOTIFY_H_ -void notify_clients(void); +int notify_clients(void); #endif /* SRC_NOTIFY_H_ */ diff --git a/src/rtr/db/vrps.c b/src/rtr/db/vrps.c index 97d7cbce..8dd21735 100644 --- a/src/rtr/db/vrps.c +++ b/src/rtr/db/vrps.c @@ -1,5 +1,6 @@ #include "vrps.h" +#include #include #include #include "common.h" @@ -29,11 +30,8 @@ struct state { time_t last_modified_date; } state; -/* Read and Write locks */ -static sem_t rlock, wlock; - -/* Readers counter */ -static unsigned int rcounter; +/** Read/write lock, which protects @state and its inhabitants. */ +static pthread_rwlock_t lock; static void delta_destroy(struct delta *delta) @@ -41,9 +39,11 @@ delta_destroy(struct delta *delta) deltas_destroy(delta->deltas); } -void +int vrps_init(void) { + int error; + state.base = NULL; deltas_db_init(&state.deltas); @@ -62,21 +62,21 @@ vrps_init(void) ? (state.v0_session_id - 1) : (0xFFFFu); - sem_init(&rlock, 0, 1); - sem_init(&wlock, 0, 1); - rcounter = 0; + error = pthread_rwlock_init(&lock, NULL); + if (error) { + deltas_db_cleanup(&state.deltas, delta_destroy); + return pr_errno(error, "pthread_rwlock_init() errored"); + } + + return 0; } void vrps_destroy(void) { - sem_wait(&wlock); roa_tree_put(state.base); deltas_db_cleanup(&state.deltas, delta_destroy); - sem_post(&wlock); - - sem_destroy(&wlock); - sem_destroy(&rlock); + pthread_rwlock_destroy(&lock); /* Nothing to do with error code */ } /* @@ -86,18 +86,16 @@ int vrps_update(struct roa_tree *new_tree, struct deltas *new_deltas) { struct delta new_delta; - int error; + int error = 0; - sem_wait(&wlock); + rwlock_write_lock(&lock); if (new_deltas != NULL) { new_delta.serial = state.current_serial + 1; new_delta.deltas = new_deltas; error = deltas_db_add(&state.deltas, &new_delta); - if (error) { - sem_post(&wlock); - return error; - } + if (error) + goto end; } if (state.base != NULL) @@ -106,8 +104,9 @@ vrps_update(struct roa_tree *new_tree, struct deltas *new_deltas) roa_tree_get(new_tree); state.current_serial++; - sem_post(&wlock); - return 0; +end: + rwlock_unlock(&lock); + return error; } /* @@ -116,50 +115,59 @@ vrps_update(struct roa_tree *new_tree, struct deltas *new_deltas) * * If SERIAL is received as NULL, and there's data at DB then the status will * be DIFF_AVAILABLE. + * + * This function can only fail due to critical r/w lock bugs. */ -enum delta_status -deltas_db_status(uint32_t *serial) +int +deltas_db_status(uint32_t *serial, enum delta_status *result) { struct delta *delta; - enum delta_status result; + int error; + + error = rwlock_read_lock(&lock); + if (error) + return error; - read_lock(&rlock, &wlock, &rcounter); if (state.base == NULL) { - result = DS_NO_DATA_AVAILABLE; - goto end; + *result = DS_NO_DATA_AVAILABLE; + goto rlock_succeed; } /* No serial to match, and there's data at DB */ if (serial == NULL) { - result = DS_DIFF_AVAILABLE; - goto end; + *result = DS_DIFF_AVAILABLE; + goto rlock_succeed; } /* Is the last version? */ if (*serial == state.current_serial) { - result = DS_NO_DIFF; - goto end; + *result = DS_NO_DIFF; + goto rlock_succeed; } /* Get the delta corresponding to the serial */ ARRAYLIST_FOREACH(&state.deltas, delta) if (delta->serial == *serial) { - result = DS_DIFF_AVAILABLE; - goto end; + *result = DS_DIFF_AVAILABLE; + goto rlock_succeed; } /* No match yet, release lock */ - read_unlock(&rlock, &wlock, &rcounter); + rwlock_unlock(&lock); /* The first serial isn't at deltas */ - if (*serial == START_SERIAL) - return DS_DIFF_AVAILABLE; + if (*serial == START_SERIAL) { + *result = DS_DIFF_AVAILABLE; + return 0; + } /* Reached end, diff can't be determined */ - return DS_DIFF_UNDETERMINED; -end: - read_unlock(&rlock, &wlock, &rcounter); - return result; + *result = DS_DIFF_UNDETERMINED; + return 0; + +rlock_succeed: + rwlock_unlock(&lock); + return 0; } int @@ -167,9 +175,13 @@ vrps_foreach_base_roa(vrp_foreach_cb cb, void *arg) { int error; - read_lock(&rlock, &wlock, &rcounter); + error = rwlock_read_lock(&lock); + if (error) + return error; + error = roa_tree_foreach_roa(state.base, cb, arg); - read_unlock(&rlock, &wlock, &rcounter); + + rwlock_unlock(&lock); return error; } @@ -182,9 +194,10 @@ vrps_foreach_delta_roa(uint32_t from, uint32_t to, vrp_foreach_cb cb, void *arg) int error; from_found = false; - error = 0; - read_lock(&rlock, &wlock, &rcounter); + error = rwlock_read_lock(&lock); + if (error) + return error; ARRAYLIST_FOREACH(&state.deltas, d) { if (!from_found) { @@ -199,21 +212,25 @@ vrps_foreach_delta_roa(uint32_t from, uint32_t to, vrp_foreach_cb cb, void *arg) } } - read_unlock(&rlock, &wlock, &rcounter); + rwlock_unlock(&lock); return error; } -uint32_t -get_last_serial_number(void) +int +get_last_serial_number(uint32_t *result) { - uint32_t serial; + int error; + + error = rwlock_read_lock(&lock); + if (error) + return error; - read_lock(&rlock, &wlock, &rcounter); - serial = state.current_serial - 1; - read_unlock(&rlock, &wlock, &rcounter); + *result = state.current_serial - 1; - return serial; + rwlock_unlock(&lock); + + return 0; } uint16_t diff --git a/src/rtr/db/vrps.h b/src/rtr/db/vrps.h index 6b7eed6f..7d3be17e 100644 --- a/src/rtr/db/vrps.h +++ b/src/rtr/db/vrps.h @@ -17,16 +17,16 @@ enum delta_status { DS_DIFF_AVAILABLE, }; -void vrps_init(void); +int vrps_init(void); void vrps_destroy(void); int vrps_update(struct roa_tree *, struct deltas *); -enum delta_status deltas_db_status(uint32_t *); +int deltas_db_status(uint32_t *, enum delta_status *); int vrps_foreach_base_roa(vrp_foreach_cb, void *); int vrps_foreach_delta_roa(uint32_t, uint32_t, vrp_foreach_cb, void *); -uint32_t get_last_serial_number(void); +int get_last_serial_number(uint32_t *); uint16_t get_current_session_id(uint8_t); #endif /* SRC_VRPS_H_ */ diff --git a/src/rtr/pdu_handler.c b/src/rtr/pdu_handler.c index 2b09b85e..675ca163 100644 --- a/src/rtr/pdu_handler.c +++ b/src/rtr/pdu_handler.c @@ -32,6 +32,11 @@ send_commmon_exchange(struct sender_common *common, { int error; + /* + * TODO (urgent) On certain errors, shouldn't we send error PDUs or + * something? + */ + /* Send Cache response PDU */ error = send_cache_response_pdu(common); if (error) @@ -76,12 +81,15 @@ handle_serial_query_pdu(int fd, void *pdu) if (received->header.m.session_id != session_id) return err_pdu_send(fd, version, ERR_PDU_CORRUPT_DATA, &received->header, NULL); + if (get_last_serial_number(¤t_serial) != 0) + goto critical; - current_serial = get_last_serial_number(); init_sender_common(&common, fd, version, &session_id, &received->serial_number, ¤t_serial); - updates = deltas_db_status(common.start_serial); + if (deltas_db_status(common.start_serial, &updates) != 0) + goto critical; + switch (updates) { case DS_NO_DATA_AVAILABLE: /* https://tools.ietf.org/html/rfc8210#section-8.4 */ @@ -103,6 +111,10 @@ handle_serial_query_pdu(int fd, void *pdu) warnx("Reached 'unreachable' code"); return -EINVAL; + +critical: + return err_pdu_send(fd, version, ERR_PDU_INTERNAL_ERROR, + &received->header, NULL); } int @@ -117,11 +129,14 @@ handle_reset_query_pdu(int fd, void *pdu) version = received->header.protocol_version; session_id = get_current_session_id(version); - current_serial = get_last_serial_number(); + if (get_last_serial_number(¤t_serial) != 0) + goto critical; + init_sender_common(&common, fd, version, &session_id, NULL, ¤t_serial); - updates = deltas_db_status(NULL); + if (deltas_db_status(NULL, &updates) != 0) + goto critical; switch (updates) { case DS_NO_DATA_AVAILABLE: /* https://tools.ietf.org/html/rfc8210#section-8.4 */ @@ -137,6 +152,10 @@ handle_reset_query_pdu(int fd, void *pdu) warnx("Reached 'unreachable' code"); return -EINVAL; + +critical: + return err_pdu_send(fd, version, ERR_PDU_INTERNAL_ERROR, + &received->header, NULL); } int diff --git a/src/rtr/rtr.c b/src/rtr/rtr.c index 97e616b0..2839f872 100644 --- a/src/rtr/rtr.c +++ b/src/rtr/rtr.c @@ -19,7 +19,7 @@ #include "rtr/err_pdu.h" #include "rtr/pdu.h" -/* TODO (next iteration) Support both RTR v0 an v1 */ +/* TODO (next iteration) Support both RTR v0 and v1 */ #define RTR_VERSION_SUPPORTED RTR_V0 volatile bool loop; @@ -182,6 +182,7 @@ client_thread_cb(void *param_void) uint8_t rtr_version; memcpy(¶m, param_void, sizeof(param)); + free(param_void); while (loop) { /* For each PDU... */ err = pdu_load(param.client_fd, &pdu, &meta, &rtr_version); @@ -225,7 +226,7 @@ static int handle_client_connections(int server_fd) { struct sockaddr_storage client_addr; - struct thread_param arg; + struct thread_param *arg; struct thread_node *new_thread; socklen_t sizeof_client_addr; pthread_attr_t attr; @@ -261,16 +262,29 @@ handle_client_connections(int server_fd) continue; } - arg.client_fd = client_fd; - arg.client_addr = client_addr; + arg = malloc(sizeof(struct thread_param)); + if (arg == NULL) { + warnx("Couldn't create thread_param struct"); + free(new_thread); + close(client_fd); + continue; + } + arg->client_fd = client_fd; + arg->client_addr = client_addr; pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); errno = pthread_create(&new_thread->tid, &attr, - client_thread_cb, &arg); + client_thread_cb, arg); pthread_attr_destroy(&attr); if (errno) { warn("Could not spawn the client's thread"); + /* + * It is not clear to me whether @arg should be freed + * here. We're supposed to have transferred its + * ownership to the thread. + * Maybe we should store it in @new_thread instead. + */ free(new_thread); close(client_fd); continue; diff --git a/src/updates_daemon.c b/src/updates_daemon.c index 621ce1fb..93d8869c 100644 --- a/src/updates_daemon.c +++ b/src/updates_daemon.c @@ -112,8 +112,12 @@ check_vrps_updates(void *param_void) } old_tree = validation_handler.arg; - notify_clients(); - pr_debug("Database updated successfully. Sleeping..."); + error = notify_clients(); + if (error) + pr_debug("Could not notify clients of the new VRPs. (Error code %d.) Sleeping...", + error); + else + pr_debug("Database updated successfully. Sleeping..."); sleep: sleep(config_get_validation_interval()); diff --git a/test/client_test.c b/test/client_test.c index fff3ff82..f565201e 100644 --- a/test/client_test.c +++ b/test/client_test.c @@ -43,7 +43,7 @@ START_TEST(basic_test) addr.sin_port = 1234; addr_ptr = (struct sockaddr_storage *) &addr; - clients_db_init(); + ck_assert_int_eq(0, clients_db_init()); /* * The address is actually supposed to be unique, but this is rather