]> git.ipfire.org Git - thirdparty/FORT-validator.git/commitdiff
Catch lock errors
authorAlberto Leiva Popper <ydahhrk@gmail.com>
Tue, 30 Apr 2019 15:07:49 +0000 (10:07 -0500)
committerAlberto Leiva Popper <ydahhrk@gmail.com>
Tue, 30 Apr 2019 15:26:34 +0000 (10:26 -0500)
Though sometimes we can't do anything meaningful with the errors,
but panicking is probably better than ignoring them and subtly
doing the wrong thing.

14 files changed:
src/clients.c
src/clients.h
src/common.c
src/common.h
src/config.c
src/main.c
src/notify.c
src/notify.h
src/rtr/db/vrps.c
src/rtr/db/vrps.h
src/rtr/pdu_handler.c
src/rtr/rtr.c
src/updates_daemon.c
test/client_test.c

index 8604ba15d8ff3567f815e87ad91c847e9cf4704d..5169f2d7d4867751c7d4b08989c60ce8bd572c8a 100644 (file)
@@ -1,6 +1,6 @@
 #include "clients.h"
 
-#include <errno.h>
+#include <pthread.h>
 #include "common.h"
 #include "log.h"
 #include "data_structure/uthash.h"
@@ -8,11 +8,10 @@
 /*
  * TODO uthash panics on memory allocations...
  * http://troydhanson.github.io/uthash/userguide.html#_out_of_memory
- * TODO sem_wait(), sem_post(), sem_init() and sem_destroy() return error.
  */
 
-#define SADDR_IN(addr) ((struct sockaddr_in *)addr)
-#define SADDR_IN6(addr) ((struct sockaddr_in6 *)addr)
+#define SADDR_IN(addr) ((struct sockaddr_in *) addr)
+#define SADDR_IN6(addr) ((struct sockaddr_in6 *) addr)
 
 struct hashable_client {
        struct client meat;
@@ -20,19 +19,21 @@ struct hashable_client {
 };
 
 /** Hash table of clients */
-struct hashable_client *table;
-/** Read and Write locks */
-static sem_t rlock, wlock;
-/** Readers counter */
-static unsigned int rcounter;
+static struct hashable_client *table;
+/** Read/write lock, which protects @table and its inhabitants. */
+static pthread_rwlock_t lock;
 
-void
+int
 clients_db_init(void)
 {
+       int error;
+
        table = NULL;
-       sem_init(&rlock, 0, 1);
-       sem_init(&wlock, 0, 1);
-       rcounter = 0;
+       error = pthread_rwlock_init(&lock, NULL);
+       if (error)
+               return pr_errno(error, "pthread_rwlock_init() errored");
+
+       return 0;
 }
 
 static int
@@ -83,7 +84,7 @@ clients_add(int fd, struct sockaddr_storage *addr, uint8_t rtr_version)
        if (error)
                return error;
 
-       sem_wait(&wlock);
+       rwlock_write_lock(&lock);
 
        HASH_FIND_INT(table, &fd, old_client);
        if (old_client == NULL) {
@@ -99,7 +100,7 @@ clients_add(int fd, struct sockaddr_storage *addr, uint8_t rtr_version)
                        error = -ERTR_VERSION_MISMATCH;
        }
 
-       sem_post(&wlock);
+       rwlock_unlock(&lock);
 
        if (new_client != NULL)
                free(new_client);
@@ -112,22 +113,24 @@ clients_forget(int fd)
 {
        struct hashable_client *client;
 
-       sem_wait(&wlock);
+       rwlock_write_lock(&lock);
 
        HASH_FIND_INT(table, &fd, client);
        if (client != NULL)
                HASH_DEL(table, client);
 
-       sem_post(&wlock);
+       rwlock_unlock(&lock);
 }
 
 int
 clients_foreach(clients_foreach_cb cb, void *arg)
 {
        struct hashable_client *client;
-       int error = 0;
+       int error;
 
-       read_lock(&rlock, &wlock, &rcounter);
+       error = rwlock_read_lock(&lock);
+       if (error)
+               return error;
 
        for (client = table; client != NULL; client = client->hh.next) {
                error = cb(&client->meat, arg);
@@ -135,7 +138,7 @@ clients_foreach(clients_foreach_cb cb, void *arg)
                        break;
        }
 
-       read_unlock(&rlock, &wlock, &rcounter);
+       rwlock_unlock(&lock);
 
        return error;
 }
@@ -145,15 +148,10 @@ clients_db_destroy(void)
 {
        struct hashable_client *node, *tmp;
 
-       sem_wait(&wlock);
-
        HASH_ITER(hh, table, node, tmp) {
                HASH_DEL(table, node);
                free(node);
        }
 
-       sem_post(&wlock);
-
-       sem_destroy(&wlock);
-       sem_destroy(&rlock);
+       pthread_rwlock_destroy(&lock); /* Nothing to do with error code */
 }
index d60f0bc70f1eeca41b0ebfbd1438e06e9e21fb96..9286a61ac5cb4d8b26a9de786d6da5526c5fb7a1 100644 (file)
@@ -16,7 +16,7 @@ struct client {
        uint8_t rtr_version;
 };
 
-void clients_db_init(void);
+int clients_db_init(void);
 
 int clients_add(int, struct sockaddr_storage *, uint8_t);
 void clients_forget(int);
index b41387a7147fe6e93d00bc06f7e471f0e2ae15b4..29a8913a782e792c1f56f2b849b655aa95235545 100644 (file)
@@ -1,26 +1,66 @@
 #include "common.h"
 
+#include <errno.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include "log.h"
+
+int
+rwlock_read_lock(pthread_rwlock_t *lock)
+{
+       int error;
+
+       error = pthread_rwlock_rdlock(lock);
+       switch (error) {
+       case 0:
+               return error;
+       case EAGAIN:
+               pr_err("There are too many threads; I can't modify the database.");
+               return error;
+       }
+
+       /*
+        * EINVAL, EDEADLK and unknown nonstandard error codes.
+        * EINVAL, EDEADLK indicate serious programming errors. And it's
+        * probably safest to handle the rest the same.
+        * pthread_rwlock_rdlock() failing like this is akin to `if` failing;
+        * we're screwed badly, so let's just pull the trigger.
+        */
+       pr_err("pthread_rwlock_rdlock() returned error code %d. This is too critical for a graceful recovery; I must die now.",
+           error);
+       exit(error);
+}
+
 void
-read_lock(sem_t *read, sem_t *write, unsigned int *reader_count)
+rwlock_write_lock(pthread_rwlock_t *lock)
 {
-       sem_wait(read);
-       (*reader_count)++;
-       if (*reader_count == 1)
-               sem_wait(write);
-       sem_post(read);
+       int error;
+
+       /*
+        * POSIX says that the only available errors are EINVAL and EDEADLK.
+        * Both of them indicate serious programming errors.
+        */
+       error = pthread_rwlock_wrlock(lock);
+       if (error) {
+               pr_err("pthread_rwlock_wrlock() returned error code %d. This is too critical for a graceful recovery; I must die now.",
+                   error);
+               exit(error);
+       }
 }
 
-/*
- * MUST NOT be called without previously called 'read_lock' or done the same
- * things that such function does.
- */
 void
-read_unlock(sem_t *read, sem_t *write, unsigned int *reader_count)
+rwlock_unlock(pthread_rwlock_t *lock)
 {
-       sem_wait(read);
-       (*reader_count)--;
-       if (*reader_count == 0) {
-               sem_post(write);
+       int error;
+
+       /*
+        * POSIX says that the only available errors are EINVAL and EPERM.
+        * Both of them indicate serious programming errors.
+        */
+       error = pthread_rwlock_unlock(lock);
+       if (error) {
+               pr_err("pthread_rwlock_unlock() returned error code %d. This is too critical for a graceful recovery; I must die now.",
+                   error);
+               exit(error);
        }
-       sem_post(read);
 }
index 0c5c367ee55d08609708de563af2fe903b484a0c..a5c5c59615f6ae01e9104a2f55884c3fc888c994 100644 (file)
 #define ARRAY_LEN(array) (sizeof(array) / sizeof((array)[0]))
 
 /*
- * TODO (whatever) question: why are we not using pthread_rwlock_rdlock() and
- * pthread_rwlock_unlock()?
+ * rwlock wrappers. They are just a bunch of boilerplate, and removal of
+ * unrecoverable resulting error codes.
  */
-void read_lock(sem_t *, sem_t *, unsigned int *);
-void read_unlock(sem_t *, sem_t *, unsigned int *);
+int rwlock_read_lock(pthread_rwlock_t *);
+void rwlock_write_lock(pthread_rwlock_t *);
+void rwlock_unlock(pthread_rwlock_t *);
 
 #endif /* SRC_RTR_COMMON_H_ */
index f4c7aaa3e3b06ca14f2924e204148b16ebe53457..43fdded47a69a414ab5e319c76ec43e871e8e7dc 100644 (file)
@@ -206,8 +206,10 @@ static const struct option_field options[] = {
                .doc = "Interval used to look for updates at VRPs location",
                /*
                 * RFC 6810 and 8210:
-                * The cache MUST rate-limit Serial Notifies to no more frequently than
-                * one per minute.
+                * "The cache MUST rate-limit Serial Notifies to no more
+                * frequently than one per minute."
+                * We do this by not getting new information more than once per
+                * minute.
                 */
                .min = 60,
                .max = 7200,
index 73cd3a5fd9b3377125a4242c1f2682d72b3b43a2..77ce3aedbdce95272cde746b2e63e29065422ef1 100644 (file)
@@ -14,13 +14,18 @@ start_rtr_server(void)
 {
        int error;
 
-       vrps_init();
-       clients_db_init();
+       error = vrps_init();
+       if (error)
+               return error;
+       error = clients_db_init();
+       if (error)
+               goto revert_vrps;
 
        error = rtr_listen();
        rtr_cleanup(); /* TODO shouldn't this only happen on !error? */
 
        clients_db_destroy();
+revert_vrps:
        vrps_destroy();
        return error;
 }
index 0d813e39c152cf6e23e14578faed26599b1d5fd9..5db92afadd025f1391bd2b012d361347f5cdce0c 100644 (file)
@@ -26,9 +26,15 @@ send_notify(struct client const *client, void *arg)
        return 0; /* Do not interrupt notify to other clients */
 }
 
-void
+int
 notify_clients(void)
 {
-       uint32_t serial = get_last_serial_number();
-       clients_foreach(send_notify, &serial);
+       uint32_t serial;
+       int error;
+
+       error = get_last_serial_number(&serial);
+       if (error)
+               return error;
+
+       return clients_foreach(send_notify, &serial);
 }
index 3f1273e745f76e52c9fd45cc83ca3966b9f5fed6..44710abfad38843a52ae933a2c542049e8ab6962 100644 (file)
@@ -1,6 +1,6 @@
 #ifndef SRC_NOTIFY_H_
 #define SRC_NOTIFY_H_
 
-void notify_clients(void);
+int notify_clients(void);
 
 #endif /* SRC_NOTIFY_H_ */
index 97d7cbcee9d81da34ce47ad699cf7081fe97a3de..8dd217358c6dbb525d7992c4326ac96fa9ab6d8f 100644 (file)
@@ -1,5 +1,6 @@
 #include "vrps.h"
 
+#include <pthread.h>
 #include <stdbool.h>
 #include <string.h>
 #include "common.h"
@@ -29,11 +30,8 @@ struct state {
        time_t last_modified_date;
 } state;
 
-/* Read and Write locks */
-static sem_t rlock, wlock;
-
-/* Readers counter */
-static unsigned int rcounter;
+/** Read/write lock, which protects @state and its inhabitants. */
+static pthread_rwlock_t lock;
 
 static void
 delta_destroy(struct delta *delta)
@@ -41,9 +39,11 @@ delta_destroy(struct delta *delta)
        deltas_destroy(delta->deltas);
 }
 
-void
+int
 vrps_init(void)
 {
+       int error;
+
        state.base = NULL;
 
        deltas_db_init(&state.deltas);
@@ -62,21 +62,21 @@ vrps_init(void)
            ? (state.v0_session_id - 1)
            : (0xFFFFu);
 
-       sem_init(&rlock, 0, 1);
-       sem_init(&wlock, 0, 1);
-       rcounter = 0;
+       error = pthread_rwlock_init(&lock, NULL);
+       if (error) {
+               deltas_db_cleanup(&state.deltas, delta_destroy);
+               return pr_errno(error, "pthread_rwlock_init() errored");
+       }
+
+       return 0;
 }
 
 void
 vrps_destroy(void)
 {
-       sem_wait(&wlock);
        roa_tree_put(state.base);
        deltas_db_cleanup(&state.deltas, delta_destroy);
-       sem_post(&wlock);
-
-       sem_destroy(&wlock);
-       sem_destroy(&rlock);
+       pthread_rwlock_destroy(&lock); /* Nothing to do with error code */
 }
 
 /*
@@ -86,18 +86,16 @@ int
 vrps_update(struct roa_tree *new_tree, struct deltas *new_deltas)
 {
        struct delta new_delta;
-       int error;
+       int error = 0;
 
-       sem_wait(&wlock);
+       rwlock_write_lock(&lock);
 
        if (new_deltas != NULL) {
                new_delta.serial = state.current_serial + 1;
                new_delta.deltas = new_deltas;
                error = deltas_db_add(&state.deltas, &new_delta);
-               if (error) {
-                       sem_post(&wlock);
-                       return error;
-               }
+               if (error)
+                       goto end;
        }
 
        if (state.base != NULL)
@@ -106,8 +104,9 @@ vrps_update(struct roa_tree *new_tree, struct deltas *new_deltas)
        roa_tree_get(new_tree);
        state.current_serial++;
 
-       sem_post(&wlock);
-       return 0;
+end:
+       rwlock_unlock(&lock);
+       return error;
 }
 
 /*
@@ -116,50 +115,59 @@ vrps_update(struct roa_tree *new_tree, struct deltas *new_deltas)
  *
  * If SERIAL is received as NULL, and there's data at DB then the status will
  * be DIFF_AVAILABLE.
+ *
+ * This function can only fail due to critical r/w lock bugs.
  */
-enum delta_status
-deltas_db_status(uint32_t *serial)
+int
+deltas_db_status(uint32_t *serial, enum delta_status *result)
 {
        struct delta *delta;
-       enum delta_status result;
+       int error;
+
+       error = rwlock_read_lock(&lock);
+       if (error)
+               return error;
 
-       read_lock(&rlock, &wlock, &rcounter);
        if (state.base == NULL) {
-               result = DS_NO_DATA_AVAILABLE;
-               goto end;
+               *result = DS_NO_DATA_AVAILABLE;
+               goto rlock_succeed;
        }
 
        /* No serial to match, and there's data at DB */
        if (serial == NULL) {
-               result = DS_DIFF_AVAILABLE;
-               goto end;
+               *result = DS_DIFF_AVAILABLE;
+               goto rlock_succeed;
        }
 
        /* Is the last version? */
        if (*serial == state.current_serial) {
-               result = DS_NO_DIFF;
-               goto end;
+               *result = DS_NO_DIFF;
+               goto rlock_succeed;
        }
 
        /* Get the delta corresponding to the serial */
        ARRAYLIST_FOREACH(&state.deltas, delta)
                if (delta->serial == *serial) {
-                       result = DS_DIFF_AVAILABLE;
-                       goto end;
+                       *result = DS_DIFF_AVAILABLE;
+                       goto rlock_succeed;
                }
 
        /* No match yet, release lock */
-       read_unlock(&rlock, &wlock, &rcounter);
+       rwlock_unlock(&lock);
 
        /* The first serial isn't at deltas */
-       if (*serial == START_SERIAL)
-               return DS_DIFF_AVAILABLE;
+       if (*serial == START_SERIAL) {
+               *result = DS_DIFF_AVAILABLE;
+               return 0;
+       }
 
        /* Reached end, diff can't be determined */
-       return DS_DIFF_UNDETERMINED;
-end:
-       read_unlock(&rlock, &wlock, &rcounter);
-       return result;
+       *result = DS_DIFF_UNDETERMINED;
+       return 0;
+
+rlock_succeed:
+       rwlock_unlock(&lock);
+       return 0;
 }
 
 int
@@ -167,9 +175,13 @@ vrps_foreach_base_roa(vrp_foreach_cb cb, void *arg)
 {
        int error;
 
-       read_lock(&rlock, &wlock, &rcounter);
+       error = rwlock_read_lock(&lock);
+       if (error)
+               return error;
+
        error = roa_tree_foreach_roa(state.base, cb, arg);
-       read_unlock(&rlock, &wlock, &rcounter);
+
+       rwlock_unlock(&lock);
 
        return error;
 }
@@ -182,9 +194,10 @@ vrps_foreach_delta_roa(uint32_t from, uint32_t to, vrp_foreach_cb cb, void *arg)
        int error;
 
        from_found = false;
-       error = 0;
 
-       read_lock(&rlock, &wlock, &rcounter);
+       error = rwlock_read_lock(&lock);
+       if (error)
+               return error;
 
        ARRAYLIST_FOREACH(&state.deltas, d) {
                if (!from_found) {
@@ -199,21 +212,25 @@ vrps_foreach_delta_roa(uint32_t from, uint32_t to, vrp_foreach_cb cb, void *arg)
                }
        }
 
-       read_unlock(&rlock, &wlock, &rcounter);
+       rwlock_unlock(&lock);
 
        return error;
 }
 
-uint32_t
-get_last_serial_number(void)
+int
+get_last_serial_number(uint32_t *result)
 {
-       uint32_t serial;
+       int error;
+
+       error = rwlock_read_lock(&lock);
+       if (error)
+               return error;
 
-       read_lock(&rlock, &wlock, &rcounter);
-       serial = state.current_serial - 1;
-       read_unlock(&rlock, &wlock, &rcounter);
+       *result = state.current_serial - 1;
 
-       return serial;
+       rwlock_unlock(&lock);
+
+       return 0;
 }
 
 uint16_t
index 6b7eed6fbe8ec23474621a6d9b1866f8b524f7f6..7d3be17e2c4619e7047e83d77649f6b59c42a589 100644 (file)
@@ -17,16 +17,16 @@ enum delta_status {
        DS_DIFF_AVAILABLE,
 };
 
-void vrps_init(void);
+int vrps_init(void);
 void vrps_destroy(void);
 
 int vrps_update(struct roa_tree *, struct deltas *);
-enum delta_status deltas_db_status(uint32_t *);
+int deltas_db_status(uint32_t *, enum delta_status *);
 
 int vrps_foreach_base_roa(vrp_foreach_cb, void *);
 int vrps_foreach_delta_roa(uint32_t, uint32_t, vrp_foreach_cb, void *);
 
-uint32_t get_last_serial_number(void);
+int get_last_serial_number(uint32_t *);
 uint16_t get_current_session_id(uint8_t);
 
 #endif /* SRC_VRPS_H_ */
index 2b09b85e330ce383b6b5fa8702f78d62eee1e925..675ca163422bf31109c0f7e0dff2a1c9f98b9bda 100644 (file)
@@ -32,6 +32,11 @@ send_commmon_exchange(struct sender_common *common,
 {
        int error;
 
+       /*
+        * TODO (urgent) On certain errors, shouldn't we send error PDUs or
+        * something?
+        */
+
        /* Send Cache response PDU */
        error = send_cache_response_pdu(common);
        if (error)
@@ -76,12 +81,15 @@ handle_serial_query_pdu(int fd, void *pdu)
        if (received->header.m.session_id != session_id)
                return err_pdu_send(fd, version, ERR_PDU_CORRUPT_DATA,
                    &received->header, NULL);
+       if (get_last_serial_number(&current_serial) != 0)
+               goto critical;
 
-       current_serial = get_last_serial_number();
        init_sender_common(&common, fd, version, &session_id,
            &received->serial_number, &current_serial);
 
-       updates = deltas_db_status(common.start_serial);
+       if (deltas_db_status(common.start_serial, &updates) != 0)
+               goto critical;
+
        switch (updates) {
        case DS_NO_DATA_AVAILABLE:
                /* https://tools.ietf.org/html/rfc8210#section-8.4 */
@@ -103,6 +111,10 @@ handle_serial_query_pdu(int fd, void *pdu)
 
        warnx("Reached 'unreachable' code");
        return -EINVAL;
+
+critical:
+       return err_pdu_send(fd, version, ERR_PDU_INTERNAL_ERROR,
+           &received->header, NULL);
 }
 
 int
@@ -117,11 +129,14 @@ handle_reset_query_pdu(int fd, void *pdu)
 
        version = received->header.protocol_version;
        session_id = get_current_session_id(version);
-       current_serial = get_last_serial_number();
+       if (get_last_serial_number(&current_serial) != 0)
+               goto critical;
+
        init_sender_common(&common, fd, version, &session_id, NULL,
            &current_serial);
 
-       updates = deltas_db_status(NULL);
+       if (deltas_db_status(NULL, &updates) != 0)
+               goto critical;
        switch (updates) {
        case DS_NO_DATA_AVAILABLE:
                /* https://tools.ietf.org/html/rfc8210#section-8.4 */
@@ -137,6 +152,10 @@ handle_reset_query_pdu(int fd, void *pdu)
 
        warnx("Reached 'unreachable' code");
        return -EINVAL;
+
+critical:
+       return err_pdu_send(fd, version, ERR_PDU_INTERNAL_ERROR,
+           &received->header, NULL);
 }
 
 int
index 97e616b0551d9df997533dd0e90690e5a2f57c04..2839f872cb65a816bcb2c25c0035b5ea9a3a78a9 100644 (file)
@@ -19,7 +19,7 @@
 #include "rtr/err_pdu.h"
 #include "rtr/pdu.h"
 
-/* TODO (next iteration) Support both RTR v0 an v1 */
+/* TODO (next iteration) Support both RTR v0 and v1 */
 #define RTR_VERSION_SUPPORTED  RTR_V0
 
 volatile bool loop;
@@ -182,6 +182,7 @@ client_thread_cb(void *param_void)
        uint8_t rtr_version;
 
        memcpy(&param, param_void, sizeof(param));
+       free(param_void);
 
        while (loop) { /* For each PDU... */
                err = pdu_load(param.client_fd, &pdu, &meta, &rtr_version);
@@ -225,7 +226,7 @@ static int
 handle_client_connections(int server_fd)
 {
        struct sockaddr_storage client_addr;
-       struct thread_param arg;
+       struct thread_param *arg;
        struct thread_node *new_thread;
        socklen_t sizeof_client_addr;
        pthread_attr_t attr;
@@ -261,16 +262,29 @@ handle_client_connections(int server_fd)
                        continue;
                }
 
-               arg.client_fd = client_fd;
-               arg.client_addr = client_addr;
+               arg = malloc(sizeof(struct thread_param));
+               if (arg == NULL) {
+                       warnx("Couldn't create thread_param struct");
+                       free(new_thread);
+                       close(client_fd);
+                       continue;
+               }
+               arg->client_fd = client_fd;
+               arg->client_addr = client_addr;
 
                pthread_attr_init(&attr);
                pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
                errno = pthread_create(&new_thread->tid, &attr,
-                   client_thread_cb, &arg);
+                   client_thread_cb, arg);
                pthread_attr_destroy(&attr);
                if (errno) {
                        warn("Could not spawn the client's thread");
+                       /*
+                        * It is not clear to me whether @arg should be freed
+                        * here. We're supposed to have transferred its
+                        * ownership to the thread.
+                        * Maybe we should store it in @new_thread instead.
+                        */
                        free(new_thread);
                        close(client_fd);
                        continue;
index 621ce1fbf3ff796440e7192fce74f29c623a6a52..93d8869c92cc1374e86d17146c8d593b173e8eba 100644 (file)
@@ -112,8 +112,12 @@ check_vrps_updates(void *param_void)
                }
 
                old_tree = validation_handler.arg;
-               notify_clients();
-               pr_debug("Database updated successfully. Sleeping...");
+               error = notify_clients();
+               if (error)
+                       pr_debug("Could not notify clients of the new VRPs. (Error code %d.) Sleeping...",
+                           error);
+               else
+                       pr_debug("Database updated successfully. Sleeping...");
 
 sleep:
                sleep(config_get_validation_interval());
index fff3ff82a05f0cb2a844c11a22b3db46c683ecba..f565201e5064398147fdd11c03748a4b753fff04 100644 (file)
@@ -43,7 +43,7 @@ START_TEST(basic_test)
        addr.sin_port = 1234;
        addr_ptr = (struct sockaddr_storage *) &addr;
 
-       clients_db_init();
+       ck_assert_int_eq(0, clients_db_init());
 
        /*
         * The address is actually supposed to be unique, but this is rather