From: Alberto Leiva Popper Date: Mon, 17 Jul 2023 21:01:17 +0000 (-0600) Subject: Review of RTR half of #98 X-Git-Tag: 1.6.0~3^2~2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d195700bbac43128e1f74fa0bef4c406450a374d;p=thirdparty%2FFORT-validator.git Review of RTR half of #98 On closer inspection, none of the error messages logged in #98 imply a problem, so I have reduced their severities, removed the stack traces and improved the error messages. Fixes the error messages half of #98. I still need to look into the alleged discrepancies with Routinator and Cloudflare. --- diff --git a/src/rtr/db/vrps.c b/src/rtr/db/vrps.c index dd1c9091..ab19c011 100644 --- a/src/rtr/db/vrps.c +++ b/src/rtr/db/vrps.c @@ -415,14 +415,13 @@ vrps_foreach_base(vrp_foreach_cb cb_roa, router_key_foreach_cb cb_rk, void *arg) if (state.base != NULL) { error = db_table_foreach_roa(state.base, cb_roa, arg); if (error) - goto unlock; + goto end; error = db_table_foreach_router_key(state.base, cb_rk, arg); } else error = -EAGAIN; -unlock: +end: rwlock_unlock(&state_lock); - return error; } diff --git a/src/rtr/pdu.c b/src/rtr/pdu.c index 7524e478..cd96237f 100644 --- a/src/rtr/pdu.c +++ b/src/rtr/pdu.c @@ -310,7 +310,7 @@ error_report_destroy(void *pdu_void) DEFINE_METADATA(serial_notify, free); DEFINE_METADATA(serial_query, free); /* handle_serial_query_pdu */ -DEFINE_METADATA(reset_query, free); +DEFINE_METADATA(reset_query, free); /* handle_reset_query_pdu */ DEFINE_METADATA(cache_response, free); DEFINE_METADATA(ipv4_prefix, free); DEFINE_METADATA(ipv6_prefix, free); diff --git a/src/rtr/pdu_handler.c b/src/rtr/pdu_handler.c index f822b557..be72736a 100644 --- a/src/rtr/pdu_handler.c +++ b/src/rtr/pdu_handler.c @@ -79,6 +79,11 @@ handle_serial_query_pdu(int fd, struct rtr_request const *request) serial_t final_serial; int error; + pr_op_debug("Serial Query. Request version/session/serial: %u/%u/%u", + query->header.protocol_version, + query->header.m.session_id, + query->serial_number); + /* * RFC 6810 and 8210: * "If [...] either the router or the cache finds that the value of the diff --git a/src/rtr/pdu_sender.c b/src/rtr/pdu_sender.c index ed0c6215..7ce70b26 100644 --- a/src/rtr/pdu_sender.c +++ b/src/rtr/pdu_sender.c @@ -28,17 +28,47 @@ set_header_values(struct pdu_header *header, uint8_t version, uint8_t type, header->m.reserved = reserved; } +static int +print_poll_failure(struct pollfd *pfd) +{ + /* + * The main polling thread already logs relevant revents in sensible + * levels (see apply_pollfds()), so we'll just whine on debug. + */ + + pr_op_debug("poll() returned revents '0x%02x'. This means", pfd->revents); + if (pfd->revents & POLLHUP) { + pr_op_debug("- 0x%02x: Peer hung up.", POLLHUP); + } + if (pfd->revents & POLLERR) { + pr_op_debug("- 0x%02x: Read end was closed, or generic error.", + POLLERR); + } + if (pfd->revents & POLLNVAL) { + /* + * In our case, this is perfectly normal. The main polling + * thread closed it while we were trying to write. Whatever. + */ + pr_op_debug("- 0x%02x: File Descriptor not open.", POLLNVAL); + } + + /* Interrupt handler thread, but no need to raise alarms. */ + return -EINVAL; +} + static int send_response(int fd, uint8_t pdu_type, unsigned char *data, size_t data_len) { struct pollfd pfd; int error; - pr_op_debug("Sending %s to client.", pdutype2str(pdu_type)); - pfd.fd = fd; pfd.events = POLLOUT; + /* + * We need to poll before writing because the socket has O_NONBLOCK set. + * (And it needs O_NONBLOCK because of the main thread's read poll.) + */ do { pfd.revents = 0; error = poll(&pfd, 1, -1); @@ -47,12 +77,12 @@ send_response(int fd, uint8_t pdu_type, unsigned char *data, size_t data_len) if (error == 0) return pr_op_err_st("poll() returned 0, even though there's no timeout."); if (pfd.revents & (POLLHUP | POLLERR | POLLNVAL)) - return pr_op_err_st("poll() returned revents %u.", pfd.revents); + return print_poll_failure(&pfd); } while (!(pfd.revents & POLLOUT)); if (write(fd, data, data_len) < 0) { error = errno; - pr_op_err_st("Error sending %s to client: %s", + pr_op_debug("Error sending %s to client: %s", pdutype2str(pdu_type), strerror(error)); return error; } @@ -117,15 +147,6 @@ send_cache_response_pdu(int fd, uint8_t version) return send_response(fd, pdu.header.pdu_type, data, len); } -static void -pr_debug_prefix4(struct ipv4_prefix_pdu *pdu) -{ - char buffer[INET_ADDRSTRLEN]; - - pr_op_debug("Encoded prefix %s/%u into a PDU.", - addr2str4(&pdu->ipv4_prefix, buffer), pdu->prefix_length); -} - static int send_ipv4_prefix_pdu(int fd, uint8_t version, struct vrp const *vrp, uint8_t flags) @@ -147,21 +168,10 @@ send_ipv4_prefix_pdu(int fd, uint8_t version, struct vrp const *vrp, len = serialize_ipv4_prefix_pdu(&pdu, data); if (len != RTRPDU_IPV4_PREFIX_LEN) pr_crit("Serialized IPv4 Prefix is %zu bytes.", len); - if (log_op_enabled(LOG_DEBUG)) - pr_debug_prefix4(&pdu); return send_response(fd, pdu.header.pdu_type, data, len); } -static void -pr_debug_prefix6(struct ipv6_prefix_pdu *pdu) -{ - char buffer[INET6_ADDRSTRLEN]; - - pr_op_debug("Encoded prefix %s/%u into a PDU.", - addr2str6(&pdu->ipv6_prefix, buffer), pdu->prefix_length); -} - static int send_ipv6_prefix_pdu(int fd, uint8_t version, struct vrp const *vrp, uint8_t flags) @@ -183,8 +193,6 @@ send_ipv6_prefix_pdu(int fd, uint8_t version, struct vrp const *vrp, len = serialize_ipv6_prefix_pdu(&pdu, data); if (len != RTRPDU_IPV6_PREFIX_LEN) pr_crit("Serialized IPv6 Prefix is %zu bytes.", len); - if (log_op_enabled(LOG_DEBUG)) - pr_debug_prefix6(&pdu); return send_response(fd, pdu.header.pdu_type, data, len); } diff --git a/src/rtr/rtr.c b/src/rtr/rtr.c index d69c7f70..40d55c36 100644 --- a/src/rtr/rtr.c +++ b/src/rtr/rtr.c @@ -454,7 +454,7 @@ accept_new_client(struct pollfd const *server_fd) * false: oh noes; close socket. */ static bool -read_until_block(int fd, struct client_request *request) +read_until_block(struct rtr_client *client, struct client_request *request) { ssize_t read_result; size_t offset; @@ -463,7 +463,7 @@ read_until_block(int fd, struct client_request *request) request->nread = 0; for (offset = 0; offset < REQUEST_BUFFER_LEN; offset += read_result) { - read_result = read(fd, &request->buffer[offset], + read_result = read(client->fd, &request->buffer[offset], REQUEST_BUFFER_LEN - offset); if (read_result == -1) { error = errno; @@ -477,7 +477,8 @@ read_until_block(int fd, struct client_request *request) if (read_result == 0) { if (offset == 0) { - pr_op_debug("Client closed the socket."); + pr_op_info("Client '%s' closed the socket.", + client->addr); return false; } @@ -500,7 +501,7 @@ __handle_client_request(struct rtr_client *client) request = pmalloc(sizeof(struct client_request)); request->client = client; - if (!read_until_block(client->fd, request)) { + if (!read_until_block(client, request)) { free(request); return false; } @@ -514,12 +515,42 @@ __handle_client_request(struct rtr_client *client) static void print_poll_failure(struct pollfd *pfd, char const *what, char const *addr) { - if (pfd->revents & POLLHUP) - pr_op_err("%s '%s' down: POLLHUP (Peer hung up)", what, addr); - if (pfd->revents & POLLERR) - pr_op_err("%s '%s' down: POLLERR (Generic error)", what, addr); - if (pfd->revents & POLLNVAL) - pr_op_err("%s '%s' down: POLLNVAL (fd not open)", what, addr); + /* + * Note, POLLHUP and POLLER are implemented somewhat differently across + * the board: http://www.greenend.org.uk/rjk/tech/poll.html + */ + + if (pfd->revents & POLLHUP) { + /* Normal; we don't have control over the client. */ + pr_op_info("%s '%s' down: Peer hung up. (Revents 0x%02x)", + what, addr, pfd->revents); + } + if (pfd->revents & POLLERR) { + /* + * The documentation of this one stinks. The UNIX spec and + * OpenBSD mostly unhelpfully define it as "An error has + * occurred," and Linux appends "read end has been closed" + * (which doesn't seem standard). + * + * I often get it when the client closes the socket while the + * handler thread is sending it data (Making it a synonym to + * POLLHUP in this case), so we can't make too much of a fuss + * when it shows up. + * + * Warning it is. + */ + pr_op_warn("%s '%s' down: Generic error. (Revents 0x%02x)", + what, addr, pfd->revents); + } + if (pfd->revents & POLLNVAL) { + /* + * Definitely suggests a programming error. + * We're the main polling thread, so nobody else should be + * closing sockets on us. + */ + pr_op_err("%s '%s' down: File Descriptor closed. (Revents 0x%02x)", + what, addr, pfd->revents); + } } static void @@ -610,8 +641,7 @@ fddb_poll(void) pr_op_info("poll() was interrupted by some signal."); goto stop; case ENOMEM: - enomem_panic(); - /* Fall through */ + enomem_panic(); /* Does not return */ case EAGAIN: goto retry; case EFAULT: