From 62eb0242dc21eb68661af4889f91a06846543d05 Mon Sep 17 00:00:00 2001
From: Alberto Leiva Popper
Date: Wed, 14 Jul 2021 10:25:43 -0500
Subject: [PATCH] RTR Server: poll before writing

Problem: write() was sometimes failing with EAGAIN when Fort tried to
send PDUs to clients.

Diagnosis: Obviously, it's because RTR client sockets now ship with
O_NONBLOCK enabled. Fort wants O_NONBLOCK for reading, but not for
writing. This bug was introduced in the previous commit.

Solution: Make sure the socket is writable (via poll()) before calling
write().
---
 src/rtr/pdu_sender.c | 16 ++++++++++++++++
 src/rtr/rtr.c        |  8 ++++++++
 2 files changed, 24 insertions(+)

diff --git a/src/rtr/pdu_sender.c b/src/rtr/pdu_sender.c
index bee37a79..fc4857f2 100644
--- a/src/rtr/pdu_sender.c
+++ b/src/rtr/pdu_sender.c
@@ -1,6 +1,7 @@
 #include "pdu_sender.h"
 
 #include
+#include <poll.h>
 #include
 #include
 #include
@@ -29,10 +30,25 @@ set_header_values(struct pdu_header *header, uint8_t version, uint8_t type,
 static int
 send_response(int fd, uint8_t pdu_type, unsigned char *data, size_t data_len)
 {
+	struct pollfd pfd;
 	int error;
 
 	pr_op_debug("Sending %s to client.", pdutype2str(pdu_type));
 
+	pfd.fd = fd;
+	pfd.events = POLLOUT;
+
+	do {
+		pfd.revents = 0;
+		error = poll(&pfd, 1, -1);
+		if (error < 0)
+			return pr_op_err("poll() error: %d", error);
+		if (error == 0)
+			return pr_op_err("poll() returned 0, even though there's no timeout.");
+		if (pfd.revents & (POLLHUP | POLLERR | POLLNVAL))
+			return pr_op_err("poll() returned revents %u.", pfd.revents);
+	} while (!(pfd.revents & POLLOUT));
+
 	error = write(fd, data, data_len);
 	if (error < 0)
 		return pr_op_errno(errno, "Error sending %s to client.",
diff --git a/src/rtr/rtr.c b/src/rtr/rtr.c
index c5ff459b..61560567 100644
--- a/src/rtr/rtr.c
+++ b/src/rtr/rtr.c
@@ -199,6 +199,14 @@ init_addrinfo(char const *hostname, char const *service,
 	return 0;
 }
 
+/*
+ * By the way: man 2 poll says
+ *
+ * > The operation of poll() and ppoll() is not affected by the O_NONBLOCK flag.
+ *
+ * Which appears to be untrue. If I remove this function, both client and server
+ * hang forever, apparently after the TCP handshake.
+ */
 static int
 set_nonblock(int fd)
 {
-- 
2.47.2