From: Willy Tarreau Date: Sun, 10 Mar 2013 22:51:38 +0000 (+0100) Subject: MAJOR: listener: support inheriting a listening fd from the parent X-Git-Tag: v1.5-dev18~58 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=40aa070c51648606b89fe65408de797827e3d092;p=thirdparty%2Fhaproxy.git MAJOR: listener: support inheriting a listening fd from the parent Using the address syntax "fd@<number>", a listener may inherit a file descriptor that the caller process has already bound and passed as this number. The fd's socket family is detected using getsockname(), and the usual initialization is performed through the existing code for that family, but the socket creation is skipped. Whether the parent has performed the listen() call or not is not important as this is detected. For UNIX sockets, we immediately clear the path after preparing a socket so that we never remove it in case an abort would happen due to a late error during startup. --- diff --git a/doc/configuration.txt b/doc/configuration.txt index 4d575856e0..86d88a9b0c 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -1616,6 +1616,9 @@ bind /<path> [, ...] [param*] - 'ipv4@' -> address is always IPv4 - 'ipv6@' -> address is always IPv6 - 'unix@' -> address is a path to a local unix socket + - 'fd@<n>' -> use file descriptor <n> inherited from the + parent. The fd must be bound and may or may not already + be listening.
is either a unique TCP port, or a port range for which the proxy will accept connections for the IP address specified diff --git a/src/cfgparse.c b/src/cfgparse.c index 3821edb77d..2c2faf5586 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -207,6 +207,7 @@ int str2listener(char *str, struct proxy *curproxy, struct bind_conf *bind_conf, while (next && *next) { struct sockaddr_storage ss, *ss2; + int fd = -1; str = next; /* 1) look for the end of the first address */ @@ -240,6 +241,24 @@ int str2listener(char *str, struct proxy *curproxy, struct bind_conf *bind_conf, goto fail; } } + else if (ss2->ss_family == AF_UNSPEC) { + socklen_t addr_len; + + /* We want to attach to an already bound fd whose number + * is in the addr part of ss2 when cast to sockaddr_in. + * Note that by definition there is a single listener. + * We still have to determine the address family to + * register the correct protocol. + */ + fd = ((struct sockaddr_in *)ss2)->sin_addr.s_addr; + addr_len = sizeof(*ss2); + if (getsockname(fd, (struct sockaddr *)ss2, &addr_len) == -1) { + memprintf(err, "cannot use file descriptor '%d' : %s.\n", fd, strerror(errno)); + goto fail; + } + + port = end = get_host_port(ss2); + } /* OK the address looks correct */ ss = *ss2; @@ -252,7 +271,7 @@ int str2listener(char *str, struct proxy *curproxy, struct bind_conf *bind_conf, l->frontend = curproxy; l->bind_conf = bind_conf; - l->fd = -1; + l->fd = fd; l->addr = ss; l->xprt = &raw_sock; l->state = LI_INIT; diff --git a/src/proto_tcp.c b/src/proto_tcp.c index 6bd5761795..38b9bdc2f2 100644 --- a/src/proto_tcp.c +++ b/src/proto_tcp.c @@ -566,6 +566,8 @@ int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen) { __label__ tcp_return, tcp_close_return; int fd, err; + int ext, ready; + socklen_t ready_len; const char *msg = NULL; /* ensure we never return garbage */ @@ -577,7 +579,15 @@ int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen) err = ERR_NONE; - if ((fd = 
socket(listener->addr.ss_family, SOCK_STREAM, IPPROTO_TCP)) == -1) { + /* if the listener already has an fd assigned, then we were offered the + * fd by an external process (most likely the parent), and we don't want + * to create a new socket. However we still want to set a few flags on + * the socket. + */ + fd = listener->fd; + ext = (fd >= 0); + + if (!ext && (fd = socket(listener->addr.ss_family, SOCK_STREAM, IPPROTO_TCP)) == -1) { err |= ERR_RETRYABLE | ERR_ALERT; msg = "cannot create listening socket"; goto tcp_return; @@ -595,7 +605,7 @@ int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen) goto tcp_close_return; } - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) { + if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) { /* not fatal but should be reported */ msg = "cannot do so_reuseaddr"; err |= ERR_ALERT; @@ -608,10 +618,11 @@ int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen) /* OpenBSD supports this. As it's present in old libc versions of Linux, * it might return an error that we will silently ignore. 
*/ - setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + if (!ext) + setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); #endif #ifdef CONFIG_HAP_LINUX_TPROXY - if (listener->options & LI_O_FOREIGN) { + if (!ext && (listener->options & LI_O_FOREIGN)) { switch (listener->addr.ss_family) { case AF_INET: if ((setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) == -1) @@ -631,7 +642,7 @@ int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen) #endif #ifdef SO_BINDTODEVICE /* Note: this might fail if not CAP_NET_RAW */ - if (listener->interface) { + if (!ext && listener->interface) { if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, listener->interface, strlen(listener->interface) + 1) == -1) { msg = "cannot bind listener to device"; @@ -675,13 +686,19 @@ int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen) setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero)); #endif - if (bind(fd, (struct sockaddr *)&listener->addr, listener->proto->sock_addrlen) == -1) { + if (!ext && bind(fd, (struct sockaddr *)&listener->addr, listener->proto->sock_addrlen) == -1) { err |= ERR_RETRYABLE | ERR_ALERT; msg = "cannot bind socket"; goto tcp_close_return; } - if (listen(fd, listener->backlog ? listener->backlog : listener->maxconn) == -1) { + ready = 0; + ready_len = sizeof(ready); + if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &ready, &ready_len) == -1) + ready = 0; + + if (!(ext && ready) && /* only listen if not already done by external process */ + listen(fd, listener->backlog ? 
listener->backlog : listener->maxconn) == -1) { err |= ERR_RETRYABLE | ERR_ALERT; msg = "cannot listen to socket"; goto tcp_close_return; diff --git a/src/proto_uxst.c b/src/proto_uxst.c index 3263ea4ab1..85eb3ee861 100644 --- a/src/proto_uxst.c +++ b/src/proto_uxst.c @@ -111,6 +111,10 @@ static void destroy_uxst_socket(const char *path) struct sockaddr_un addr; int sock, ret; + /* if the path was cleared, we do nothing */ + if (!*path) + return; + /* We might have been chrooted, so we may not be able to access the * socket. In order to avoid bothering the other end, we connect with a * wrong protocol, namely SOCK_DGRAM. The return code from connect() @@ -157,6 +161,8 @@ static int uxst_bind_listener(struct listener *listener, char *errmsg, int errle struct sockaddr_un addr; const char *msg = NULL; const char *path; + int ext, ready; + socklen_t ready_len; int ret; @@ -169,6 +175,16 @@ static int uxst_bind_listener(struct listener *listener, char *errmsg, int errle path = ((struct sockaddr_un *)&listener->addr)->sun_path; + /* if the listener already has an fd assigned, then we were offered the + * fd by an external process (most likely the parent), and we don't want + * to create a new socket. However we still want to set a few flags on + * the socket. + */ + fd = listener->fd; + ext = (fd >= 0); + if (ext) + goto fd_ready; + /* 1. 
create socket names */ if (!path[0]) { msg = "Invalid empty name for a UNIX socket"; @@ -215,6 +231,7 @@ static int uxst_bind_listener(struct listener *listener, char *errmsg, int errle goto err_unlink_back; } + fd_ready: if (fd >= global.maxsock) { msg = "socket(): not enough free sockets, raise -n argument"; goto err_unlink_temp; @@ -225,7 +242,7 @@ static int uxst_bind_listener(struct listener *listener, char *errmsg, int errle goto err_unlink_temp; } - if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + if (!ext && bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { /* note that bind() creates the socket on the file system */ msg = "cannot bind UNIX socket"; goto err_unlink_temp; @@ -236,14 +253,21 @@ static int uxst_bind_listener(struct listener *listener, char *errmsg, int errle * While it is known not to be portable on every OS, it's still useful * where it works. */ - if (((listener->bind_conf->ux.uid != -1 || listener->bind_conf->ux.gid != -1) && - (chown(tempname, listener->bind_conf->ux.uid, listener->bind_conf->ux.gid) == -1)) || - (listener->bind_conf->ux.mode != 0 && chmod(tempname, listener->bind_conf->ux.mode) == -1)) { + if (!ext && + (((listener->bind_conf->ux.uid != -1 || listener->bind_conf->ux.gid != -1) && + (chown(tempname, listener->bind_conf->ux.uid, listener->bind_conf->ux.gid) == -1)) || + (listener->bind_conf->ux.mode != 0 && chmod(tempname, listener->bind_conf->ux.mode) == -1))) { msg = "cannot change UNIX socket ownership"; goto err_unlink_temp; } - if (listen(fd, listener->backlog ? listener->backlog : listener->maxconn) < 0) { + ready = 0; + ready_len = sizeof(ready); + if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &ready, &ready_len) == -1) + ready = 0; + + if (!(ext && ready) && /* only listen if not already done by external process */ + listen(fd, listener->backlog ? 
listener->backlog : listener->maxconn) < 0) { msg = "cannot listen to UNIX socket"; goto err_unlink_temp; } @@ -253,13 +277,19 @@ static int uxst_bind_listener(struct listener *listener, char *errmsg, int errle * fear loosing the socket because we have a copy of it in * backname. */ - if (rename(tempname, path) < 0) { + if (!ext && rename(tempname, path) < 0) { msg = "cannot switch final and temporary UNIX sockets"; goto err_rename; } - /* 6. cleanup */ - unlink(backname); /* no need to keep this one either */ + /* 6. cleanup. If we're bound to an fd inherited from the parent, we + * want to ensure that destroy_uxst_socket() will never remove the + * path, and for this we simply clear the path to the socket. + */ + if (!ext) + unlink(backname); + else + ((struct sockaddr_un *)&listener->addr)->sun_path[0] = 0; /* the socket is now listening */ listener->fd = fd; @@ -275,13 +305,19 @@ static int uxst_bind_listener(struct listener *listener, char *errmsg, int errle if (ret < 0 && errno == ENOENT) unlink(path); err_unlink_temp: - unlink(tempname); + if (!ext) + unlink(tempname); close(fd); err_unlink_back: - unlink(backname); + if (!ext) + unlink(backname); err_return: - if (msg && errlen) - snprintf(errmsg, errlen, "%s [%s]", msg, path); + if (msg && errlen) { + if (!ext) + snprintf(errmsg, errlen, "%s [%s]", msg, path); + else + snprintf(errmsg, errlen, "%s [fd %d]", msg, fd); + } return ERR_FATAL | ERR_ALERT; } diff --git a/src/standard.c b/src/standard.c index c670be0ac0..cc22ba7fc1 100644 --- a/src/standard.c +++ b/src/standard.c @@ -632,6 +632,7 @@ static struct sockaddr_storage *str2ip(const char *str, struct sockaddr_storage * - "ipv6@" => force address to resolve as IPv6 and fail if not possible. * - "unix@" => force address to be a path to a UNIX socket even if the * path does not start with a '/' + * - "fd@" => an integer must follow, and is a file descriptor number. 
* * Also note that in order to avoid any ambiguity with IPv6 addresses, the ':' * is mandatory after the IP address even when no port is specified. NULL is @@ -640,6 +641,9 @@ static struct sockaddr_storage *str2ip(const char *str, struct sockaddr_storage * * If <pfx> is non-null, it is used as a string prefix before any path-based * address (typically the path to a unix socket). + * + * When a file descriptor is passed, its value is put into the s_addr part of + * the address when cast to sockaddr_in and the address family is AF_UNSPEC. */ struct sockaddr_storage *str2sa_range(const char *str, int *low, int *high, char **err, const char *pfx) { @@ -677,7 +681,21 @@ struct sockaddr_storage *str2sa_range(const char *str, int *low, int *high, char else ss.ss_family = AF_UNSPEC; - if (ss.ss_family == AF_UNIX) { + if (ss.ss_family == AF_UNSPEC && strncmp(str2, "fd@", 3) == 0) { + char *endptr; + + str2 += 3; + ((struct sockaddr_in *)&ss)->sin_addr.s_addr = strtol(str2, &endptr, 10); + + if (!*str2 || *endptr) { + memprintf(err, "file descriptor '%s' is not a valid integer\n", str2); + goto out; + } + + /* we return AF_UNSPEC if we use a file descriptor number */ + ss.ss_family = AF_UNSPEC; + } + else if (ss.ss_family == AF_UNIX) { int prefix_path_len; int max_path_len;