From: Willy Tarreau Date: Wed, 19 Aug 2009 09:14:11 +0000 (+0200) Subject: [MEDIUM] remove TCP_CORK and make use of MSG_MORE instead X-Git-Tag: v1.4-dev3~32 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6db06d3870ad4574414958574f1b5cf7a3f25997;p=thirdparty%2Fhaproxy.git [MEDIUM] remove TCP_CORK and make use of MSG_MORE instead send() supports the MSG_MORE flag on Linux, which does the same as TCP_CORK except that we don't have to remove TCP_NODELAY before and we don't need any syscall to set/remove it. This can save up to 4 syscalls around a send() (two for setting it, two for removing it), and it's much cleaner since it is not persistent. So make use of it instead. --- diff --git a/include/common/compat.h b/include/common/compat.h index dec5d7d430..8ddb72db9e 100644 --- a/include/common/compat.h +++ b/include/common/compat.h @@ -66,6 +66,11 @@ #define MSG_DONTWAIT 0 #endif +/* Only Linux defines MSG_MORE */ +#ifndef MSG_MORE +#define MSG_MORE 0 +#endif + #if defined(TPROXY) && defined(NETFILTER) #include #include diff --git a/include/types/fd.h b/include/types/fd.h index a50d076c37..3ab89feba8 100644 --- a/include/types/fd.h +++ b/include/types/fd.h @@ -63,8 +63,7 @@ enum { */ #define FD_FL_TCP 0x0001 /* socket is TCP */ #define FD_FL_TCP_NODELAY 0x0002 -#define FD_FL_TCP_CORK 0x0004 -#define FD_FL_TCP_NOLING 0x0008 /* lingering disabled */ +#define FD_FL_TCP_NOLING 0x0004 /* lingering disabled */ /* info about one given fd */ struct fdtab { diff --git a/src/stream_sock.c b/src/stream_sock.c index f563755edd..79426fb14b 100644 --- a/src/stream_sock.c +++ b/src/stream_sock.c @@ -585,28 +585,28 @@ static int stream_sock_write_loop(struct stream_interface *si, struct buffer *b) if (max > b->send_max) max = b->send_max; - -#if defined(TCP_CORK) && defined(SOL_TCP) - /* - * Check if we want to cork output before sending. This typically occurs - * when there are data left in the buffer, or when we reached the end of - * buffer but we know we will close, so we try to merge the ongoing FIN - * with the last data segment. + /* check if we want to inform the kernel that we're interested in + * sending more data after this call. We want this if : + * - we're about to close after this last send and want to merge + * the ongoing FIN with the last segment. + * - we know we can't send everything at once and must get back + * here because of unaligned data + * The test is arranged so that the most common case does only 2 + * tests. */ - if ((fdtab[si->fd].flags & (FD_FL_TCP|FD_FL_TCP_NOLING|FD_FL_TCP_CORK)) == FD_FL_TCP) { - if (unlikely((b->send_max == b->l && - (b->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_HIJACK|BF_WRITE_ENA|BF_SHUTR)) == - (BF_WRITE_ENA|BF_SHUTR)))) { - /* we have to unconditionally reset TCP_NODELAY for CORK */ - setsockopt(si->fd, IPPROTO_TCP, TCP_NODELAY, (char *) &zero, sizeof(zero)); - setsockopt(si->fd, SOL_TCP, TCP_CORK, (char *) &one, sizeof(one)); - fdtab[si->fd].flags = (fdtab[si->fd].flags & ~FD_FL_TCP_NODELAY) | FD_FL_TCP_CORK; - } - } -#endif if (MSG_NOSIGNAL) { - ret = send(si->fd, b->w, max, MSG_DONTWAIT | MSG_NOSIGNAL); + unsigned int send_flag = MSG_DONTWAIT | MSG_NOSIGNAL; + + if (MSG_MORE && + (((b->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_HIJACK|BF_WRITE_ENA|BF_SHUTR)) == (BF_WRITE_ENA|BF_SHUTR) && + (max == b->l)) || + (max != b->l && max != b->send_max)) + && (fdtab[si->fd].flags & FD_FL_TCP)) { + send_flag |= MSG_MORE; + } + + ret = send(si->fd, b->w, max, send_flag); } else { int skerr; socklen_t lskerr = sizeof(skerr); @@ -662,21 +662,6 @@ static int stream_sock_write_loop(struct stream_interface *si, struct buffer *b) } } /* while (1) */ - /* check if we need to uncork the output, for instance when the - * output buffer is empty but not shutr(). - */ - if (unlikely((fdtab[si->fd].flags & (FD_FL_TCP|FD_FL_TCP_NODELAY)) == FD_FL_TCP && (b->flags & BF_EMPTY))) { - if ((b->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_HIJACK|BF_WRITE_ENA|BF_SHUTR)) != (BF_WRITE_ENA|BF_SHUTR)) { -#if defined(TCP_CORK) && defined(SOL_TCP) - if (fdtab[si->fd].flags & FD_FL_TCP_CORK) - setsockopt(si->fd, SOL_TCP, TCP_CORK, (char *) &zero, sizeof(zero)); -#endif - setsockopt(si->fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)); - fdtab[si->fd].flags = (fdtab[si->fd].flags & ~FD_FL_TCP_CORK) | FD_FL_TCP_NODELAY; - } - } - - return retval; }