git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MEDIUM: protocol: add MPTCP per address support
author Aperence <anthony.doeraene.dev@gmail.com>
Mon, 26 Aug 2024 09:50:27 +0000 (11:50 +0200)
committer Willy Tarreau <w@1wt.eu>
Fri, 30 Aug 2024 16:53:49 +0000 (18:53 +0200)
Multipath TCP (MPTCP), standardized in RFC8684 [1], is a TCP extension
that enables a TCP connection to use different paths.

Multipath TCP has been used for several use cases. On smartphones, MPTCP
enables seamless handovers between cellular and Wi-Fi networks while
preserving established connections. This use-case is what pushed Apple
to use MPTCP since 2013 in multiple applications [2]. On dual-stack
hosts, Multipath TCP enables the TCP connection to automatically use the
best performing path, either IPv4 or IPv6. If one path fails, MPTCP
automatically uses the other path.

To benefit from MPTCP, both the client and the server have to support
it. Multipath TCP is a backward-compatible TCP extension that is enabled
by default on recent Linux distributions (Debian, Ubuntu, Redhat, ...).
Multipath TCP is included in the Linux kernel since version 5.6 [3]. To
use it on Linux, an application must explicitly enable it when creating
the socket. No need to change anything else in the application.

The attached patch adds MPTCP per address support, to be used with:

  mptcp{,4,6}@<address>[:port1[-port2]]

MPTCP v4 and v6 protocols have been added: they are mainly a copy of the
TCP ones, with small differences: names, proto, and receivers lists.

These protocols are stored in __protocol_by_family, as an alternative to
TCP, similar to what has been done with QUIC. By doing that, the size of
__protocol_by_family has not been increased, and it behaves like TCP.

MPTCP is supported on both the frontend and backend sides.

Also added an example of configuration using mptcp along with a backend
allowing to experiment with it.

Note that this is a re-implementation of Björn's work from 3 years ago
[4], when haproxy's internals were probably less ready to deal with
this, causing his work to be left pending for a while.

Currently, the TCP_MAXSEG socket option doesn't seem to be supported
with MPTCP [5]. This results in a warning when trying to set the MSS of
sockets in proto_tcp:tcp_bind_listener.

This can be resolved by adding two new variables,
sock_inet_mptcp_maxseg_default and sock_inet6_mptcp_maxseg_default,
that hold the default value of the TCP_MAXSEG option. Note that for
the moment, they will always be -1 as the option isn't supported.
However, in the future, when support for this option is added, they
should contain the correct value for the MSS, making it possible to
set the TCP_MAXSEG option correctly.

Link: https://www.rfc-editor.org/rfc/rfc8684.html [1]
Link: https://www.tessares.net/apples-mptcp-story-so-far/ [2]
Link: https://www.mptcp.dev [3]
Link: https://github.com/haproxy/haproxy/issues/1028 [4]
Link: https://github.com/multipath-tcp/mptcp_net-next/issues/515 [5]
Co-authored-by: Dorian Craps <dorian.craps@student.vinci.be>
Co-authored-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
doc/configuration.txt
examples/mptcp-backend.py [new file with mode: 0644]
examples/mptcp.cfg [new file with mode: 0644]
include/haproxy/compat.h
include/haproxy/sock_inet.h
src/backend.c
src/proto_tcp.c
src/protocol.c
src/sock.c
src/sock_inet.c
src/tools.c

index 26d63e6692c386ccdd4e36789be00ec72a4a6224..15bf24e3429829796b8fa760c583ca117c32d207 100644 (file)
@@ -28279,6 +28279,27 @@ report this to the maintainers.
                                  range can or must be specified.
                                  It is considered as an alias of 'stream+ipv4@'.
 
+'mptcp@<address>[:port1[-port2]]' following <address> is considered as an IPv4
+                                  or IPv6 address depending on the syntax but
+                                  socket type and transport method is forced to
+                                  "stream", with the MPTCP protocol. Depending
+                                  on the statement using this address, a port or
+                                  a port range can or must be specified.
+
+'mptcp4@<address>[:port1[-port2]]' following <address> is always considered as
+                                   an IPv4 address but socket type and transport
+                                   method is forced to "stream", with the MPTCP
+                                   protocol. Depending on the statement using
+                                   this address, a port or port range can or
+                                   must be specified.
+
+'mptcp6@<address>[:port1[-port2]]' following <address> is always considered as
+                                   an IPv6 address but socket type and transport
+                                   method is forced to "stream", with the MPTCP
+                                   protocol. Depending on the statement using
+                                   this address, a port or port range can or
+                                   must be specified.
+
 'udp@<address>[:port1[-port2]]' following <address> is considered as an IPv4
                                 or IPv6 address depending of the syntax but
                                 socket type and transport method is forced to
diff --git a/examples/mptcp-backend.py b/examples/mptcp-backend.py
new file mode 100644 (file)
index 0000000..5237de5
--- /dev/null
@@ -0,0 +1,22 @@
+# =============================================================================
+# Example of a simple backend server using mptcp in python, used with mptcp.cfg
+# =============================================================================
+# Listens on [::]:4331 and answers every connection with a fixed HTTP/1.0 reply.
+import socket
+
+sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM, socket.IPPROTO_MPTCP)
+sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+# dual stack IPv4/IPv6: IPV6_V6ONLY is cleared so IPv4 clients are accepted too
+sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
+
+sock.bind(("::", 4331))
+sock.listen()
+# Handle one connection at a time: read up to 1024 bytes, print, reply, close.
+while True:
+    (conn, address) = sock.accept()
+    req = conn.recv(1024)
+    print(F"Received request : {req}")
+    conn.send(b"HTTP/1.0 200 OK\r\n\r\nHello\n")
+    conn.close()
+# NOTE(review): not reached as written, the loop above has no exit path
+sock.close()
diff --git a/examples/mptcp.cfg b/examples/mptcp.cfg
new file mode 100644 (file)
index 0000000..d43483d
--- /dev/null
@@ -0,0 +1,23 @@
+# You can test this configuration by running the command:
+#
+#   $ mptcpize run curl localhost:5000
+
+global
+   strict-limits  # refuse to start if insufficient FDs/memory
+   # add some process-wide tuning here if required
+
+defaults
+   mode http
+   balance roundrobin
+   timeout client 60s
+   timeout server 60s
+   timeout connect 1s
+
+frontend main
+    bind mptcp@[::]:5000
+    default_backend mptcp_backend
+
+# MPTCP is usually used on the frontend, but it is also possible
+# to enable it to communicate with the backend
+backend mptcp_backend
+    server mptcp_server mptcp@[::]:4331
index 3829060b7c4599f447c56d2e4e8f049b2911bdcf..68474fe8e8d2e67ad359484bec5f502e11561380 100644 (file)
@@ -317,6 +317,16 @@ typedef struct { } empty_t;
 #define queue _queue
 #endif
 
+/* Define a flag indicating if MPTCP is available: set on every Linux build, based on the platform macro only */
+#ifdef __linux__
+#define HA_HAVE_MPTCP 1
+#endif
+
+/* only Linux defines IPPROTO_MPTCP; provide the constant when the system headers lack it so code naming it still builds */
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
 #endif /* _HAPROXY_COMPAT_H */
 
 /*
index 6f07e637a7cb6ccbab6a0581d430d4a843415213..1c3b7a3033c08c6843a6614902e690a3b56a0ed5 100644 (file)
@@ -31,6 +31,14 @@ extern int sock_inet6_v6only_default;
 extern int sock_inet_tcp_maxseg_default;
 extern int sock_inet6_tcp_maxseg_default;
 
+#ifdef HA_HAVE_MPTCP
+extern int sock_inet_mptcp_maxseg_default;
+extern int sock_inet6_mptcp_maxseg_default;
+#else 
+#define sock_inet_mptcp_maxseg_default -1
+#define sock_inet6_mptcp_maxseg_default -1
+#endif
+
 extern struct proto_fam proto_fam_inet4;
 extern struct proto_fam proto_fam_inet6;
 
index 6956d9bfe6c55f803ca20f2753c6c9594f90c8ed..e4bd465e9818439a778127413af0c6d9df517c3b 100644 (file)
@@ -1690,8 +1690,9 @@ skip_reuse:
 
        if (!srv_conn->xprt) {
                /* set the correct protocol on the output stream connector */
+
                if (srv) {
-                       if (conn_prepare(srv_conn, protocol_lookup(srv_conn->dst->ss_family, PROTO_TYPE_STREAM, 0), srv->xprt)) {
+                       if (conn_prepare(srv_conn, protocol_lookup(srv_conn->dst->ss_family, PROTO_TYPE_STREAM, srv->alt_proto), srv->xprt)) {
                                conn_free(srv_conn);
                                return SF_ERR_INTERNAL;
                        }
index cf79ffbc543aacc5180c6de40b3b79be0143cba0..39de465ef7ce91083c9cc6f12d9c8cbdfe889bf8 100644 (file)
@@ -145,6 +145,98 @@ struct protocol proto_tcpv6 = {
 
 INITCALL1(STG_REGISTER, protocol_register, &proto_tcpv6);
 
+#ifdef HA_HAVE_MPTCP
+/* MPTCP over IPv4: most fields are copied from proto_tcpv4, the name and .sock_prot (IPPROTO_MPTCP) are MPTCP specific */
+struct protocol proto_mptcpv4 = {
+       .name           = "mptcpv4",
+
+       /* connection layer */
+       .xprt_type      = PROTO_TYPE_STREAM,
+       .listen         = tcp_bind_listener,
+       .enable         = tcp_enable_listener,
+       .disable        = tcp_disable_listener,
+       .add            = default_add_listener,
+       .unbind         = default_unbind_listener,
+       .suspend        = default_suspend_listener,
+       .resume         = default_resume_listener,
+       .accept_conn    = sock_accept_conn,
+       .ctrl_init      = sock_conn_ctrl_init,
+       .ctrl_close     = sock_conn_ctrl_close,
+       .connect        = tcp_connect_server,
+       .drain          = sock_drain,
+       .check_events   = sock_check_events,
+       .ignore_events  = sock_ignore_events,
+       .get_info       = tcp_get_info,
+
+       /* binding layer */
+       .rx_suspend     = tcp_suspend_receiver,
+       .rx_resume      = tcp_resume_receiver,
+
+       /* address family */
+       .fam            = &proto_fam_inet4,
+
+       /* socket layer */
+       .proto_type     = PROTO_TYPE_STREAM,
+       .sock_type      = SOCK_STREAM,
+       .sock_prot      = IPPROTO_MPTCP,                /* MPTCP specific */
+       .rx_enable      = sock_enable,
+       .rx_disable     = sock_disable,
+       .rx_unbind      = sock_unbind,
+       .rx_listening   = sock_accepting_conn,
+       .default_iocb   = sock_accept_iocb,
+#ifdef SO_REUSEPORT
+       .flags          = PROTO_F_REUSEPORT_SUPPORTED,
+#endif
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_mptcpv4);
+
+/* MPTCP over IPv6: most fields are copied from proto_tcpv6, the name and .sock_prot (IPPROTO_MPTCP) are MPTCP specific */
+struct protocol proto_mptcpv6 = {
+       .name           = "mptcpv6",
+
+       /* connection layer */
+       .xprt_type      = PROTO_TYPE_STREAM,
+       .listen         = tcp_bind_listener,
+       .enable         = tcp_enable_listener,
+       .disable        = tcp_disable_listener,
+       .add            = default_add_listener,
+       .unbind         = default_unbind_listener,
+       .suspend        = default_suspend_listener,
+       .resume         = default_resume_listener,
+       .accept_conn    = sock_accept_conn,
+       .ctrl_init      = sock_conn_ctrl_init,
+       .ctrl_close     = sock_conn_ctrl_close,
+       .connect        = tcp_connect_server,
+       .drain          = sock_drain,
+       .check_events   = sock_check_events,
+       .ignore_events  = sock_ignore_events,
+       .get_info       = tcp_get_info,
+
+       /* binding layer */
+       .rx_suspend     = tcp_suspend_receiver,
+       .rx_resume      = tcp_resume_receiver,
+
+       /* address family */
+       .fam            = &proto_fam_inet6,
+
+       /* socket layer */
+       .proto_type     = PROTO_TYPE_STREAM,
+       .sock_type      = SOCK_STREAM,
+       .sock_prot      = IPPROTO_MPTCP,                /* MPTCP specific */
+       .rx_enable      = sock_enable,
+       .rx_disable     = sock_disable,
+       .rx_unbind      = sock_unbind,
+       .rx_listening   = sock_accepting_conn,
+       .default_iocb   = sock_accept_iocb,
+#ifdef SO_REUSEPORT
+       .flags          = PROTO_F_REUSEPORT_SUPPORTED,
+#endif
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_mptcpv6);
+#endif
+
 /* Binds ipv4/ipv6 address <local> to socket <fd>, unless <flags> is set, in which
  * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
  *  - 0 : ignore remote address (may even be a NULL pointer)
@@ -590,12 +682,20 @@ int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen)
                /* we may want to try to restore the default MSS if the socket was inherited */
                int tmpmaxseg = -1;
                int defaultmss;
+               int v4 = listener->rx.addr.ss_family == AF_INET;
                socklen_t len = sizeof(tmpmaxseg);
 
-               if (listener->rx.addr.ss_family == AF_INET)
-                       defaultmss = sock_inet_tcp_maxseg_default;
-               else
-                       defaultmss = sock_inet6_tcp_maxseg_default;
+               if (listener->rx.proto->sock_prot == IPPROTO_MPTCP) {
+                       if (v4)
+                               defaultmss = sock_inet_mptcp_maxseg_default;
+                       else
+                               defaultmss = sock_inet6_mptcp_maxseg_default;
+               } else {
+                       if (v4)
+                               defaultmss = sock_inet_tcp_maxseg_default;
+                       else
+                               defaultmss = sock_inet6_tcp_maxseg_default;
+               }
 
                getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &tmpmaxseg, &len);
                if (defaultmss > 0 &&
index f5f4940685861274f0853ec0a2c44dc15e7bf354..edf1c22ad7483d575fd639387f5ce1adc97b7a9a 100644 (file)
@@ -51,7 +51,8 @@ void protocol_register(struct protocol *proto)
        LIST_APPEND(&protocols, &proto->list);
        __protocol_by_family[sock_family]
                            [proto->proto_type]
-                           [proto->xprt_type == PROTO_TYPE_DGRAM] = proto;
+                           [proto->xprt_type == PROTO_TYPE_DGRAM ||
+                            proto->sock_prot == IPPROTO_MPTCP] = proto;
        __proto_fam_by_family[sock_family] = proto->fam;
        HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
 }
index aa524d886dca459a54a3fa1dc3f5c5d56500c7a8..4b872d15ed6efa41534fbc44cf16df7430eb1546 100644 (file)
@@ -279,7 +279,7 @@ int sock_create_server_socket(struct connection *conn, struct proxy *be, int *st
                        ns = __objt_server(conn->target)->netns;
        }
 #endif
-       proto = protocol_lookup(conn->dst->ss_family, PROTO_TYPE_STREAM, 0);
+       proto = protocol_lookup(conn->dst->ss_family, PROTO_TYPE_STREAM, conn->ctrl->sock_prot == IPPROTO_MPTCP);
        BUG_ON(!proto);
        sock_fd = my_socketat(ns, proto->fam->sock_domain, SOCK_STREAM, proto->sock_prot);
 
@@ -306,7 +306,8 @@ int sock_create_server_socket(struct connection *conn, struct proxy *be, int *st
        }
 
        if (fd_set_nonblock(sock_fd) == -1 ||
-               ((conn->ctrl->sock_prot == IPPROTO_TCP) && (setsockopt(sock_fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) == -1))) {
+               ((conn->ctrl->sock_prot == IPPROTO_TCP || conn->ctrl->sock_prot == IPPROTO_MPTCP) &&
+                (setsockopt(sock_fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) == -1))) {
                qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
                send_log(be, LOG_EMERG, "Cannot set client socket to non blocking mode.\n");
                close(sock_fd);
index 07364f02aeb06f4b58c7806920061da2cf24f1b3..20a9ab5984619d4988b23a4764fbb8658e1e25ab 100644 (file)
@@ -79,6 +79,12 @@ int sock_inet6_v6only_default = 0;
 int sock_inet_tcp_maxseg_default = -1;
 int sock_inet6_tcp_maxseg_default = -1;
 
+/* Default MPTCPv4/MPTCPv6 MSS settings. -1=unknown. Updated by sock_inet_prepare() only when getsockopt(TCP_MAXSEG) succeeds on an MPTCP socket. */
+#ifdef HA_HAVE_MPTCP
+int sock_inet_mptcp_maxseg_default = -1;
+int sock_inet6_mptcp_maxseg_default = -1;
+#endif
+
 /* Compares two AF_INET sockaddr addresses. Returns 0 if they match or non-zero
  * if they do not match.
  */
@@ -496,6 +502,30 @@ static void sock_inet_prepare()
 #endif
                close(fd);
        }
+
+#ifdef HA_HAVE_MPTCP
+       fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
+       if (fd >= 0) {
+#ifdef TCP_MAXSEG
+               /* retrieve the OS' default mss for MPTCPv4 */
+               len = sizeof(val);
+               if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
+                       sock_inet_mptcp_maxseg_default = val;
+#endif
+               close(fd);
+       }
+
+       fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_MPTCP);
+       if (fd >= 0) {
+#ifdef TCP_MAXSEG
+               /* retrieve the OS' default mss for MPTCPv6 */
+               len = sizeof(val);
+               if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
+                       sock_inet6_mptcp_maxseg_default = val;
+#endif
+               close(fd);
+       }
+#endif
 }
 
 INITCALL0(STG_PREPARE, sock_inet_prepare);
index db414600f6a6ecdc6e530a85a18df160abbc5eb3..01189d3e0c200791974a6f0d373f8bc427fb11fc 100644 (file)
@@ -1069,6 +1069,13 @@ struct sockaddr_storage *str2sa_range(const char *str, int *port, int *low, int
                proto_type = PROTO_TYPE_STREAM;
                ctrl_type = SOCK_STREAM;
        }
+       else if (strncmp(str2, "mptcp4@", 7) == 0) {
+               str2 += 7;
+               ss.ss_family = AF_INET;
+               proto_type = PROTO_TYPE_STREAM;
+               ctrl_type = SOCK_STREAM;
+               alt_proto = 1;
+       }
        else if (strncmp(str2, "udp4@", 5) == 0) {
                str2 += 5;
                ss.ss_family = AF_INET;
@@ -1082,6 +1089,13 @@ struct sockaddr_storage *str2sa_range(const char *str, int *port, int *low, int
                proto_type = PROTO_TYPE_STREAM;
                ctrl_type = SOCK_STREAM;
        }
+       else if (strncmp(str2, "mptcp6@", 7) == 0) {
+               str2 += 7;
+               ss.ss_family = AF_INET6;
+               proto_type = PROTO_TYPE_STREAM;
+               ctrl_type = SOCK_STREAM;
+               alt_proto = 1;
+       }
        else if (strncmp(str2, "udp6@", 5) == 0) {
                str2 += 5;
                ss.ss_family = AF_INET6;
@@ -1095,6 +1109,13 @@ struct sockaddr_storage *str2sa_range(const char *str, int *port, int *low, int
                proto_type = PROTO_TYPE_STREAM;
                ctrl_type = SOCK_STREAM;
        }
+       else if (strncmp(str2, "mptcp@", 6) == 0) {
+               str2 += 6;
+               ss.ss_family = AF_UNSPEC;
+               proto_type = PROTO_TYPE_STREAM;
+               ctrl_type = SOCK_STREAM;
+               alt_proto = 1;
+       }
        else if (strncmp(str2, "udp@", 4) == 0) {
                str2 += 4;
                ss.ss_family = AF_UNSPEC;