From 93b227db9502f72f894c83708cd49c41925158b2 Mon Sep 17 00:00:00 2001 From: Bertrand Jacquin Date: Sat, 4 Jun 2016 15:11:10 +0100 Subject: [PATCH] MINOR: listener: add the "accept-netscaler-cip" option to the "bind" keyword When NetScaler application switch is used as L3+ switch, informations regarding the original IP and TCP headers are lost as a new TCP connection is created between the NetScaler and the backend server. NetScaler provides a feature to insert in the TCP data the original data that can then be consumed by the backend server. Specifications and documentations from NetScaler: https://support.citrix.com/article/CTX205670 https://www.citrix.com/blogs/2016/04/25/how-to-enable-client-ip-in-tcpip-option-of-netscaler/ When CIP is enabled on the NetScaler, then a TCP packet is inserted just after the TCP handshake. This is composed as: - CIP magic number : 4 bytes Both sender and receiver have to agree on a magic number so that they both handle the incoming data as a NetScaler Client IP insertion packet. - Header length : 4 bytes Defines the length on the remaining data. - IP header : >= 20 bytes if IPv4, 40 bytes if IPv6 Contains the header of the last IP packet sent by the client during TCP handshake. - TCP header : >= 20 bytes Contains the header of the last TCP packet sent by the client during TCP handshake. --- doc/configuration.txt | 51 +++-- ...netscaler-client-ip-insertion-protocol.txt | 29 +++ include/proto/connection.h | 10 + include/types/connection.h | 15 +- include/types/listener.h | 2 + src/connection.c | 200 ++++++++++++++++++ src/listener.c | 26 +++ src/session.c | 9 + 8 files changed, 323 insertions(+), 19 deletions(-) create mode 100644 doc/netscaler-client-ip-insertion-protocol.txt diff --git a/doc/configuration.txt b/doc/configuration.txt index 8b35a02338..8a8055dff3 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -9736,6 +9736,18 @@ provided immediately after the setting name. 
The currently supported settings are the following ones. +accept-netscaler-cip <magic number> + Enforces the use of the NetScaler Client IP insertion protocol over any + connection accepted by any of the TCP sockets declared on the same line. The + NetScaler Client IP insertion protocol dictates the layer 3/4 addresses of + the incoming connection to be used everywhere an address is used, with the + only exception of "tcp-request connection" rules which will only see the + real connection address. Logs will reflect the addresses indicated in the + protocol, unless it is violated, in which case the real address will still + be used. This keyword combined with support from external components can be + used as an efficient and reliable alternative to the X-Forwarded-For + mechanism which is not always reliable and not even always usable. + accept-proxy Enforces the use of the PROXY protocol over any connection accepted by any of the sockets declared on the same line. Versions 1 and 2 of the PROXY protocol @@ -10794,16 +10806,18 @@ send-proxy connection established to this server. The PROXY protocol informs the other end about the layer 3/4 addresses of the incoming connection, so that it can know the client's address or the public address it accessed to, whatever the - upper layer protocol. For connections accepted by an "accept-proxy" listener, - the advertised address will be used. Only TCPv4 and TCPv6 address families - are supported. Other families such as Unix sockets, will report an UNKNOWN - family. Servers using this option can fully be chained to another instance of - haproxy listening with an "accept-proxy" setting. This setting must not be - used if the server isn't aware of the protocol. When health checks are sent - to the server, the PROXY protocol is automatically used when this option is - set, unless there is an explicit "port" or "addr" directive, in which case an - explicit "check-send-proxy" directive would also be needed to use the PROXY - protocol. 
See also the "accept-proxy" option of the "bind" keyword. + upper layer protocol. For connections accepted by an "accept-proxy" or + "accept-netscaler-cip" listener, the advertised address will be used. Only + TCPv4 and TCPv6 address families are supported. Other families such as + Unix sockets, will report an UNKNOWN family. Servers using this option can + fully be chained to another instance of haproxy listening with an + "accept-proxy" setting. This setting must not be used if the server isn't + aware of the protocol. When health checks are sent to the server, the PROXY + protocol is automatically used when this option is set, unless there is an + explicit "port" or "addr" directive, in which case an explicit + "check-send-proxy" directive would also be needed to use the PROXY protocol. + See also the "accept-proxy" and "accept-netscaler-cip" option of the "bind" + keyword. Supported in default-server: No @@ -12904,9 +12918,10 @@ src : ip IP and works on both IPv4 and IPv6 tables. On IPv6 tables, IPv4 addresses are mapped to their IPv6 equivalent, according to RFC 4291. Note that it is the TCP-level source address which is used, and not the address of a client - behind a proxy. However if the "accept-proxy" bind directive is used, it can - be the address of a client behind another PROXY-protocol compatible component - for all rule sets except "tcp-request connection" which sees the real address. + behind a proxy. However if the "accept-proxy" or "accept-netscaler-cip" bind + directive is used, it can be the address of a client behind another + PROXY-protocol compatible component for all rule sets except + "tcp-request connection" which sees the real address. Example: # add an HTTP header in requests with the originating address' country @@ -14362,8 +14377,9 @@ Detailed fields description : connection to haproxy. If the connection was accepted on a UNIX socket instead, the IP address would be replaced with the word "unix". 
Note that when the connection is accepted on a socket configured with "accept-proxy" - and the PROXY protocol is correctly used, then the logs will reflect the - forwarded connection's information. + and the PROXY protocol is correctly used, or with an "accept-netscaler-cip" + and the NetScaler Client IP insertion protocol is correctly used, then the + logs will reflect the forwarded connection's information. - "client_port" is the TCP port of the client which initiated the connection. If the connection was accepted on a UNIX socket instead, the port would be @@ -14542,8 +14558,9 @@ Detailed fields description : connection to haproxy. If the connection was accepted on a UNIX socket instead, the IP address would be replaced with the word "unix". Note that when the connection is accepted on a socket configured with "accept-proxy" - and the PROXY protocol is correctly used, then the logs will reflect the - forwarded connection's information. + and the PROXY protocol is correctly used, or with an "accept-netscaler-cip" + and the NetScaler Client IP insertion protocol is correctly used, then the + logs will reflect the forwarded connection's information. - "client_port" is the TCP port of the client which initiated the connection. If the connection was accepted on a UNIX socket instead, the port would be diff --git a/doc/netscaler-client-ip-insertion-protocol.txt b/doc/netscaler-client-ip-insertion-protocol.txt new file mode 100644 index 0000000000..6f77f6522c --- /dev/null +++ b/doc/netscaler-client-ip-insertion-protocol.txt @@ -0,0 +1,29 @@ +When NetScaler application switch is used as L3+ switch, information +regarding the original IP and TCP headers are lost as a new TCP +connection is created between the NetScaler and the backend server. + +NetScaler provides a feature to insert in the TCP data the original data +that can then be consumed by the backend server. 
+ +Specifications and documentations from NetScaler: + https://support.citrix.com/article/CTX205670 + https://www.citrix.com/blogs/2016/04/25/how-to-enable-client-ip-in-tcpip-option-of-netscaler/ + +When CIP is enabled on the NetScaler, then a TCP packet is inserted just after +the TCP handshake. This is composed as: + + - CIP magic number : 4 bytes + Both sender and receiver have to agree on a magic number so that + they both handle the incoming data as a NetScaler Client IP insertion + packet. + + - Header length : 4 bytes + Defines the length on the remaining data. + + - IP header : >= 20 bytes if IPv4, 40 bytes if IPv6 + Contains the header of the last IP packet sent by the client during TCP + handshake. + + - TCP header : >= 20 bytes + Contains the header of the last TCP packet sent by the client during TCP + handshake. diff --git a/include/proto/connection.h b/include/proto/connection.h index f50329c380..ef078add2f 100644 --- a/include/proto/connection.h +++ b/include/proto/connection.h @@ -45,6 +45,9 @@ int make_proxy_line(char *buf, int buf_len, struct server *srv, struct connectio int make_proxy_line_v1(char *buf, int buf_len, struct sockaddr_storage *src, struct sockaddr_storage *dst); int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct connection *remote); +/* receive a NetScaler Client IP insertion header over a connection */ +int conn_recv_netscaler_cip(struct connection *conn, int flag); + /* raw send() directly on the socket */ int conn_sock_send(struct connection *conn, const void *buf, int len, int flags); @@ -570,6 +573,13 @@ static inline const char *conn_err_code_str(struct connection *c) case CO_ER_PRX_NOT_HDR: return "Received something which does not look like a PROXY protocol header"; case CO_ER_PRX_BAD_HDR: return "Received an invalid PROXY protocol header"; case CO_ER_PRX_BAD_PROTO: return "Received an unhandled protocol in the PROXY protocol header"; + + case CO_ER_CIP_EMPTY: return "Connection closed while waiting 
for NetScaler Client IP header"; + case CO_ER_CIP_ABORT: return "Connection error while waiting for NetScaler Client IP header"; + case CO_ER_CIP_TRUNCATED: return "Truncated NetScaler Client IP header received"; + case CO_ER_CIP_BAD_MAGIC: return "Received an invalid NetScaler Client IP magic number"; + case CO_ER_CIP_BAD_PROTO: return "Received an unhandled protocol in the NetScaler Client IP header"; + case CO_ER_SSL_EMPTY: return "Connection closed during SSL handshake"; case CO_ER_SSL_ABORT: return "Connection error during SSL handshake"; case CO_ER_SSL_TIMEOUT: return "Timeout during SSL handshake"; diff --git a/include/types/connection.h b/include/types/connection.h index dfbff6a6f2..292ca2bd64 100644 --- a/include/types/connection.h +++ b/include/types/connection.h @@ -32,6 +32,10 @@ #include #include +#include +#include +#include + /* referenced below */ struct connection; struct buffer; @@ -107,10 +111,10 @@ enum { CO_FL_SEND_PROXY = 0x01000000, /* send a valid PROXY protocol header */ CO_FL_SSL_WAIT_HS = 0x02000000, /* wait for an SSL handshake to complete */ CO_FL_ACCEPT_PROXY = 0x04000000, /* receive a valid PROXY protocol header */ - /* unused : 0x08000000 */ + CO_FL_ACCEPT_CIP = 0x08000000, /* receive a valid NetScaler Client IP header */ /* below we have all handshake flags grouped into one */ - CO_FL_HANDSHAKE = CO_FL_SEND_PROXY | CO_FL_SSL_WAIT_HS | CO_FL_ACCEPT_PROXY, + CO_FL_HANDSHAKE = CO_FL_SEND_PROXY | CO_FL_SSL_WAIT_HS | CO_FL_ACCEPT_PROXY | CO_FL_ACCEPT_CIP, /* when any of these flags is set, polling is defined by socket-layer * operations, as opposed to data-layer. 
Transport is explicitly not @@ -156,6 +160,13 @@ enum { CO_ER_PRX_BAD_HDR, /* bad PROXY protocol header */ CO_ER_PRX_BAD_PROTO, /* unsupported protocol in PROXY header */ + CO_ER_CIP_EMPTY, /* nothing received in NetScaler Client IP header */ + CO_ER_CIP_ABORT, /* client abort during NetScaler Client IP header */ + CO_ER_CIP_TIMEOUT, /* timeout while waiting for a NetScaler Client IP header */ + CO_ER_CIP_TRUNCATED, /* truncated NetScaler Client IP header */ + CO_ER_CIP_BAD_MAGIC, /* bad magic number in NetScaler Client IP header */ + CO_ER_CIP_BAD_PROTO, /* unsupported protocol in NetScaler Client IP header */ + CO_ER_SSL_EMPTY, /* client closed during SSL handshake */ CO_ER_SSL_ABORT, /* client abort during SSL handshake */ CO_ER_SSL_TIMEOUT, /* timeout during SSL handshake */ diff --git a/include/types/listener.h b/include/types/listener.h index 4da6cacb47..afe2ad8435 100644 --- a/include/types/listener.h +++ b/include/types/listener.h @@ -92,6 +92,7 @@ enum li_state { #define LI_O_TCP_FO 0x0100 /* enable TCP Fast Open (linux >= 3.7) */ #define LI_O_V6ONLY 0x0200 /* bind to IPv6 only on Linux >= 2.4.21 */ #define LI_O_V4V6 0x0400 /* bind to IPv4/IPv6 on Linux >= 2.4.21 */ +#define LI_O_ACC_CIP 0x0800 /* find the proxied address in the NetScaler Client IP header */ /* Note: if a listener uses LI_O_UNLIMITED, it is highly recommended that it adds its own * maxconn setting to the global.maxsock value so that its resources are reserved. 
@@ -151,6 +152,7 @@ struct bind_conf {
 	int level;                 /* stats access level (ACCESS_LVL_*) */
 	struct list by_fe;         /* next binding for the same frontend, or NULL */
 	struct list listeners;     /* list of listeners using this bind config */
+	uint32_t ns_cip_magic;     /* Expected NetScaler Client IP magic number */
 	char *arg;                 /* argument passed to "bind" for better error reporting */
 	char *file;                /* file where the section appears */
 	int line;                  /* line where the section appears */
diff --git a/src/connection.c b/src/connection.c
index 5515188c6b..358c9bcb1f 100644
--- a/src/connection.c
+++ b/src/connection.c
@@ -62,6 +62,10 @@ void conn_fd_handler(int fd)
 	if (unlikely(conn->flags & CO_FL_ERROR))
 		goto leave;
 
+	if (conn->flags & CO_FL_ACCEPT_CIP)
+		if (!conn_recv_netscaler_cip(conn, CO_FL_ACCEPT_CIP))
+			goto leave;
+
 	if (conn->flags & CO_FL_ACCEPT_PROXY)
 		if (!conn_recv_proxy(conn, CO_FL_ACCEPT_PROXY))
 			goto leave;
@@ -624,6 +628,214 @@ int conn_recv_proxy(struct connection *conn, int flag)
 	return 0;
 }
 
+/* This handshake handler waits a NetScaler Client IP insertion header
+ * at the beginning of the raw data stream. The header looks like this:
+ *
+ *   4 bytes:   CIP magic number
+ *   4 bytes:   Header length
+ *   20+ bytes: Header of the last IP packet sent by the client during
+ *              TCP handshake.
+ *   20+ bytes: Header of the last TCP packet sent by the client during
+ *              TCP handshake.
+ *
+ * This line MUST be at the beginning of the buffer and MUST NOT be
+ * fragmented.
+ *
+ * The header line is small and in all cases smaller than the smallest normal
+ * TCP MSS. So it MUST always be delivered as one segment, which ensures we
+ * can safely use MSG_PEEK and avoid buffering.
+ *
+ * The lengths found in the header come from the peer: they are checked
+ * against the amount of data really peeked before being used, so that
+ * neither the address extraction nor the final read go past received bytes.
+ *
+ * Once the data is fetched, the values are set in the connection's address
+ * fields, and data are removed from the socket's buffer. The function returns
+ * zero if it needs to wait for more data or if it fails, or 1 if it completed
+ * and removed itself. On failure, CO_FL_ERROR is set on the connection and
+ * conn->err_code holds a CO_ER_CIP_* code when the cause was identified.
+ */
+int conn_recv_netscaler_cip(struct connection *conn, int flag)
+{
+	char *line;
+	uint32_t cip_magic;
+	uint32_t cip_len;
+	uint8_t ip_v;
+
+	/* we might have been called just after an asynchronous shutr */
+	if (conn->flags & CO_FL_SOCK_RD_SH)
+		goto fail;
+
+	if (!conn_ctrl_ready(conn))
+		goto fail;
+
+	if (!fd_recv_ready(conn->t.sock.fd))
+		return 0;
+
+	/* peek at the pending data; a call interrupted by a signal is retried */
+	while (1) {
+		trash.len = recv(conn->t.sock.fd, trash.str, trash.size, MSG_PEEK);
+		if (trash.len >= 0)
+			break;
+		if (errno == EINTR)
+			continue;
+		if (errno == EAGAIN) {
+			fd_cant_recv(conn->t.sock.fd);
+			return 0;
+		}
+		goto recv_abort;
+	}
+
+	if (!trash.len) {
+		/* client shutdown */
+		conn->err_code = CO_ER_CIP_EMPTY;
+		goto fail;
+	}
+
+	/* Fail if buffer length is not large enough to contain
+	 * CIP magic, CIP length */
+	if (trash.len < 8)
+		goto missing;
+
+	line = trash.str;
+
+	cip_magic = ntohl(*(uint32_t *)line);
+	cip_len = ntohl(*(uint32_t *)(line+4));
+
+	/* Decode a possible NetScaler Client IP request, fail early if
+	 * it does not match */
+	if (cip_magic != objt_listener(conn->target)->bind_conf->ns_cip_magic)
+		goto bad_magic;
+
+	/* The announced length is not trusted: the whole header must already
+	 * be in the peeked data (hence within the trash buffer) and must hold
+	 * at least a minimal IP header. From here on, cip_len is the number
+	 * of valid bytes available after the 8-byte CIP prefix. */
+	if (cip_len > (uint32_t)(trash.len - 8) || cip_len < sizeof(struct ip))
+		goto missing;
+
+	line += 8;
+
+	/* Get IP version from the first four bits */
+	ip_v = (*line & 0xf0) >> 4;
+
+	if (ip_v == 4) {
+		struct ip *hdr_ip4;
+		struct tcphdr *hdr_tcp;
+		uint32_t ip_hlen;
+
+		hdr_ip4 = (struct ip *)line;
+
+		/* Real IPv4 header length, options included. ip_len is the
+		 * length of the whole datagram and cannot be used here. */
+		ip_hlen = hdr_ip4->ip_hl * 4;
+
+		if (ip_hlen < sizeof(struct ip) || hdr_ip4->ip_p != IPPROTO_TCP) {
+			/* Malformed IPv4 header, or the protocol does not
+			 * include a TCP header */
+			conn->err_code = CO_ER_CIP_BAD_PROTO;
+			goto fail;
+		} else if (cip_len < ip_hlen + sizeof(struct tcphdr)) {
+			/* Fail if the header is not large enough to contain
+			 * IPv4 header, TCP header */
+			goto missing;
+		}
+
+		hdr_tcp = (struct tcphdr *)(line + ip_hlen);
+
+		/* update the session's addresses and mark them set */
+		((struct sockaddr_in *)&conn->addr.from)->sin_family = AF_INET;
+		((struct sockaddr_in *)&conn->addr.from)->sin_addr.s_addr = hdr_ip4->ip_src.s_addr;
+		((struct sockaddr_in *)&conn->addr.from)->sin_port = hdr_tcp->source;
+
+		((struct sockaddr_in *)&conn->addr.to)->sin_family = AF_INET;
+		((struct sockaddr_in *)&conn->addr.to)->sin_addr.s_addr = hdr_ip4->ip_dst.s_addr;
+		((struct sockaddr_in *)&conn->addr.to)->sin_port = hdr_tcp->dest;
+
+		conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
+	}
+	else if (ip_v == 6) {
+		struct ip6_hdr *hdr_ip6;
+		struct tcphdr *hdr_tcp;
+
+		hdr_ip6 = (struct ip6_hdr *)line;
+
+		if (cip_len < sizeof(struct ip6_hdr)) {
+			/* Fail if the header is not large enough to contain
+			 * IPv6 header */
+			goto missing;
+		} else if (hdr_ip6->ip6_nxt != IPPROTO_TCP) {
+			/* The protocol does not include a TCP header */
+			conn->err_code = CO_ER_CIP_BAD_PROTO;
+			goto fail;
+		} else if (cip_len < sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) {
+			/* Fail if the header is not large enough to contain
+			 * IPv6 header, TCP header */
+			goto missing;
+		}
+
+		hdr_tcp = (struct tcphdr *)(line + sizeof(struct ip6_hdr));
+
+		/* update the session's addresses and mark them set */
+		((struct sockaddr_in6 *)&conn->addr.from)->sin6_family = AF_INET6;
+		((struct sockaddr_in6 *)&conn->addr.from)->sin6_addr = hdr_ip6->ip6_src;
+		((struct sockaddr_in6 *)&conn->addr.from)->sin6_port = hdr_tcp->source;
+
+		((struct sockaddr_in6 *)&conn->addr.to)->sin6_family = AF_INET6;
+		((struct sockaddr_in6 *)&conn->addr.to)->sin6_addr = hdr_ip6->ip6_dst;
+		((struct sockaddr_in6 *)&conn->addr.to)->sin6_port = hdr_tcp->dest;
+
+		conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
+	}
+	else {
+		/* The protocol does not match something known (IPv4/IPv6) */
+		conn->err_code = CO_ER_CIP_BAD_PROTO;
+		goto fail;
+	}
+
+	/* cip_len was validated against the peeked data above */
+	line += cip_len;
+	trash.len = line - trash.str;
+
+	/* remove the NetScaler Client IP header from the request. For this
+	 * we re-read the exact line at once. If we don't get the exact same
+	 * result, we fail.
+	 */
+	while (1) {
+		int len2 = recv(conn->t.sock.fd, trash.str, trash.len, 0);
+		if (len2 < 0 && errno == EINTR)
+			continue;
+		if (len2 != trash.len)
+			goto recv_abort;
+		break;
+	}
+
+	conn->flags &= ~flag;
+	return 1;
+
+ missing:
+	/* Missing data. The header is required to come in one segment and
+	 * was only peeked at with MSG_PEEK, so we do not wait for the rest:
+	 * a short or inconsistent header is reported as truncated.
+	 */
+	conn->err_code = CO_ER_CIP_TRUNCATED;
+	goto fail;
+
+ bad_magic:
+	conn->err_code = CO_ER_CIP_BAD_MAGIC;
+	goto fail;
+
+ recv_abort:
+	conn->err_code = CO_ER_CIP_ABORT;
+	conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+	goto fail;
+
+ fail:
+	__conn_sock_stop_both(conn);
+	conn->flags |= CO_FL_ERROR;
+	return 0;
+}
+
 int make_proxy_line(char *buf, int buf_len, struct server *srv, struct connection *remote)
 {
 	int ret = 0;
diff --git a/src/listener.c b/src/listener.c
index 59385f04fb..c2ce41329c 100644
--- a/src/listener.c
+++ b/src/listener.c
@@ -618,6 +618,31 @@ static int bind_parse_accept_proxy(char **args, int cur_arg, struct proxy *px, s
 	return 0;
 }
 
+/* parse the "accept-netscaler-cip" bind keyword (argument: magic number in [1..4294967295]) */
+static int bind_parse_accept_netscaler_cip(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+	struct listener *l;
+	char *end = args[cur_arg + 1];
+	unsigned long long val = 0;
+
+	if (!*end) {
+		memprintf(err, "'%s' : missing value", args[cur_arg]);
+		return ERR_ALERT | ERR_FATAL;
+	}
+
+	if (*end >= '0' && *end <= '9') /* plain digits only: no sign, no leading space */
+		val = strtoull(args[cur_arg + 1], &end, 10);
+	if (!val || *end || val > 0xFFFFFFFFULL) { /* zero, trailing garbage or more than 32 bits */
+		memprintf(err, "'%s' : invalid value '%s', must be an integer between 1 and 4294967295", args[cur_arg], args[cur_arg + 1]);
+		return ERR_ALERT | ERR_FATAL;
+	}
+
+	conf->ns_cip_magic = (uint32_t)val; /* set once, even when no listener is attached yet */
+	list_for_each_entry(l, &conf->listeners, by_bind)
+		l->options |= LI_O_ACC_CIP;
+	return 0;
+}
+
 /* parse the "backlog" bind keyword */
 static int bind_parse_backlog(char **args, int cur_arg, struct proxy *px, struct
bind_conf *conf, char **err) { @@ -814,6 +839,7 @@ static struct acl_kw_list acl_kws = {ILH, { * not enabled. */ static struct bind_kw_list bind_kws = { "ALL", { }, { + { "accept-netscaler-cip", bind_parse_accept_netscaler_cip, 1 }, /* enable NetScaler Client IP insertion protocol */ { "accept-proxy", bind_parse_accept_proxy, 0 }, /* enable PROXY protocol */ { "backlog", bind_parse_backlog, 1 }, /* set backlog of listening socket */ { "id", bind_parse_id, 1 }, /* set id of listening socket */ diff --git a/src/session.c b/src/session.c index fdb24045b5..0c2336459c 100644 --- a/src/session.c +++ b/src/session.c @@ -142,6 +142,12 @@ int session_accept_fd(struct listener *l, int cfd, struct sockaddr_storage *addr conn_sock_want_recv(cli_conn); } + /* wait for a NetScaler client IP insertion protocol header */ + if (l->options & LI_O_ACC_CIP) { + cli_conn->flags |= CO_FL_ACCEPT_CIP; + conn_sock_want_recv(cli_conn); + } + conn_data_want_recv(cli_conn); if (conn_xprt_init(cli_conn) < 0) goto out_free_conn; @@ -346,6 +352,7 @@ static void session_kill_embryonic(struct session *sess) /* with "option dontlognull", we don't log connections with no transfer */ if (!conn->err_code || conn->err_code == CO_ER_PRX_EMPTY || conn->err_code == CO_ER_PRX_ABORT || + conn->err_code == CO_ER_CIP_EMPTY || conn->err_code == CO_ER_CIP_ABORT || conn->err_code == CO_ER_SSL_EMPTY || conn->err_code == CO_ER_SSL_ABORT) log = 0; } @@ -354,6 +361,8 @@ static void session_kill_embryonic(struct session *sess) if (!conn->err_code && (task->state & TASK_WOKEN_TIMER)) { if (conn->flags & CO_FL_ACCEPT_PROXY) conn->err_code = CO_ER_PRX_TIMEOUT; + else if (conn->flags & CO_FL_ACCEPT_CIP) + conn->err_code = CO_ER_CIP_TIMEOUT; else if (conn->flags & CO_FL_SSL_WAIT_HS) conn->err_code = CO_ER_SSL_TIMEOUT; } -- 2.39.5