From cbf547d0f8d72a6bb9b0b972e4300cc19819e914 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Thu, 15 Sep 2005 17:18:42 -0700 Subject: [PATCH] Add ipvs fix for ip_vs_ftp and persistent connections from Julian Anastasov --- queue/ipvs-ip_vs_ftp-breaks-connections.patch | 253 ++++++++++++++++++ queue/series | 1 + 2 files changed, 254 insertions(+) create mode 100644 queue/ipvs-ip_vs_ftp-breaks-connections.patch diff --git a/queue/ipvs-ip_vs_ftp-breaks-connections.patch b/queue/ipvs-ip_vs_ftp-breaks-connections.patch new file mode 100644 index 00000000000..74a5f55f624 --- /dev/null +++ b/queue/ipvs-ip_vs_ftp-breaks-connections.patch @@ -0,0 +1,253 @@ +From stable-bounces@linux.kernel.org Thu Sep 15 14:13:43 2005 +From: Julian Anastasov +To: stable@kernel.org +Subject: [PATCH] ipvs: ip_vs_ftp breaks connections using persistence + + ip_vs_ftp when loaded can create NAT connections with unknown +client port for passive FTP. For such expectations we lookup with +cport=0 on incoming packet but it matches the format of the persistence +templates causing packets to other persistent virtual servers to be +forwarded to real server without creating connection. Later the +reply packets are treated as foreign and not SNAT-ed. + + If the IPVS box serves both FTP and other services (eg. HTTP) +for the time we wait for first packet for the FTP data connections with +unknown client port (there can be many), other HTTP connections +that have nothing common to the FTP conn break, i.e. HTTP client +sends SYN to the virtual IP but the SYN+ACK is not NAT-ed properly +in IPVS box and the client box returns RST to real server IP. I.e. +the result can be 10% broken HTTP traffic if 10% of the time +there are passive FTP connections in connecting state. It hurts +only IPVS connections. 
+ + This patch changes the connection lookup for packets from +clients: + +* introduce IP_VS_CONN_F_TEMPLATE connection flag to mark the +connection as template +* create new connection lookup function just for templates - ip_vs_ct_in_get +* make sure ip_vs_conn_in_get hits only connections with +IP_VS_CONN_F_NO_CPORT flag set when s_port is 0. By this way +we avoid returning template when looking for cport=0 (ftp) + +Signed-off-by: Julian Anastasov +Signed-off-by: Chris Wright +--- + include/net/ip_vs.h | 3 +++ + net/ipv4/ipvs/ip_vs_conn.c | 41 ++++++++++++++++++++++++++++++++++++++--- + net/ipv4/ipvs/ip_vs_core.c | 16 ++++++++-------- + net/ipv4/ipvs/ip_vs_sync.c | 20 ++++++++++++++------ + 4 files changed, 63 insertions(+), 17 deletions(-) + +Index: linux-2.6.13.y/include/net/ip_vs.h +=================================================================== +--- linux-2.6.13.y.orig/include/net/ip_vs.h ++++ linux-2.6.13.y/include/net/ip_vs.h +@@ -84,6 +84,7 @@ + #define IP_VS_CONN_F_IN_SEQ 0x0400 /* must do input seq adjust */ + #define IP_VS_CONN_F_SEQ_MASK 0x0600 /* in/out sequence mask */ + #define IP_VS_CONN_F_NO_CPORT 0x0800 /* no client port set yet */ ++#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ + + /* Move it to better place one day, for now keep it unique */ + #define NFC_IPVS_PROPERTY 0x10000 +@@ -740,6 +741,8 @@ enum { + + extern struct ip_vs_conn *ip_vs_conn_in_get + (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); ++extern struct ip_vs_conn *ip_vs_ct_in_get ++(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); + extern struct ip_vs_conn *ip_vs_conn_out_get + (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); + +Index: linux-2.6.13.y/net/ipv4/ipvs/ip_vs_conn.c +=================================================================== +--- linux-2.6.13.y.orig/net/ipv4/ipvs/ip_vs_conn.c ++++ linux-2.6.13.y/net/ipv4/ipvs/ip_vs_conn.c +@@ -196,6 +196,7 @@ static inline struct 
ip_vs_conn *__ip_vs + list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + if (s_addr==cp->caddr && s_port==cp->cport && + d_port==cp->vport && d_addr==cp->vaddr && ++ ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && + protocol==cp->protocol) { + /* HIT */ + atomic_inc(&cp->refcnt); +@@ -227,6 +228,40 @@ struct ip_vs_conn *ip_vs_conn_in_get + return cp; + } + ++/* Get reference to connection template */ ++struct ip_vs_conn *ip_vs_ct_in_get ++(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) ++{ ++ unsigned hash; ++ struct ip_vs_conn *cp; ++ ++ hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); ++ ++ ct_read_lock(hash); ++ ++ list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { ++ if (s_addr==cp->caddr && s_port==cp->cport && ++ d_port==cp->vport && d_addr==cp->vaddr && ++ cp->flags & IP_VS_CONN_F_TEMPLATE && ++ protocol==cp->protocol) { ++ /* HIT */ ++ atomic_inc(&cp->refcnt); ++ goto out; ++ } ++ } ++ cp = NULL; ++ ++ out: ++ ct_read_unlock(hash); ++ ++ IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", ++ ip_vs_proto_name(protocol), ++ NIPQUAD(s_addr), ntohs(s_port), ++ NIPQUAD(d_addr), ntohs(d_port), ++ cp?"hit":"not hit"); ++ ++ return cp; ++} + + /* + * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. 
+@@ -367,7 +402,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, s + atomic_read(&dest->refcnt)); + + /* Update the connection counters */ +- if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { ++ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { + /* It is a normal connection, so increase the inactive + connection counter because it is in TCP SYNRECV + state (inactive) or other protocol inacive state */ +@@ -406,7 +441,7 @@ static inline void ip_vs_unbind_dest(str + atomic_read(&dest->refcnt)); + + /* Update the connection counters */ +- if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { ++ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { + /* It is a normal connection, so decrease the inactconns + or activeconns counter */ + if (cp->flags & IP_VS_CONN_F_INACTIVE) { +@@ -776,7 +811,7 @@ void ip_vs_random_dropentry(void) + ct_write_lock_bh(hash); + + list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { +- if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT)) ++ if (cp->flags & IP_VS_CONN_F_TEMPLATE) + /* connection template */ + continue; + +Index: linux-2.6.13.y/net/ipv4/ipvs/ip_vs_core.c +=================================================================== +--- linux-2.6.13.y.orig/net/ipv4/ipvs/ip_vs_core.c ++++ linux-2.6.13.y/net/ipv4/ipvs/ip_vs_core.c +@@ -242,10 +242,10 @@ ip_vs_sched_persist(struct ip_vs_service + if (ports[1] == svc->port) { + /* Check if a template already exists */ + if (svc->port != FTPPORT) +- ct = ip_vs_conn_in_get(iph->protocol, snet, 0, ++ ct = ip_vs_ct_in_get(iph->protocol, snet, 0, + iph->daddr, ports[1]); + else +- ct = ip_vs_conn_in_get(iph->protocol, snet, 0, ++ ct = ip_vs_ct_in_get(iph->protocol, snet, 0, + iph->daddr, 0); + + if (!ct || !ip_vs_check_template(ct)) { +@@ -271,14 +271,14 @@ ip_vs_sched_persist(struct ip_vs_service + iph->daddr, + ports[1], + dest->addr, dest->port, +- 0, ++ IP_VS_CONN_F_TEMPLATE, + dest); + else + ct = ip_vs_conn_new(iph->protocol, + snet, 0, + iph->daddr, 0, + dest->addr, 0, +- 0, ++ 
IP_VS_CONN_F_TEMPLATE, + dest); + if (ct == NULL) + return NULL; +@@ -297,10 +297,10 @@ ip_vs_sched_persist(struct ip_vs_service + * port zero template: + */ + if (svc->fwmark) +- ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0, ++ ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0, + htonl(svc->fwmark), 0); + else +- ct = ip_vs_conn_in_get(iph->protocol, snet, 0, ++ ct = ip_vs_ct_in_get(iph->protocol, snet, 0, + iph->daddr, 0); + + if (!ct || !ip_vs_check_template(ct)) { +@@ -325,14 +325,14 @@ ip_vs_sched_persist(struct ip_vs_service + snet, 0, + htonl(svc->fwmark), 0, + dest->addr, 0, +- 0, ++ IP_VS_CONN_F_TEMPLATE, + dest); + else + ct = ip_vs_conn_new(iph->protocol, + snet, 0, + iph->daddr, 0, + dest->addr, 0, +- 0, ++ IP_VS_CONN_F_TEMPLATE, + dest); + if (ct == NULL) + return NULL; +Index: linux-2.6.13.y/net/ipv4/ipvs/ip_vs_sync.c +=================================================================== +--- linux-2.6.13.y.orig/net/ipv4/ipvs/ip_vs_sync.c ++++ linux-2.6.13.y/net/ipv4/ipvs/ip_vs_sync.c +@@ -297,16 +297,24 @@ static void ip_vs_process_message(const + + p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); + for (i=0; i<m->nr_conns; i++) { ++ unsigned flags; ++ + s = (struct ip_vs_sync_conn *)p; +- cp = ip_vs_conn_in_get(s->protocol, +- s->caddr, s->cport, +- s->vaddr, s->vport); ++ flags = ntohs(s->flags); ++ if (!(flags & IP_VS_CONN_F_TEMPLATE)) ++ cp = ip_vs_conn_in_get(s->protocol, ++ s->caddr, s->cport, ++ s->vaddr, s->vport); ++ else ++ cp = ip_vs_ct_in_get(s->protocol, ++ s->caddr, s->cport, ++ s->vaddr, s->vport); + if (!cp) { + cp = ip_vs_conn_new(s->protocol, + s->caddr, s->cport, + s->vaddr, s->vport, + s->daddr, s->dport, +- ntohs(s->flags), NULL); ++ flags, NULL); + if (!cp) { + IP_VS_ERR("ip_vs_conn_new failed\n"); + return; +@@ -315,11 +323,11 @@ static void ip_vs_process_message(const + } else if (!cp->dest) { + /* it is an entry created by the synchronization */ + cp->state = ntohs(s->state); +- cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED; ++
cp->flags = flags | IP_VS_CONN_F_HASHED; + } /* Note that we don't touch its state and flags + if it is a normal entry. */ + +- if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) { ++ if (flags & IP_VS_CONN_F_SEQ_MASK) { + opt = (struct ip_vs_sync_conn_options *)&s[1]; + memcpy(&cp->in_seq, opt, sizeof(*opt)); + p += FULL_CONN_SIZE; diff --git a/queue/series b/queue/series index e090ff58dca..b2d8e84baaf 100644 --- a/queue/series +++ b/queue/series @@ -1,3 +1,4 @@ yenta-oops-fix.patch fix-de_thread-BUG_ON.patch ipv6-fix-per-socket-multicast-filtering.patch +ipvs-ip_vs_ftp-breaks-connections.patch -- 2.47.3