bool new_with_sport_idx = false;
u32 new_peer_gen_num = 0;
int new_npaths;
+ bool fan_out;
new_npaths = conn->c_npaths;
spin_lock_irqsave(&cp0->cp_lock, flags);
conn->c_cp0_mprds_catchup_tx_seq = cp0->cp_next_tx_seq;
spin_unlock_irqrestore(&cp0->cp_lock, flags);
+ fan_out = true;
+ } else {
+ fan_out = false;
}
+
/* if RDS_EXTHDR_NPATHS was not found, default to a single-path */
conn->c_npaths = max_t(int, new_npaths, 1);
if (conn->c_npaths > 1 &&
conn->c_trans->conn_slots_available)
- conn->c_trans->conn_slots_available(conn);
+ conn->c_trans->conn_slots_available(conn, fan_out);
}
/* rds_start_mprds() will synchronously start multiple paths when appropriate.
cpath = &conn->c_path[0];
}
+ /* If we're multipath capable and path 0 is down, queue reconnect
+ * and send a ping. This initiates the multipath handshake through
+ * rds_send_probe(), which sends RDS_EXTHDR_NPATHS to the peer,
+ * starting multipath capability negotiation.
+ */
+ if (conn->c_trans->t_mp_capable &&
+ !rds_conn_path_up(&conn->c_path[0])) {
+ /* Ensures that only one request is queued. And
+ * rds_send_ping() ensures that only one ping is
+ * outstanding.
+ */
+ if (!test_and_set_bit(RDS_RECONNECT_PENDING,
+ &conn->c_path[0].cp_flags))
+ queue_delayed_work(conn->c_path[0].cp_wq,
+ &conn->c_path[0].cp_conn_w, 0);
+ rds_send_ping(conn, 0);
+ }
+
rm->m_conn_path = cpath;
/* Parse any control messages the user may have included. */
tcp_sock_set_keepintvl(sock->sk, keepidle);
}
-/* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the
- * client's ipaddr < server's ipaddr. Otherwise, close the accepted
- * socket and force a reconneect from smaller -> larger ip addr. The reason
- * we special case cp_index 0 is to allow the rds probe ping itself to itself
- * get through efficiently.
- */
-static struct rds_tcp_connection *
-rds_tcp_accept_one_path(struct rds_connection *conn, struct socket *sock)
+static int
+rds_tcp_get_peer_sport(struct socket *sock)
{
union {
struct sockaddr_storage storage;
struct sockaddr_in sin;
struct sockaddr_in6 sin6;
} saddr;
- int sport, npaths, i_min, i_max, i;
+ int sport;
- if (conn->c_with_sport_idx &&
- kernel_getpeername(sock, &saddr.addr) >= 0) {
- /* cp->cp_index is encoded in lowest bits of source-port */
+ if (kernel_getpeername(sock, &saddr.addr) >= 0) {
switch (saddr.addr.sa_family) {
case AF_INET:
sport = ntohs(saddr.sin.sin_port);
sport = -1;
}
+ return sport;
+}
+
+/* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the
+ * client's ipaddr < server's ipaddr. Otherwise, close the accepted
+ * socket and force a reconneect from smaller -> larger ip addr. The reason
+ * we special case cp_index 0 is to allow the rds probe ping itself to itself
+ * get through efficiently.
+ */
+static struct rds_tcp_connection *
+rds_tcp_accept_one_path(struct rds_connection *conn, struct socket *sock)
+{
+ int sport, npaths, i_min, i_max, i;
+
+ if (conn->c_with_sport_idx)
+ /* cp->cp_index is encoded in lowest bits of source-port */
+ sport = rds_tcp_get_peer_sport(sock);
+ else
+ sport = -1;
+
npaths = max_t(int, 1, conn->c_npaths);
if (sport >= 0) {
return NULL;
}
-void rds_tcp_conn_slots_available(struct rds_connection *conn)
+void rds_tcp_conn_slots_available(struct rds_connection *conn, bool fan_out)
{
struct rds_tcp_connection *tc;
struct rds_tcp_net *rtn;
+ struct socket *sock;
+ int sport, npaths;
if (rds_destroy_pending(conn))
return;
if (!rtn)
return;
+ sock = tc->t_sock;
+
+ /* During fan-out, check that the connection we already
+ * accepted in slot#0 carried the proper source port modulo.
+ */
+ if (fan_out && conn->c_with_sport_idx && sock &&
+ rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) > 0) {
+ /* cp->cp_index is encoded in lowest bits of source-port */
+ sport = rds_tcp_get_peer_sport(sock);
+ npaths = max_t(int, 1, conn->c_npaths);
+ if (sport >= 0 && sport % npaths != 0)
+ /* peer initiated with a non-#0 lane first */
+ rds_conn_path_drop(conn->c_path, 0);
+ }
+
/* As soon as a connection went down,
* it is safe to schedule a "rds_tcp_accept_one"
* attempt even if there are no connections pending: