]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Fix] Prevent race conditions and fd reuse bugs in fuzzy TCP connections
authorVsevolod Stakhov <vsevolod@rspamd.com>
Wed, 22 Oct 2025 07:31:48 +0000 (08:31 +0100)
committerVsevolod Stakhov <vsevolod@rspamd.com>
Wed, 22 Oct 2025 10:17:46 +0000 (11:17 +0100)
Fix critical race conditions in TCP connection management for parallel message processing:

1. Add connection to pool BEFORE starting event watcher to prevent duplicate connections
   when multiple tasks try to connect simultaneously
2. Close fd and set to -1 immediately on connection failure to prevent fd reuse bugs
3. Create fuzzy_tcp_connection_close() helper to ensure consistent cleanup
4. Set conn->fd = -1 after close in connection_free to prevent double-close

These changes prevent crashes when processing thousands of messages in parallel where:
- Multiple tasks create duplicate connections to same upstream
- OS reuses fd numbers after close, causing wrong socket operations
- Event handlers access stale fd values after connection cleanup

src/plugins/fuzzy_check.c

index 052ad092c4f34b50f693fca9ad79a6773994de21..a744d2c2093c44d9712c3dec6bc99d5ea886759d 100644 (file)
@@ -535,6 +535,7 @@ static void fuzzy_tcp_read_handler(struct fuzzy_tcp_connection *conn);
 static gboolean fuzzy_tcp_send_command(struct fuzzy_tcp_connection *conn,
                                                                           GPtrArray *commands,
                                                                           struct fuzzy_client_session *session);
+static void fuzzy_tcp_connection_close(struct fuzzy_tcp_connection *conn);
 static void fuzzy_tcp_connection_cleanup(struct fuzzy_tcp_connection *conn, const char *reason);
 static gboolean fuzzy_check_session_is_completed(struct fuzzy_client_session *session);
 
@@ -558,6 +559,7 @@ fuzzy_tcp_connection_free(struct fuzzy_tcp_connection *conn)
        if (conn->fd != -1) {
                rspamd_ev_watcher_stop(conn->event_loop, &conn->ev);
                close(conn->fd);
+               conn->fd = -1;
        }
 
        if (conn->write_queue) {
@@ -699,14 +701,16 @@ fuzzy_tcp_connect_async(struct fuzzy_rule *rule,
                conn->encrypted = FALSE;
        }
 
+       /* Store in connection pool array BEFORE starting event watcher
+        * This prevents race condition where another task might create duplicate connection
+        * Array takes ownership of initial reference */
+       g_ptr_array_add(rule->tcp_connections, conn);
+
        /* Initialize event watcher for connection establishment (wait for write) */
        rspamd_ev_watcher_init(&conn->ev, fd, EV_WRITE,
                                                   fuzzy_tcp_io_handler, conn);
        rspamd_ev_watcher_start(conn->event_loop, &conn->ev, rule->tcp_timeout);
 
-       /* Store in connection pool array - array takes ownership of initial reference */
-       g_ptr_array_add(rule->tcp_connections, conn);
-
        msg_info_task("fuzzy_tcp: initiating connection to %s for rule %s",
                                  rspamd_inet_address_to_string_pretty(addr),
                                  rule->name);
@@ -785,6 +789,25 @@ fuzzy_tcp_get_or_create_connection(struct fuzzy_rule *rule,
        return conn;
 }
 
+/**
+ * Close connection socket and mark as failed
+ * Stops event watcher, closes fd, marks connection as failed
+ * Should be called before fuzzy_tcp_connection_cleanup
+ */
+static void
+fuzzy_tcp_connection_close(struct fuzzy_tcp_connection *conn)
+{
+       if (conn->fd != -1) {
+               rspamd_ev_watcher_stop(conn->event_loop, &conn->ev);
+               close(conn->fd);
+               conn->fd = -1;
+       }
+       conn->failed = TRUE;
+       conn->last_failure = rspamd_get_calendar_ticks();
+       conn->connecting = FALSE;
+       conn->connected = FALSE;
+}
+
 /**
  * Cleanup pending requests for a failed connection
  * Removes all pending commands associated with this connection
@@ -960,11 +983,8 @@ fuzzy_tcp_io_handler(int fd, short what, gpointer ud)
                                 elapsed,
                                 conn->connecting,
                                 conn->connected);
-               conn->failed = TRUE;
-               conn->last_failure = rspamd_get_calendar_ticks();
-               conn->connecting = FALSE;
+               fuzzy_tcp_connection_close(conn);
                fuzzy_tcp_connection_cleanup(conn, "connection timeout");
-               rspamd_ev_watcher_stop(conn->event_loop, &conn->ev);
                rspamd_upstream_fail(conn->server, TRUE, "connection timeout");
                FUZZY_TCP_RELEASE(conn);
                return;
@@ -979,11 +999,8 @@ fuzzy_tcp_io_handler(int fd, short what, gpointer ud)
                                                 conn->rule->name,
                                                 rspamd_upstream_name(conn->server),
                                                 strerror(errno));
-                               conn->failed = TRUE;
-                               conn->last_failure = rspamd_get_calendar_ticks();
-                               conn->connecting = FALSE;
+                               fuzzy_tcp_connection_close(conn);
                                fuzzy_tcp_connection_cleanup(conn, "getsockopt failed");
-                               rspamd_ev_watcher_stop(conn->event_loop, &conn->ev);
                                rspamd_upstream_fail(conn->server, TRUE, "getsockopt failed");
                                FUZZY_TCP_RELEASE(conn);
                                return;
@@ -996,11 +1013,8 @@ fuzzy_tcp_io_handler(int fd, short what, gpointer ud)
                                                 conn->rule->name,
                                                 rspamd_upstream_name(conn->server),
                                                 strerror(so_error));
-                               conn->failed = TRUE;
-                               conn->last_failure = rspamd_get_calendar_ticks();
-                               conn->connecting = FALSE;
+                               fuzzy_tcp_connection_close(conn);
                                fuzzy_tcp_connection_cleanup(conn, error_buf);
-                               rspamd_ev_watcher_stop(conn->event_loop, &conn->ev);
                                rspamd_upstream_fail(conn->server, TRUE, strerror(so_error));
                                FUZZY_TCP_RELEASE(conn);
                                return;
@@ -1086,20 +1100,16 @@ fuzzy_tcp_write_handler(struct fuzzy_tcp_connection *conn)
                                        conn->rule->name,
                                        rspamd_upstream_name(conn->server),
                                        strerror(errno));
-                       conn->failed = TRUE;
-                       conn->last_failure = rspamd_get_calendar_ticks();
+                       fuzzy_tcp_connection_close(conn);
                        fuzzy_tcp_connection_cleanup(conn, error_buf);
-                       rspamd_ev_watcher_stop(conn->event_loop, &conn->ev);
                        return;
                }
                else if (r == 0) {
                        msg_info("fuzzy_tcp: connection closed by %s for rule %s",
                                         rspamd_upstream_name(conn->server),
                                         conn->rule->name);
-                       conn->failed = TRUE;
-                       conn->last_failure = rspamd_get_calendar_ticks();
+                       fuzzy_tcp_connection_close(conn);
                        fuzzy_tcp_connection_cleanup(conn, "connection closed by peer");
-                       rspamd_ev_watcher_stop(conn->event_loop, &conn->ev);
                        return;
                }
 
@@ -1377,10 +1387,8 @@ fuzzy_tcp_read_handler(struct fuzzy_tcp_connection *conn)
        if (available_space == 0) {
                msg_err("fuzzy_tcp: read buffer full for rule %s, closing connection",
                                conn->rule->name);
-               conn->failed = TRUE;
-               conn->last_failure = rspamd_get_calendar_ticks();
+               fuzzy_tcp_connection_close(conn);
                fuzzy_tcp_connection_cleanup(conn, "read buffer overflow");
-               rspamd_ev_watcher_stop(conn->event_loop, &conn->ev);
                return;
        }
 
@@ -1394,20 +1402,16 @@ fuzzy_tcp_read_handler(struct fuzzy_tcp_connection *conn)
                rspamd_snprintf(error_buf, sizeof(error_buf), "read error: %s", strerror(errno));
                msg_err("fuzzy_tcp: read error for rule %s: %s",
                                conn->rule->name, strerror(errno));
-               conn->failed = TRUE;
-               conn->last_failure = rspamd_get_calendar_ticks();
+               fuzzy_tcp_connection_close(conn);
                fuzzy_tcp_connection_cleanup(conn, error_buf);
-               rspamd_ev_watcher_stop(conn->event_loop, &conn->ev);
                return;
        }
        else if (r == 0) {
                msg_info("fuzzy_tcp: connection closed by %s for rule %s",
                                 rspamd_upstream_name(conn->server),
                                 conn->rule->name);
-               conn->failed = TRUE;
-               conn->last_failure = rspamd_get_calendar_ticks();
+               fuzzy_tcp_connection_close(conn);
                fuzzy_tcp_connection_cleanup(conn, "connection closed by peer");
-               rspamd_ev_watcher_stop(conn->event_loop, &conn->ev);
                return;
        }
 
@@ -1456,10 +1460,8 @@ fuzzy_tcp_read_handler(struct fuzzy_tcp_connection *conn)
                        msg_err("fuzzy_tcp: invalid frame length %d from %s, closing",
                                        (int) frame_len,
                                        rspamd_upstream_name(conn->server));
-                       conn->failed = TRUE;
-                       conn->last_failure = rspamd_get_calendar_ticks();
+                       fuzzy_tcp_connection_close(conn);
                        fuzzy_tcp_connection_cleanup(conn, error_buf);
-                       rspamd_ev_watcher_stop(conn->event_loop, &conn->ev);
                        return;
                }