af_unix: Refine wait_for_unix_gc().
author    Kuniyuki Iwashima <kuniyu@google.com>
          Sat, 15 Nov 2025 02:08:36 +0000 (02:08 +0000)
committer Jakub Kicinski <kuba@kernel.org>
          Wed, 19 Nov 2025 03:19:31 +0000 (19:19 -0800)
unix_tot_inflight is a poor metric, only telling the number of
inflight AF_UNIX sockets, and we should use unix_graph_state instead.

Also, if the receiver is catching up with the passed fds, the
sender does not need to schedule GC.

GC only helps with unreferenced cyclic SCM_RIGHTS references, and in
such a situation, a malicious sendmsg() caller will keep calling
wait_for_unix_gc() and hit the UNIX_INFLIGHT_SANE_USER condition.
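
The cyclic case the GC exists for can be produced entirely from
userspace.  Below is a minimal, hypothetical sketch, not part of this
patch and with error handling omitted: a socket's fd ends up queued on
the socket itself and both descriptors are then closed, so only the GC
can reclaim the reference.

/* Hypothetical userspace sketch of an unreferenced SCM_RIGHTS cycle. */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	int sv[2];
	char byte = 0;
	struct iovec iov = { .iov_base = &byte, .iov_len = 1 };
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} ctl = { 0 };
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = ctl.buf,
		.msg_controllen = sizeof(ctl.buf),
	};
	struct cmsghdr *cmsg;

	socketpair(AF_UNIX, SOCK_DGRAM, 0, sv);

	/* Pass sv[0]'s fd over sv[1], so the skb carrying the fd lands
	 * in sv[0]'s own receive queue.
	 */
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &sv[0], sizeof(int));
	sendmsg(sv[1], &msg, 0);

	/* Closing both fds leaves sv[0]'s file pinned only by the skb
	 * sitting in its own queue: a cycle no task references, which
	 * only the AF_UNIX GC can reclaim.
	 */
	close(sv[0]);
	close(sv[1]);
	return 0;
}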

Let's make only malicious users schedule GC and wait for it to
finish if a cyclic reference existed during the previous GC run.

Then, sane users will pay almost no cost for wait_for_unix_gc().

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251115020935.2643121-6-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/unix/garbage.c

index 280b9b07b1c0fc5e1f968a58887ed7c1c8113c02..a6929226d40deca7ca1ff90a1aefd591b0902f45 100644
@@ -543,7 +543,7 @@ static void unix_walk_scc(struct sk_buff_head *hitlist)
        list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
        swap(unix_vertex_unvisited_index, unix_vertex_grouped_index);
 
-       unix_graph_cyclic_sccs = cyclic_sccs;
+       WRITE_ONCE(unix_graph_cyclic_sccs, cyclic_sccs);
        WRITE_ONCE(unix_graph_state,
                   cyclic_sccs ? UNIX_GRAPH_CYCLIC : UNIX_GRAPH_NOT_CYCLIC);
 }
@@ -577,7 +577,7 @@ static void unix_walk_scc_fast(struct sk_buff_head *hitlist)
 
        list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
 
-       unix_graph_cyclic_sccs = cyclic_sccs;
+       WRITE_ONCE(unix_graph_cyclic_sccs, cyclic_sccs);
        WRITE_ONCE(unix_graph_state,
                   cyclic_sccs ? UNIX_GRAPH_CYCLIC : UNIX_GRAPH_NOT_CYCLIC);
 }
@@ -629,19 +629,12 @@ void unix_schedule_gc(void)
        queue_work(system_dfl_wq, &unix_gc_work);
 }
 
-#define UNIX_INFLIGHT_TRIGGER_GC 16000
-#define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8)
+#define UNIX_INFLIGHT_SANE_USER                (SCM_MAX_FD * 8)
 
 static void wait_for_unix_gc(struct scm_fp_list *fpl)
 {
-       /* If number of inflight sockets is insane,
-        * force a garbage collect right now.
-        *
-        * Paired with the WRITE_ONCE() in unix_inflight(),
-        * unix_notinflight(), and __unix_gc().
-        */
-       if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC)
-               unix_schedule_gc();
+       if (READ_ONCE(unix_graph_state) == UNIX_GRAPH_NOT_CYCLIC)
+               return;
 
        /* Penalise users who want to send AF_UNIX sockets
         * but whose sockets have not been received yet.
@@ -649,6 +642,8 @@ static void wait_for_unix_gc(struct scm_fp_list *fpl)
        if (READ_ONCE(fpl->user->unix_inflight) < UNIX_INFLIGHT_SANE_USER)
                return;
 
-       if (READ_ONCE(gc_in_progress))
+       unix_schedule_gc();
+
+       if (READ_ONCE(unix_graph_cyclic_sccs))
                flush_work(&unix_gc_work);
 }
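
For reference, wait_for_unix_gc() as it reads after this patch,
stitched together from the hunks above; the comments on the early
return and on the flush condition are added here for illustration only
and are not part of the patch.

#define UNIX_INFLIGHT_SANE_USER		(SCM_MAX_FD * 8)

static void wait_for_unix_gc(struct scm_fp_list *fpl)
{
	/* The previous GC run found no cyclic SCC, so there is nothing
	 * a new run could free; sane users return here immediately.
	 */
	if (READ_ONCE(unix_graph_state) == UNIX_GRAPH_NOT_CYCLIC)
		return;

	/* Penalise users who want to send AF_UNIX sockets
	 * but whose sockets have not been received yet.
	 */
	if (READ_ONCE(fpl->user->unix_inflight) < UNIX_INFLIGHT_SANE_USER)
		return;

	unix_schedule_gc();

	/* Only wait for the worker if the previous run actually left
	 * cyclic SCCs behind.
	 */
	if (READ_ONCE(unix_graph_cyclic_sccs))
		flush_work(&unix_gc_work);
}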