git.ipfire.org Git - thirdparty/linux.git/commitdiff
afs: Fix possible infinite loop with unresponsive servers
author: Marc Dionne <marc.dionne@auristor.com>
Mon, 23 Sep 2024 15:07:49 +0000 (16:07 +0100)
committer: Christian Brauner <brauner@kernel.org>
Fri, 27 Sep 2024 16:29:19 +0000 (18:29 +0200)
A return code of 0 from afs_wait_for_one_fs_probe is an indication
that the endpoint state attached to the operation is stale and has
been superseded.  In that case the iteration needs to be restarted
so that the newer probe result state gets used.

Failure to do so can result in a tight infinite loop around the
iterate_address label, where all addresses are thought to be responsive
and have been tried, with nothing to refresh the endpoint state.

Fixes: 495f2ae9e355 ("afs: Fix fileserver rotation")
Reported-by: Markus Suvanto <markus.suvanto@gmail.com>
Link: https://lists.infradead.org/pipermail/linux-afs/2024-July/008628.html
cc: linux-afs@lists.infradead.org
Signed-off-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20240906134019.131553-1-marc.dionne@auristor.com/
Link: https://lore.kernel.org/r/20240923150756.902363-6-dhowells@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/afs/fs_probe.c
fs/afs/rotate.c

index 580de4adaaf652bf09d53854dba3a5ebd9a39e48..b516d05b0fefc5dcf84cc11c6ebeed795f061e89 100644 (file)
@@ -506,10 +506,10 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_sta
        finish_wait(&server->probe_wq, &wait);
 
 dont_wait:
-       if (estate->responsive_set & ~exclude)
-               return 1;
        if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags))
                return 0;
+       if (estate->responsive_set & ~exclude)
+               return 1;
        if (is_intr && signal_pending(current))
                return -ERESTARTSYS;
        if (timo == 0)
index ed09d4d4c2112189bfa724d577769e5ae8f15bdd..d612983d6f38e6390c9f3379b53349ce5db8d229 100644 (file)
@@ -632,8 +632,10 @@ iterate_address:
 wait_for_more_probe_results:
        error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
                                          !(op->flags & AFS_OPERATION_UNINTR));
-       if (!error)
+       if (error == 1)
                goto iterate_address;
+       if (!error)
+               goto restart_from_beginning;
 
        /* We've now had a failure to respond on all of a server's addresses -
         * immediately probe them again and consider retrying the server.
@@ -644,10 +646,13 @@ wait_for_more_probe_results:
                error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
                                                  !(op->flags & AFS_OPERATION_UNINTR));
                switch (error) {
-               case 0:
+               case 1:
                        op->flags &= ~AFS_OPERATION_RETRY_SERVER;
-                       trace_afs_rotate(op, afs_rotate_trace_retry_server, 0);
+                       trace_afs_rotate(op, afs_rotate_trace_retry_server, 1);
                        goto retry_server;
+               case 0:
+                       trace_afs_rotate(op, afs_rotate_trace_retry_server, 0);
+                       goto restart_from_beginning;
                case -ERESTARTSYS:
                        afs_op_set_error(op, error);
                        goto failed;