git.ipfire.org Git - thirdparty/bind9.git/commitdiff
Fix lock-order-inversion (potential deadlock) in dns_resolver_createfetch
author: Ondřej Surý <ondrej@sury.org>
Mon, 19 Apr 2021 08:01:32 +0000 (10:01 +0200)
committer: Ondřej Surý <ondrej@sury.org>
Mon, 19 Apr 2021 20:31:37 +0000 (22:31 +0200)
There's a lock-order-inversion when running `zone_maintenance()` from
the timer while the server is shutting down in `shutdown_server()`.  This
only happens when the taskmgr scheduling is more relaxed and parallelized,
but the issue is real nevertheless.

The associated ThreadSanitizer warning:

    WARNING: ThreadSanitizer: lock-order-inversion (potential deadlock)
      Cycle in lock order graph: M1 (0x000000000001) => M2 (0x000000000000) => M1

      Mutex M2 acquired here while holding mutex M1 in thread T1:
#0 pthread_mutex_lock <null>
#1 dns_view_findzonecut lib/dns/view.c:1326:2
#2 fctx_create lib/dns/resolver.c:5144:13
#3 dns_resolver_createfetch lib/dns/resolver.c:10977:12
#4 zone_refreshkeys lib/dns/zone.c:10830:13
#5 zone_maintenance lib/dns/zone.c:11065:5
#6 zone_timer lib/dns/zone.c:14652:2
#7 task_run lib/isc/task.c:857:5
#8 isc_task_run lib/isc/task.c:944:10
#9 isc__nm_async_task lib/isc/netmgr/netmgr.c:730:24
#10 process_netievent lib/isc/netmgr/netmgr.c
#11 process_queue lib/isc/netmgr/netmgr.c:885:8
#12 process_tasks_queue lib/isc/netmgr/netmgr.c:756:10
#13 process_queues lib/isc/netmgr/netmgr.c:772:7
#14 async_cb lib/isc/netmgr/netmgr.c:671:2
#15 uv__async_io /home/ondrej/Projects/tsan/libuv/src/unix/async.c:163:5
#16 uv__io_poll /home/ondrej/Projects/tsan/libuv/src/unix/linux-core.c:462:11
#17 uv_run /home/ondrej/Projects/tsan/libuv/src/unix/core.c:392:5
#18 nm_thread lib/isc/netmgr/netmgr.c:597:11
#19 isc__trampoline_run lib/isc/trampoline.c:184:11

      Mutex M1 previously acquired by the same thread here:
#0 pthread_mutex_lock <null>
#1 zone_refreshkeys lib/dns/zone.c:10717:2
#2 zone_maintenance lib/dns/zone.c:11065:5
#3 zone_timer lib/dns/zone.c:14652:2
#4 task_run lib/isc/task.c:857:5
#5 isc_task_run lib/isc/task.c:944:10
#6 isc__nm_async_task lib/isc/netmgr/netmgr.c:730:24
#7 process_netievent lib/isc/netmgr/netmgr.c
#8 process_queue lib/isc/netmgr/netmgr.c:885:8
#9 process_tasks_queue lib/isc/netmgr/netmgr.c:756:10
#10 process_queues lib/isc/netmgr/netmgr.c:772:7
#11 async_cb lib/isc/netmgr/netmgr.c:671:2
#12 uv__async_io /home/ondrej/Projects/tsan/libuv/src/unix/async.c:163:5
#13 uv__io_poll /home/ondrej/Projects/tsan/libuv/src/unix/linux-core.c:462:11
#14 uv_run /home/ondrej/Projects/tsan/libuv/src/unix/core.c:392:5
#15 nm_thread lib/isc/netmgr/netmgr.c:597:11
#16 isc__trampoline_run lib/isc/trampoline.c:184:11

      Mutex M1 acquired here while holding mutex M2 in thread T2:
#0 pthread_mutex_lock <null>
#1 dns_zone_flush lib/dns/zone.c:11443:2
#2 view_flushanddetach lib/dns/view.c:657:5
#3 dns_view_flushanddetach lib/dns/view.c:690:2
#4 shutdown_server bin/named/server.c:10056:4
#5 task_run lib/isc/task.c:857:5
#6 isc_task_run lib/isc/task.c:944:10
#7 isc__nm_async_task lib/isc/netmgr/netmgr.c:730:24
#8 process_netievent lib/isc/netmgr/netmgr.c
#9 process_queue lib/isc/netmgr/netmgr.c:885:8
#10 process_tasks_queue lib/isc/netmgr/netmgr.c:756:10
#11 process_queues lib/isc/netmgr/netmgr.c:772:7
#12 async_cb lib/isc/netmgr/netmgr.c:671:2
#13 uv__async_io /home/ondrej/Projects/tsan/libuv/src/unix/async.c:163:5
#14 uv__io_poll /home/ondrej/Projects/tsan/libuv/src/unix/linux-core.c:462:11
#15 uv_run /home/ondrej/Projects/tsan/libuv/src/unix/core.c:392:5
#16 nm_thread lib/isc/netmgr/netmgr.c:597:11
#17 isc__trampoline_run lib/isc/trampoline.c:184:11

      Mutex M2 previously acquired by the same thread here:
#0 pthread_mutex_lock <null>
#1 view_flushanddetach lib/dns/view.c:645:3
#2 dns_view_flushanddetach lib/dns/view.c:690:2
#3 shutdown_server bin/named/server.c:10056:4
#4 task_run lib/isc/task.c:857:5
#5 isc_task_run lib/isc/task.c:944:10
#6 isc__nm_async_task lib/isc/netmgr/netmgr.c:730:24
#7 process_netievent lib/isc/netmgr/netmgr.c
#8 process_queue lib/isc/netmgr/netmgr.c:885:8
#9 process_tasks_queue lib/isc/netmgr/netmgr.c:756:10
#10 process_queues lib/isc/netmgr/netmgr.c:772:7
#11 async_cb lib/isc/netmgr/netmgr.c:671:2
#12 uv__async_io /home/ondrej/Projects/tsan/libuv/src/unix/async.c:163:5
#13 uv__io_poll /home/ondrej/Projects/tsan/libuv/src/unix/linux-core.c:462:11
#14 uv_run /home/ondrej/Projects/tsan/libuv/src/unix/core.c:392:5
#15 nm_thread lib/isc/netmgr/netmgr.c:597:11
#16 isc__trampoline_run lib/isc/trampoline.c:184:11

      Thread T1 (running) created by main thread at:
#0 pthread_create <null>
#1 isc_thread_create lib/isc/pthreads/thread.c:79:8
#2 isc_nm_start lib/isc/netmgr/netmgr.c:303:3
#3 create_managers bin/named/main.c:957:15
#4 setup bin/named/main.c:1267:11
#5 main bin/named/main.c:1558:2

      Thread T2 (running) created by main thread at:
#0 pthread_create <null>
#1 isc_thread_create lib/isc/pthreads/thread.c:79:8
#2 isc_nm_start lib/isc/netmgr/netmgr.c:303:3
#3 create_managers bin/named/main.c:957:15
#4 setup bin/named/main.c:1267:11
#5 main bin/named/main.c:1558:2

    SUMMARY: ThreadSanitizer: lock-order-inversion (potential deadlock) in __interceptor_pthread_mutex_lock

(cherry picked from commit 25d27851d8e66a6775654a67817ae53aa0e87317)

lib/dns/zone.c

index fa6ed8ab5b61dc843af8ef6accbe208507d6a173..a742bcb9cf23cb1a8e23a96c33b5b9a02b28dd32 100644 (file)
@@ -10760,6 +10760,7 @@ zone_refreshkeys(dns_zone_t *zone) {
 #ifdef ENABLE_AFL
                if (!dns_fuzzing_resolver) {
 #endif /* ifdef ENABLE_AFL */
+                       UNLOCK_ZONE(zone);
                        result = dns_resolver_createfetch(
                                zone->view->resolver, kname,
                                dns_rdatatype_dnskey, NULL, NULL, NULL, NULL, 0,
@@ -10769,6 +10770,7 @@ zone_refreshkeys(dns_zone_t *zone) {
                                0, NULL, zone->task, keyfetch_done, kfetch,
                                &kfetch->dnskeyset, &kfetch->dnskeysigset,
                                &kfetch->fetch);
+                       LOCK_ZONE(zone);
 #ifdef ENABLE_AFL
                } else {
                        result = ISC_R_FAILURE;