]> git.ipfire.org Git - thirdparty/bind9.git/commitdiff
Fix a race condition in xfrin_recv_done() when calling xfrin_reset()
authorAram Sargsyan <aram@isc.org>
Wed, 4 Mar 2026 16:25:33 +0000 (16:25 +0000)
committerAram Sargsyan <aram@isc.org>
Fri, 3 Apr 2026 11:01:34 +0000 (11:01 +0000)
When the xfrin_recv_done() function decides to retry the transfer
using AXFR because of a previous error, it calls the xfrin_reset()
function which calls dns_db_closeversion() on 'xfr->ver'. The problem
is that the ixfr processing of a previous message could be still
in process in a worker thread, which then can use freed 'xfr->ver'.

If there is an ongoing worker thread delay the AXFR retry until after
the worker thread has finished its work.

lib/dns/xfrin.c

index 20bd836ef437dc85e5349c0cf912d16e7be4f205..eca48700a92e8a36f6a2baf24b02112a0c4a1235 100644 (file)
@@ -141,7 +141,7 @@ struct dns_xfrin {
 
        _Atomic xfrin_state_t state;
        uint32_t expireopt;
-       bool edns, expireoptset;
+       bool edns, expireoptset, retry_axfr;
        atomic_bool is_ixfr;
 
        /*
@@ -270,6 +270,10 @@ xfrin_idledout(void *);
 static void
 xfrin_minratecheck(void *);
 static void
+xfrin_reset(dns_xfrin_t *xfr);
+static void
+xfrin_ixfrcleanup(dns_xfrin_t *xfr);
+static void
 xfrin_fail(dns_xfrin_t *xfr, isc_result_t result, const char *msg);
 static isc_result_t
 render(dns_message_t *msg, isc_mem_t *mctx, isc_buffer_t *buf);
@@ -627,7 +631,9 @@ ixfr_apply_done(void *arg) {
        CHECK(result);
 
        /* Reschedule */
-       if (!cds_wfcq_empty(&xfr->diff_head, &xfr->diff_tail)) {
+       if (!xfr->retry_axfr &&
+           !cds_wfcq_empty(&xfr->diff_head, &xfr->diff_tail))
+       {
                isc_work_enqueue(xfr->loop, ixfr_apply, ixfr_apply_done, work);
                return;
        }
@@ -637,7 +643,18 @@ cleanup:
 
        isc_mem_put(xfr->mctx, work, sizeof(*work));
 
-       if (result == ISC_R_SUCCESS) {
+       /*
+        * Don't retry with AXFR (even if it was requested) because there was
+        * an error or the transfer is shutting down. In case if it _was_ an
+        * error, xfrin_fail() will return a special result code which will
+        * still result in AXFR retry from the initiator of the transfer after
+        * the failure has been is logged.
+        */
+       if (result != ISC_R_SUCCESS) {
+               xfr->retry_axfr = false;
+       }
+
+       if (!xfr->retry_axfr && result == ISC_R_SUCCESS) {
                dns_db_closeversion(xfr->db, &xfr->ver, true);
                dns_zone_markdirty(xfr->zone);
 
@@ -647,7 +664,21 @@ cleanup:
        } else {
                dns_db_closeversion(xfr->db, &xfr->ver, false);
 
-               xfrin_fail(xfr, result, "failed while processing responses");
+               if (result != ISC_R_SUCCESS) {
+                       xfrin_fail(xfr, result,
+                                  "failed while processing responses");
+               }
+       }
+
+       if (xfr->retry_axfr) {
+               xfr->reqtype = dns_rdatatype_soa;
+               atomic_store(&xfr->state, XFRST_SOAQUERY);
+
+               xfrin_reset(xfr);
+               result = xfrin_start(xfr);
+               if (result != ISC_R_SUCCESS) {
+                       xfrin_fail(xfr, result, "failed setting up socket");
+               }
        }
 
        dns_xfrin_detach(&xfr);
@@ -1175,13 +1206,18 @@ xfrin_cancelio(dns_xfrin_t *xfr) {
 static void
 xfrin_reset(dns_xfrin_t *xfr) {
        REQUIRE(VALID_XFRIN(xfr));
+       REQUIRE(!xfr->diff_running);
 
        xfrin_log(xfr, ISC_LOG_INFO, "resetting");
 
+       xfr->retry_axfr = false;
+
        if (xfr->lasttsig != NULL) {
                isc_buffer_free(&xfr->lasttsig);
        }
 
+       xfrin_ixfrcleanup(xfr);
+
        dns_diff_clear(&xfr->diff);
        xfr->ixfr.diffs = 0;
 
@@ -1844,6 +1880,11 @@ xfrin_recv_done(isc_result_t result, isc_region_t *region, void *arg) {
                {
                        xfr->edns = false;
                        dns_message_detach(&msg);
+                       /*
+                        * With these states (see the conditions above) the diff
+                        * process can't be currently in the running state, so
+                        * it is safe to reset the 'xfr' and retry right away.
+                        */
                        xfrin_reset(xfr);
                        goto try_again;
                } else if (result == ISC_R_SUCCESS &&
@@ -1873,6 +1914,12 @@ xfrin_recv_done(isc_result_t result, isc_region_t *region, void *arg) {
        try_axfr:
                LIBDNS_XFRIN_RECV_TRY_AXFR(xfr, xfr->info, result);
                dns_message_detach(&msg);
+               /* If there is a running worker thread then delay the retry. */
+               if (xfr->diff_running) {
+                       xfr->retry_axfr = true;
+                       dns_xfrin_detach(&xfr);
+                       return;
+               }
                xfrin_reset(xfr);
                xfr->reqtype = dns_rdatatype_soa;
                atomic_store(&xfr->state, XFRST_SOAQUERY);
@@ -2070,6 +2117,19 @@ cleanup:
        dns_xfrin_detach(&xfr);
 }
 
+static void
+xfrin_ixfrcleanup(dns_xfrin_t *xfr) {
+       struct cds_wfcq_node *node, *next;
+       __cds_wfcq_for_each_blocking_safe(&xfr->diff_head, &xfr->diff_tail,
+                                         node, next) {
+               ixfr_apply_data_t *data =
+                       caa_container_of(node, ixfr_apply_data_t, wfcq_node);
+               /* We need to clear and free all data chunks */
+               dns_diff_clear(&data->diff);
+               isc_mem_put(xfr->mctx, data, sizeof(*data));
+       }
+}
+
 static void
 xfrin_destroy(dns_xfrin_t *xfr) {
        uint64_t msecs, persec;
@@ -2120,15 +2180,7 @@ xfrin_destroy(dns_xfrin_t *xfr) {
                  sep, expireopt);
 
        /* Cleanup unprocessed IXFR data */
-       struct cds_wfcq_node *node, *next;
-       __cds_wfcq_for_each_blocking_safe(&xfr->diff_head, &xfr->diff_tail,
-                                         node, next) {
-               ixfr_apply_data_t *data =
-                       caa_container_of(node, ixfr_apply_data_t, wfcq_node);
-               /* We need to clear and free all data chunks */
-               dns_diff_clear(&data->diff);
-               isc_mem_put(xfr->mctx, data, sizeof(*data));
-       }
+       xfrin_ixfrcleanup(xfr);
 
        /* Cleanup unprocessed AXFR data */
        dns_diff_clear(&xfr->diff);