From 7045590d8a0af9a16ab01e96340060b3bf9dd72c Mon Sep 17 00:00:00 2001 From: Emeric Brun Date: Wed, 20 Oct 2021 10:49:53 +0200 Subject: [PATCH] BUG/MAJOR: dns: attempt to lock globaly for msg waiter list instead of use barrier The barrier is insufficient here to protect the waiters list as we can definitely catch situations where ds->waiter shows an inconsistency whereby the element is not attached when entering the "if" block and is already attached when attaching it later. This patch uses a larger lock to maintain consistency. Without it the code would crash in 30-180 minutes under heavy stress, always showing the same problem (ds->waiter->n->p != &ds->waiter). Now it seems to always resist, suggesting that this was indeed the problem. This will have to be backported to 2.4. --- src/dns.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/src/dns.c b/src/dns.c index 433b554ad7..63bb52f25f 100644 --- a/src/dns.c +++ b/src/dns.c @@ -175,18 +175,9 @@ ssize_t dns_recv_nameserver(struct dns_nameserver *ns, void *data, size_t size) ds->rx_msg.len = 0; - /* This barrier is here to ensure that all data is - * stored if the appctx detect the elem is out of the list */ - __ha_barrier_store(); - LIST_DEL_INIT(&ds->waiter); if (ds->appctx) { - /* This second barrier is here to ensure that - * the waked up appctx won't miss that the - * elem is removed from the list */ - __ha_barrier_store(); - /* awake appctx because it may have other * message to receive */ @@ -625,7 +616,10 @@ read: * Note: we need a load barrier here to not miss the * delete from the list */ - __ha_barrier_load(); + + /* lock the dns_stream_server containing lists heads */ + HA_SPIN_LOCK(DNS_LOCK, &ds->dss->lock); + if (!LIST_INLIST(&ds->waiter)) { while (1) { uint16_t query_id; @@ -701,18 +695,12 @@ read: pool_free(dns_query_pool, query); ds->onfly_queries--; - /* lock the dns_stream_server containing lists heads */ - HA_SPIN_LOCK(DNS_LOCK, &ds->dss->lock); - /* the dns_session is also added in queue of the * wait_sess list where the task processing * response will pop available responses */ LIST_APPEND(&ds->dss->wait_sess, &ds->waiter); - /* lock the dns_stream_server containing lists heads */ - HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock); - /* awake the task processing the responses */ task_wakeup(ds->dss->task_rsp, TASK_WOKEN_INIT); @@ -722,13 +710,14 @@ read: if (!LIST_INLIST(&ds->waiter)) { /* there is no more pending data to read and the con was closed by the server side */ if (!co_data(si_oc(si)) && (si_oc(si)->flags & CF_SHUTW)) { + HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock); goto close; } } } - + HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock); return; close: si_shutw(si); -- 2.39.5