From: Emeric Brun
Date: Fri, 23 Apr 2021 10:21:26 +0000 (+0200)
Subject: BUG/MEDIUM: peers: re-work refcnt on table to protect against flush
X-Git-Tag: v2.4-dev17~2
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2cc201f97e409f926b9221fdafe7431877ba1dc6;p=thirdparty%2Fhaproxy.git

BUG/MEDIUM: peers: re-work refcnt on table to protect against flush

In proxy.c, when the process is stopping, we try to flush table contents
using 'stktable_trash_oldest'. A check on the counter "table->syncing" was
made to verify that no resync was still pending. But with multiple threads
this counter may only be increased by another thread after some delay, so
the content of some tables could be trashed too early and would not be
pushed to the new process (after a reload, some tables appear reset while
others don't).

This patch renames the counter "table->syncing" to "table->refcnt", and the
counter is now increased during configuration parsing (when a table is
registered to a peers section) to protect tables at runtime until the
resync to a new process has succeeded or failed.

The inc/dec operations are now performed with atomic operations because
multiple peers sections could refer to the same table in the future.

This fix addresses GitHub issue #1216.

This patch should be backported to all branches with multi-thread support
(v >= 1.8).
---

diff --git a/include/haproxy/stick_table-t.h b/include/haproxy/stick_table-t.h
index bae32d9106..237bfbe682 100644
--- a/include/haproxy/stick_table-t.h
+++ b/include/haproxy/stick_table-t.h
@@ -192,7 +192,8 @@ struct stktable {
         unsigned int localupdate;
         unsigned int commitupdate;/* used to identify the latest local updates
                                      pending for sync */
-        unsigned int syncing;     /* number of sync tasks watching this table now */
+        unsigned int refcnt;      /* number of local peer over all peers sections
+                                     attached to this table */
         union {
                 struct peers *p; /* sync peers */
                 char *name;
diff --git a/src/peers.c b/src/peers.c
index dd1d2596e4..89d193d247 100644
--- a/src/peers.c
+++ b/src/peers.c
@@ -3038,9 +3038,6 @@ struct task *process_peer_sync(struct task * task, void *context, unsigned int s
                 /* add DO NOT STOP flag if not present */
                 _HA_ATOMIC_INC(&jobs);
                 peers->flags |= PEERS_F_DONOTSTOP;
-                ps = peers->local;
-                for (st = ps->tables; st ; st = st->next)
-                        st->table->syncing++;
 
                 /* disconnect all connected peers to process a local sync
                  * this must be done only the first time we are switching
@@ -3066,7 +3063,7 @@ struct task *process_peer_sync(struct task * task, void *context, unsigned int s
                         _HA_ATOMIC_DEC(&jobs);
                         peers->flags &= ~PEERS_F_DONOTSTOP;
                         for (st = ps->tables; st ; st = st->next)
-                                st->table->syncing--;
+                                HA_ATOMIC_DEC(&st->table->refcnt);
                 }
         }
         else if (!ps->appctx) {
@@ -3092,7 +3089,7 @@ struct task *process_peer_sync(struct task * task, void *context, unsigned int s
                                 _HA_ATOMIC_DEC(&jobs);
                                 peers->flags &= ~PEERS_F_DONOTSTOP;
                                 for (st = ps->tables; st ; st = st->next)
-                                        st->table->syncing--;
+                                        HA_ATOMIC_DEC(&st->table->refcnt);
                         }
                 }
         }
@@ -3306,6 +3303,13 @@ void peers_register_table(struct peers *peers, struct stktable *table)
                         id = curpeer->tables->local_id;
                 st->local_id = id + 1;
 
+                /* If peer is local we inc table
+                 * refcnt to protect against flush
+                 * until this process pushed all
+                 * table content to the new one
+                 */
+                if (curpeer->local)
+                        HA_ATOMIC_INC(&st->table->refcnt);
                 curpeer->tables = st;
         }
@@ -3493,8 +3497,8 @@ static int peers_dump_peer(struct buffer *msg, struct stream_interface *si, stru
                       st->last_acked, st->last_pushed, st->last_get, st->teaching_origin, st->update);
         chunk_appendf(&trash, "\n        table:%p id=%s update=%u localupdate=%u"
-                      " commitupdate=%u syncing=%u",
-                      t, t->id, t->update, t->localupdate, t->commitupdate, t->syncing);
+                      " commitupdate=%u refcnt=%u",
+                      t, t->id, t->update, t->localupdate, t->commitupdate, t->refcnt);
         if (flags & PEERS_SHOW_F_DICT) {
                 chunk_appendf(&trash, "\n        TX dictionary cache:");
                 count = 0;
diff --git a/src/proxy.c b/src/proxy.c
index c3859e6056..c209b23cee 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -1851,7 +1851,13 @@ struct task *manage_proxy(struct task *t, void *context, unsigned int state)
          * However we protect tables that are being synced to peers.
          */
         if (unlikely(stopping && p->disabled && p->table && p->table->current)) {
-                if (!p->table->syncing) {
+
+                if (!p->table->refcnt) {
+                        /* !table->refcnt means there
+                         * is no more pending full resync
+                         * to push to a new process and
+                         * we are free to flush the table.
+                         */
                         stktable_trash_oldest(p->table, p->table->current);
                         pool_gc(NULL);
                 }
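
For readers unfamiliar with the pattern, the following is a minimal, self-contained C sketch, not HAProxy source: it illustrates the mechanism the patch relies on (a reference count taken at configuration time and released once the full resync has succeeded or failed, with the flush path acting only when the count has dropped to zero). It uses C11 <stdatomic.h> in place of HAProxy's HA_ATOMIC_* macros, and the names (struct table, table_take_ref, table_drop_ref, maybe_flush) are hypothetical.

#include <stdatomic.h>
#include <stdio.h>

struct table {
        atomic_uint refcnt;   /* local peers sections still expecting a full resync */
        unsigned int current; /* number of entries currently stored */
};

/* Taken at configuration parsing time, once per local peers section
 * referencing the table (the role peers_register_table() plays in the patch). */
static void table_take_ref(struct table *t)
{
        atomic_fetch_add(&t->refcnt, 1);
}

/* Released once the full resync to the new process has succeeded or failed
 * (the role of the HA_ATOMIC_DEC() calls in process_peer_sync()). */
static void table_drop_ref(struct table *t)
{
        atomic_fetch_sub(&t->refcnt, 1);
}

/* Stopping path (the role of the manage_proxy() hunk): flush only when
 * no resync still depends on the table content. */
static void maybe_flush(struct table *t)
{
        if (atomic_load(&t->refcnt) == 0) {
                printf("flushing %u entries\n", t->current);
                t->current = 0; /* stands in for stktable_trash_oldest() */
        }
}

int main(void)
{
        struct table t = { .refcnt = 0, .current = 42 };

        table_take_ref(&t);  /* configuration parsed: local peer registered */
        maybe_flush(&t);     /* refcnt != 0, content is preserved */
        table_drop_ref(&t);  /* resync finished (or failed) */
        maybe_flush(&t);     /* refcnt == 0, safe to flush */
        return 0;
}

The important property, as the commit message notes, is that the reference is taken while the configuration is parsed, before any sync task runs, so the stopping path can never observe a zero count merely because another thread has not incremented it yet.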