]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
BUG/MEDIUM: peers: re-work refcnt on table to protect against flush
authorEmeric Brun <ebrun@haproxy.com>
Fri, 23 Apr 2021 10:21:26 +0000 (12:21 +0200)
committerWilly Tarreau <w@1wt.eu>
Fri, 23 Apr 2021 16:03:06 +0000 (18:03 +0200)
In proxy.c, when process is stopping we try to flush tables content
using 'stktable_trash_oldest'. A check on a counter "table->syncing" was
made to verify if there is no pending resync in progress.
But using multiple threads this counter can be increased by an other thread
only after some delay, so the content of some tables can be trashed earlier and
won't be pushed to the new process (after reload, some tables appear reset and
others don't).

This patch re-names the counter "table->syncing" to "table->refcnt" and
the counter is increased during configuration parsing (registering a table to
a peer section) to protect tables during runtime and until resync of a new
process has succeeded or failed.

The inc/dec operations are now made using atomic operations
because multiple peer sections could refer to the same table in futur.

This fix addresses github #1216.

This patch should be backported on all branches multi-thread support (v >= 1.8)

include/haproxy/stick_table-t.h
src/peers.c
src/proxy.c

index bae32d9106df76a2f81b747afeaa088a9123fe94..237bfbe682bf5c39e5c62f5910a2d62dc2eba9a0 100644 (file)
@@ -192,7 +192,8 @@ struct stktable {
        unsigned int localupdate;
        unsigned int commitupdate;/* used to identify the latest local updates
                                     pending for sync */
-       unsigned int syncing;     /* number of sync tasks watching this table now */
+       unsigned int refcnt;     /* number of local peer over all peers sections
+                                   attached to this table */
        union {
                struct peers *p; /* sync peers */
                char *name;
index dd1d2596e46d2d4e2cf90eab50d4d46e2d6eee2b..89d193d24737f92b77372fa416e366ee88797b5c 100644 (file)
@@ -3038,9 +3038,6 @@ struct task *process_peer_sync(struct task * task, void *context, unsigned int s
                                /* add DO NOT STOP flag if not present */
                                _HA_ATOMIC_INC(&jobs);
                                peers->flags |= PEERS_F_DONOTSTOP;
-                               ps = peers->local;
-                               for (st = ps->tables; st ; st = st->next)
-                                       st->table->syncing++;
 
                                /* disconnect all connected peers to process a local sync
                                 * this must be done only the first time we are switching
@@ -3066,7 +3063,7 @@ struct task *process_peer_sync(struct task * task, void *context, unsigned int s
                                _HA_ATOMIC_DEC(&jobs);
                                peers->flags &= ~PEERS_F_DONOTSTOP;
                                for (st = ps->tables; st ; st = st->next)
-                                       st->table->syncing--;
+                                       HA_ATOMIC_DEC(&st->table->refcnt);
                        }
                }
                else if (!ps->appctx) {
@@ -3092,7 +3089,7 @@ struct task *process_peer_sync(struct task * task, void *context, unsigned int s
                                        _HA_ATOMIC_DEC(&jobs);
                                        peers->flags &= ~PEERS_F_DONOTSTOP;
                                        for (st = ps->tables; st ; st = st->next)
-                                               st->table->syncing--;
+                                               HA_ATOMIC_DEC(&st->table->refcnt);
                                }
                        }
                }
@@ -3306,6 +3303,13 @@ void peers_register_table(struct peers *peers, struct stktable *table)
                        id = curpeer->tables->local_id;
                st->local_id = id + 1;
 
+               /* If peer is local we inc table
+                * refcnt to protect against flush
+                * until this process pushed all
+                * table content to the new one
+                */
+               if (curpeer->local)
+                       HA_ATOMIC_INC(&st->table->refcnt);
                curpeer->tables = st;
        }
 
@@ -3493,8 +3497,8 @@ static int peers_dump_peer(struct buffer *msg, struct stream_interface *si, stru
                                      st->last_acked, st->last_pushed, st->last_get,
                                      st->teaching_origin, st->update);
                        chunk_appendf(&trash, "\n              table:%p id=%s update=%u localupdate=%u"
-                                     " commitupdate=%u syncing=%u",
-                                     t, t->id, t->update, t->localupdate, t->commitupdate, t->syncing);
+                                     " commitupdate=%u refcnt=%u",
+                                     t, t->id, t->update, t->localupdate, t->commitupdate, t->refcnt);
                        if (flags & PEERS_SHOW_F_DICT) {
                                chunk_appendf(&trash, "\n        TX dictionary cache:");
                                count = 0;
index c3859e6056ec78810a261b3fe5af78f3be273350..c209b23ceea632417214cfeb87fc36f75de5d273 100644 (file)
@@ -1851,7 +1851,13 @@ struct task *manage_proxy(struct task *t, void *context, unsigned int state)
         * However we protect tables that are being synced to peers.
         */
        if (unlikely(stopping && p->disabled && p->table && p->table->current)) {
-               if (!p->table->syncing) {
+
+               if (!p->table->refcnt) {
+                       /* !table->refcnt means there
+                        * is no more pending full resync
+                        * to push to a new process and
+                        * we are free to flush the table.
+                        */
                        stktable_trash_oldest(p->table, p->table->current);
                        pool_gc(NULL);
                }