From: Andrew Tridgell Date: Mon, 18 Dec 2006 05:01:11 +0000 (+1100) Subject: added request_dmaster and reply_dmaster logic X-Git-Tag: tevent-0.9.20~348^2~2994 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ab7185c2ce8f0b0d8b33494b7ec61bea2b47b6e3;p=thirdparty%2Fsamba.git added request_dmaster and reply_dmaster logic ctdb will now move the dmaster role between nodes after CTDB_MAX_LACOUNT consecutive accesses by the same node. (This used to be ctdb commit af87f587d8f70192ecac0125054bf9583a4849a7) --- diff --git a/ctdb/common/ctdb.c b/ctdb/common/ctdb.c index d50a1570ad7..e4150f83ca9 100644 --- a/ctdb/common/ctdb.c +++ b/ctdb/common/ctdb.c @@ -185,6 +185,14 @@ static void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t len ctdb_reply_redirect(ctdb, hdr); break; + case CTDB_REQ_DMASTER: + ctdb_request_dmaster(ctdb, hdr); + break; + + case CTDB_REPLY_DMASTER: + ctdb_reply_dmaster(ctdb, hdr); + break; + default: printf("Packet with unknown operation %d\n", hdr->operation); talloc_free(hdr); diff --git a/ctdb/common/ctdb_call.c b/ctdb/common/ctdb_call.c index 406e8cd8f09..8424480d437 100644 --- a/ctdb/common/ctdb_call.c +++ b/ctdb/common/ctdb_call.c @@ -17,7 +17,10 @@ License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - +/* + see http://wiki.samba.org/index.php/Samba_%26_Clustering for + protocol design and packet details +*/ #include "includes.h" #include "lib/events/events.h" #include "system/network.h" @@ -162,6 +165,109 @@ static void ctdb_call_send_redirect(struct ctdb_context *ctdb, talloc_free(r); } +/* + send a dmaster request (give another node the dmaster for a record) + + This is always sent to the lmaster, which ensures that the lmaster + always knows who the dmaster is. The lmaster will then send a + CTDB_REPLY_DMASTER to the new dmaster +*/ +static void ctdb_call_send_dmaster(struct ctdb_context *ctdb, + struct ctdb_req_call *c, + struct ctdb_ltdb_header *header, + TDB_DATA *key, TDB_DATA *data) +{ + struct ctdb_req_dmaster *r; + struct ctdb_node *node; + int len; + + len = sizeof(*r) + key->dsize + data->dsize; + r = talloc_size(ctdb, len); + r->hdr.length = len; + r->hdr.operation = CTDB_REQ_DMASTER; + r->hdr.destnode = ctdb_lmaster(ctdb, key); + r->hdr.srcnode = ctdb->vnn; + r->hdr.reqid = c->hdr.reqid; + r->dmaster = header->laccessor; + r->keylen = key->dsize; + r->datalen = data->dsize; + memcpy(&r->data[0], key->dptr, key->dsize); + memcpy(&r->data[key->dsize], data->dptr, data->dsize); + + node = ctdb->nodes[r->hdr.destnode]; + + if (r->hdr.destnode == ctdb->vnn && !(ctdb->flags & CTDB_FLAG_SELF_CONNECT)) { + /* we are the lmaster - don't send to ourselves */ + ctdb_request_dmaster(ctdb, &r->hdr); + } else { + ctdb->methods->queue_pkt(node, (uint8_t *)r, r->hdr.length); + + /* update the ltdb to record the new dmaster */ + header->dmaster = r->hdr.destnode; + ctdb_ltdb_store(ctdb, *key, header, *data); + } + + talloc_free(r); +} + + +/* + called when a CTDB_REQ_DMASTER packet comes in + + this comes into the lmaster for a record when the current dmaster + wants to give up the dmaster role and give it to someone else +*/ +void ctdb_request_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) +{ + struct ctdb_req_dmaster *c = (struct ctdb_req_dmaster *)hdr; + struct ctdb_reply_dmaster *r; + TDB_DATA key, data; + struct ctdb_ltdb_header header; + int ret; + struct ctdb_node *node; + + key.dptr = c->data; + key.dsize = c->keylen; + data.dptr = c->data + c->keylen; + data.dsize = c->datalen; + + /* fetch the current record */ + ret = ctdb_ltdb_fetch(ctdb, key, &header, &data); + if (ret != 0) { + ctdb_fatal(ctdb, "ctdb_req_dmaster failed to fetch record"); + return; + } + + /* its a protocol error if the sending node is not the current dmaster */ + if (header.dmaster != hdr->srcnode) { + ctdb_fatal(ctdb, "dmaster request from non-master"); + return; + } + + header.dmaster = c->dmaster; + if (ctdb_ltdb_store(ctdb, key, &header, data) != 0) { + ctdb_fatal(ctdb, "ctdb_req_dmaster unable to update dmaster"); + return; + } + + /* send the CTDB_REPLY_DMASTER */ + r = talloc_size(ctdb, sizeof(*r) + data.dsize); + r->hdr.length = sizeof(*r) + data.dsize; + r->hdr.operation = CTDB_REPLY_DMASTER; + r->hdr.destnode = c->dmaster; + r->hdr.srcnode = ctdb->vnn; + r->hdr.reqid = hdr->reqid; + r->datalen = data.dsize; + memcpy(&r->data[0], data.dptr, data.dsize); + + node = ctdb->nodes[r->hdr.destnode]; + + ctdb->methods->queue_pkt(node, (uint8_t *)r, r->hdr.length); + + talloc_free(r); +} + + /* called when a CTDB_REQ_CALL packet comes in */ @@ -196,6 +302,15 @@ void ctdb_request_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) return; } + /* if this nodes has done enough consecutive calls on the same record + then give them the record */ + if (header.laccessor == c->hdr.srcnode && + header.lacount >= CTDB_MAX_LACOUNT) { + ctdb_call_send_dmaster(ctdb, c, &header, &key, &data); + talloc_free(data.dptr); + return; + } + ctdb_call_local(ctdb, key, &header, &data, c->callid, call_data.dsize?&call_data:NULL, &reply_data, c->hdr.srcnode); @@ -227,9 +342,11 @@ struct ctdb_call_state { struct ctdb_req_call *c; struct ctdb_node *node; const char *errmsg; + TDB_DATA call_data; TDB_DATA reply_data; TDB_DATA key; int redirect_count; + struct ctdb_ltdb_header header; }; @@ -257,6 +374,42 @@ void ctdb_reply_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) state->state = CTDB_CALL_DONE; } +/* + called when a CTDB_REPLY_DMASTER packet comes in + + This packet comes in from the lmaster response to a CTDB_REQ_CALL + request packet. It means that the current dmaster wants to give us + the dmaster role +*/ +void ctdb_reply_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) +{ + struct ctdb_reply_dmaster *c = (struct ctdb_reply_dmaster *)hdr; + struct ctdb_call_state *state; + TDB_DATA data; + + state = idr_find(ctdb->idr, hdr->reqid); + + data.dptr = c->data; + data.dsize = c->datalen; + + talloc_steal(state, c); + + /* we're now the dmaster - update our local ltdb with new header + and data */ + state->header.dmaster = ctdb->vnn; + + if (ctdb_ltdb_store(ctdb, state->key, &state->header, data) != 0) { + ctdb_fatal(ctdb, "ctdb_reply_dmaster store failed\n"); + return; + } + + ctdb_call_local(ctdb, state->key, &state->header, &data, state->c->callid, + state->call_data.dsize?&state->call_data:NULL, + &state->reply_data, ctdb->vnn); + + state->state = CTDB_CALL_DONE; +} + /* called when a CTDB_REPLY_ERROR packet comes in @@ -293,7 +446,7 @@ void ctdb_reply_redirect(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) /* don't allow for too many redirects */ if (state->redirect_count++ == CTDB_MAX_REDIRECT) { - c->dmaster = ctdb_lmaster(ctdb, state->key); + c->dmaster = ctdb_lmaster(ctdb, &state->key); } /* send it off again */ @@ -404,12 +557,15 @@ struct ctdb_call_state *ctdb_call_send(struct ctdb_context *ctdb, memcpy(&state->c->data[0], key.dptr, key.dsize); if (call_data) { memcpy(&state->c->data[key.dsize], call_data->dptr, call_data->dsize); + state->call_data.dptr = &state->c->data[key.dsize]; + state->call_data.dsize = call_data->dsize; } state->key.dptr = &state->c->data[0]; state->key.dsize = key.dsize; - state->node = ctdb->nodes[header.dmaster]; - state->state = CTDB_CALL_WAIT; + state->node = ctdb->nodes[header.dmaster]; + state->state = CTDB_CALL_WAIT; + state->header = header; talloc_set_destructor(state, ctdb_call_destructor); diff --git a/ctdb/common/ctdb_ltdb.c b/ctdb/common/ctdb_ltdb.c index 1bd1bb54a91..881cf48630b 100644 --- a/ctdb/common/ctdb_ltdb.c +++ b/ctdb/common/ctdb_ltdb.c @@ -44,9 +44,9 @@ int ctdb_attach(struct ctdb_context *ctdb, const char *name, int tdb_flags, /* return the lmaster given a key */ -uint32_t ctdb_lmaster(struct ctdb_context *ctdb, TDB_DATA key) +uint32_t ctdb_lmaster(struct ctdb_context *ctdb, const TDB_DATA *key) { - return ctdb_hash(&key) % ctdb->num_nodes; + return ctdb_hash(key) % ctdb->num_nodes; } @@ -59,7 +59,7 @@ static void ltdb_initial_header(struct ctdb_context *ctdb, { header->rsn = 0; /* initial dmaster is the lmaster */ - header->dmaster = ctdb_lmaster(ctdb, key); + header->dmaster = ctdb_lmaster(ctdb, &key); header->laccessor = header->dmaster; header->lacount = 0; } diff --git a/ctdb/common/ctdb_util.c b/ctdb/common/ctdb_util.c index 64053a5d995..c0911180991 100644 --- a/ctdb/common/ctdb_util.c +++ b/ctdb/common/ctdb_util.c @@ -46,6 +46,15 @@ void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...) } +/* + a fatal internal error occurred - no hope for recovery +*/ +void ctdb_fatal(struct ctdb_context *ctdb, const char *msg) +{ + fprintf(stderr, "ctdb fatal error: '%s'\n", msg); + abort(); +} + /* parse a IP:port pair */ @@ -78,7 +87,7 @@ bool ctdb_same_address(struct ctdb_address *a1, struct ctdb_address *a2) /* hash function for mapping data to a VNN - taken from tdb */ -uint32_t ctdb_hash(TDB_DATA *key) +uint32_t ctdb_hash(const TDB_DATA *key) { uint32_t value; /* Used to compute the hash value. */ uint32_t i; /* Used to cycle through random values. */ diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index b58d55d8ac1..0d95f00b271 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -104,6 +104,10 @@ struct ctdb_context { /* max number of redirects before we ask the lmaster */ #define CTDB_MAX_REDIRECT 2 +/* number of consecutive calls from the same node before we give them + the record */ +#define CTDB_MAX_LACOUNT 7 + /* the extended header for records in the ltdb */ @@ -164,18 +168,36 @@ struct ctdb_reply_redirect { uint32_t dmaster; }; +struct ctdb_req_dmaster { + struct ctdb_req_header hdr; + uint32_t dmaster; + uint32_t keylen; + uint32_t datalen; + uint8_t data[0]; +}; + +struct ctdb_reply_dmaster { + struct ctdb_req_header hdr; + uint32_t datalen; + uint8_t data[0]; +}; + /* internal prototypes */ void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...); +void ctdb_fatal(struct ctdb_context *ctdb, const char *msg); bool ctdb_same_address(struct ctdb_address *a1, struct ctdb_address *a2); int ctdb_parse_address(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, const char *str, struct ctdb_address *address); -uint32_t ctdb_hash(TDB_DATA *key); +uint32_t ctdb_hash(const TDB_DATA *key); void ctdb_request_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr); +void ctdb_request_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr); +void ctdb_reply_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr); void ctdb_reply_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr); void ctdb_reply_error(struct ctdb_context *ctdb, struct ctdb_req_header *hdr); +void ctdb_reply_redirect(struct ctdb_context *ctdb, struct ctdb_req_header *hdr); -uint32_t ctdb_lmaster(struct ctdb_context *ctdb, TDB_DATA key); +uint32_t ctdb_lmaster(struct ctdb_context *ctdb, const TDB_DATA *key); int ctdb_ltdb_fetch(struct ctdb_context *ctdb, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA *data); int ctdb_ltdb_store(struct ctdb_context *ctdb, TDB_DATA key,