From: Peter Somogyi Date: Mon, 4 Dec 2006 13:02:08 +0000 (+0100) Subject: Added infiniband transport implementation(incomplete) and interface. X-Git-Tag: tevent-0.9.20~348^2~3003^2~2 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=71dcef563592db3243cbec12c7a3f64275b14ad2;p=thirdparty%2Fsamba.git Added infiniband transport implementation(incomplete) and interface. (This used to be ctdb commit b53356124141b6419a2cd3652a8dd4389fe8e46b) --- diff --git a/ctdb/ib/ibwrapper.c b/ctdb/ib/ibwrapper.c new file mode 100644 index 00000000000..a918e1c0e66 --- /dev/null +++ b/ctdb/ib/ibwrapper.c @@ -0,0 +1,202 @@ +/* + * Unix SMB/CIFS implementation. + * Wrap Infiniband calls. + * + * Copyright (C) Sven Oehme 2006 + * + * Major code contributions by Peter Somogyi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include "ibwrapper.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "lib/events/events.h" + +#include "ibwrapper_internal.h" + + +#define IBW_LASTERR_BUFSIZE 512 +static char ibw_lasterr[IBW_LASTERR_BUFSIZE]; + +static ibw_mr *ibw_alloc_mr(ibw_ctx_priv *pctx) +{ +} + +static int ibwctx_destruct(void *ptr) +{ + ibw_ctx *pctx = talloc_get_type(ptr, ibw_ctx); + assert(pctx!=NULL); + + /* free memory regions */ + + return 0; +} + +int ibw_process_event(ibw_ctx *ctx, int fd_index); + +static void ibw_process_cm_event(struct event_context *ev, + struct fd_event *fde, uint16_t flags, void *private_data) +{ + if (fde-> +} + +static int ibw_process_init_attrs(ibw_initattr *attr, int nattr, ibw_opts *opts) +{ + int i; + char *name, *value; + + for(i=0; iopts.dev_name = talloc_strdup(ctx, value); + else if (strcmp(name, "rx_depth")==0) + opts->rx_depth = atoi(value); + else if (strcmp(name, "mtu")==0) + opts->mtu = atoi(value); + else { + sprintf(ibw_lasterr, "ibw_init: unknown name %s\n", name); + return -1; + } + } + return 0; +} + +ibw_ctx *ibw_init(ibw_initattr *attr, int nattr, + void *ctx_userdata, + ibw_connstate_fn_t ibw_connstate, + ibw_receive_fn_t ibw_receive) +{ + ibw_ctx *ctx = talloc_zero(NULL, ibw_ctx); + ibw_ctx_priv *pctx; + int rc; + ibw_event_ud *event_priv; + + memset(ibw_lasterr, 0, IBW_LASTERR_BUFSIZE); + + assert(ctx!=NULL); + ibw_lasterr[0] = '\0'; + talloc_set_destructor(ctx, ibwctx_destruct); + ctx->userdata = userdata; + + pctx = talloc_zero(ctx, ibw_ctx_priv); + ctx->internal = (void *)pctx; + assert(pctx!=NULL); + + pctx->connstate_func = ibw_connstate; + pctx->receive_func = ibw_receive; + + assert((pctx->ectx = event_context_init(ctx))!=NULL); + + /* process attributes */ + if (ibw_process_init_attrs(attr, nattr, pctx->opts)) + goto cleanup; + + /* initialize CM stuff */ + pctx->cm_channel = rdma_create_event_channel(); + if (!pctx->cm_channel) { + ret = errno; + 
sprintf(ibw_lasterr, "rdma_create_event_channel error %d\n", ret); + goto cleanup; + } + + event_priv = talloc_zero(ctx, ibw_event_ud); + event_priv->ctx = ctx; + event_priv->id = IBWET_CM; + + pctx->cm_channel_event = event_add_fd(pctx->ectx, pctx, + pctx->cm_channel->fd, EVENT_FD_READ, ibw_process_cm_event, event_priv); + + rc = rdma_create_id(pctx->cm_channel, &pctx->cm_id, cb, RDMA_PS_TCP); + if (rc) { + rc = errno; + sprintf(ibw_lasterr, "rdma_create_id error %d\n", rc); + goto cleanup; + } + DEBUG(10, "created cm_id %p\n", pctx->cm_id); + + /* allocate ib memory regions */ + + return ctx; + +cleanup: + if (ctx) + talloc_free(ctx); + + return NULL; +} + +int ibw_stop(ibw_ctx *ctx) +{ + ibw_ctx_priv *pctx = (ibw_ctx_priv *)ctx->internal; + +} + +int ibw_bind(ibw_ctx *ctx, struct sockaddr_in *my_addr) +{ + ibw_ctx_priv *pctx = (ibw_ctx_priv *)ctx->internal; +} + +int ibw_listen(ibw_ctx *ctx, int backlog) +{ + ibw_ctx_priv *pctx = (ibw_ctx_priv *)ctx->internal; +} + +int ibw_accept(ibw_ctx *ctx, void *conn_userdata) +{ + ibw_ctx_priv *pctx = (ibw_ctx_priv *)ctx->internal; +} + +int ibw_connect(ibw_ctx *ctx, struct sockaddr_in *serv_addr, void *conn_userdata) +{ + ibw_ctx_priv *pctx = (ibw_ctx_priv *)ctx->internal; +} + +void ibw_disconnect(ibw_conn *conn) +{ + ibw_ctx_priv *pctx = (ibw_ctx_priv *)ctx->internal; +} + +int ibw_process_event(ibw_ctx *ctx, ...) +{ + ibw_ctx_priv *pctx = (ibw_ctx_priv *)ctx->internal; +} + +int ibw_alloc_send_buf(ibw_conn *conn, void **buf, void **key, int n) +{ +} + +int ibw_send(ibw_conn *conn, void *buf, void *key, int n) +{ +} + +const char *ibw_getLastError() +{ + return ibw_lasterr; +} diff --git a/ctdb/ib/ibwrapper.h b/ctdb/ib/ibwrapper.h new file mode 100644 index 00000000000..8183919020f --- /dev/null +++ b/ctdb/ib/ibwrapper.h @@ -0,0 +1,194 @@ +/* + * Unix SMB/CIFS implementation. + * Wrap Infiniband calls. 
+ * + * Copyright (C) Sven Oehme 2006 + * + * Major code contributions by Peter Somogyi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* Server communication state */ +typedef enum { + IBWS_INIT = 0, /* ctx start - after ibw_init */ + IBWS_READY, /* after ibw_bind & ibw_listen */ + IBWS_CONNECT_REQUEST, /* after [IBWS_READY + incoming request] */ + /* => [(ibw_accept)IBWS_READY | (ibw_disconnect)STOPPED | ERROR] */ + IBWS_STOPPED, /* normal stop <= ibw_disconnect+(IBWS_READY | IBWS_CONNECT_REQUEST) */ + IBWS_ERROR /* abnormal state; ibw_stop must be called after this */ +} ibw_state_ctx; + +/* Connection state */ +typedef struct _ibw_ctx { + void *ctx_userdata; /* see ibw_init */ + + ibw_state_ctx state; + void *internal; +} ibw_ctx; + +typedef enum { + IBWC_INIT = 0, /* conn start - internal state */ + IBWC_CONNECTED, /* after ibw_accept or ibw_connect */ + IBWC_DISCONNECTED, /* after ibw_disconnect */ + IBWC_ERROR +} ibw_state_conn; + +typedef struct _ibw_conn { + ibw_ctx *ctx; + ibw_state_conn state; + + void *conn_userdata; /* see ibw_connect and ibw_accept */ + void *internal; +} ibw_conn; + +/* + * (name, value) pair for array param of ibw_init + */ +typedef struct _ibw_initattr { + const char *name; + const char *value; +} ibw_initattr; + +/* + * Callback function definition which should inform 
you about + * connection state change + * This callback is invoked from within ibw_process_event. + * Both ctx and conn can be NULL if their state didn't change. + * Return nonzero on error. + */ +typedef int (*ibw_connstate_fn_t)(ibw_ctx *ctx, ibw_conn *conn); + +/* + * Callback function definition which should process incoming packets + * This callback is invoked from within ibw_process_event. + * Return nonzero on error. + * + * Important: you mustn't store buf pointer for later use. Process its contents before returning. + */ +typedef int (*ibw_receive_fn_t)(ibw_conn *conn, void *buf, int n); + +/* + * settings: array of (name, value) pairs + * where name is one of: + * dev_name [default is the first one] + * rx_depth [default is 500] + * mtu [default is 1024] + * ib_port [default is 1] + * + * Must be called _ONCE_ for each node. + * + * returns non-NULL on success + * + * talloc_free must be called for the result in IBWS_STOPPED; + * it will close resources by destructor + * connections(ibw_conn *) must have been closed prior to talloc_free + */ +ibw_ctx *ibw_init(ibw_initattr *attr, int nattr, + void *ctx_userdata, + ibw_connstate_fn_t ibw_connstate, + ibw_receive_fn_t ibw_receive); + +/* + * Must be called in states of (IBWS_ERROR, IBWS_READY, IBWS_CONNECT_REQUEST) + * + * It will send out disconnect requests and free up ibw_conn structures. + * The ctx->state will transit to IBWS_STOPPED after every conn is disconnected. + * During that time, you mustn't send/recv/disconnect any more. + * Only after ctx->state=IBWS_STOPPED you can talloc_free the ctx. 
+ */ +int ibw_stop(ibw_ctx *ctx); + +/*************** connection initiation - like stream sockets *****/ + +/* + * works like socket bind + * needs a normal internet address here + * + * returns 0 on success + */ +int ibw_bind(ibw_ctx *ctx, struct sockaddr_in *my_addr); + +/* + * works like socket listen + * non-blocking + * enables accepting incoming connections (after IBWS_READY) + * (it doesn't touch ctx->state by itself) + * + * returns 0 on success + */ +int ibw_listen(ibw_ctx *ctx, int backlog); + +/* + * works like socket accept + * initializes a connection to a client + * must be called when state=IBWS_CONNECT_REQUEST + * + * returns 0 on success + * + * You have +1 waiting here: you will get ibw_conn (having the + * same conn_userdata member) structure in ibw_connstate_fn_t. + * + * Important: you won't get remote IP address (only internal conn info) + */ +int ibw_accept(ibw_ctx *ctx, void *conn_userdata); + +/* + * Needs a normal internet address here + * can be called within IBWS_READY|IBWS_CONNECT_REQUEST + * + * returns non-NULL on success + * + * You have +1 waiting here: you will get ibw_conn (having the + * same conn_userdata member) structure in ibw_connstate_fn_t. + */ +int ibw_connect(ibw_ctx *ctx, struct sockaddr_in *serv_addr, void *conn_userdata); + +/* + * Sends out a disconnect request. + * You should process fds after calling this function + * and then process it with ibw_process_event normally + * until you get conn->state = IBWC_DISCONNECTED + * + * You mustn't talloc_free yet right after this, + * first wait for IBWC_DISCONNECTED. + */ +void ibw_disconnect(ibw_conn *conn); + +/************ Infiniband specific event loop wrapping ******************/ + +/* + * You have to use this buf to fill in before send. + * It's just to avoid memcpy in ibw_send. + * Use the same (buf, key) pair with ibw_send. + * + * Returns 0 on success. 
+ */ +int ibw_alloc_send_buf(ibw_conn *conn, void **buf, void **key, int n); + +/* + * Send the message in one + * Can be invoked any number of times (should fit into buffers) and at any time + * (in conn->state=IBWC_CONNECTED) + * + * You mustn't use (buf, key) any more for sending. + */ +int ibw_send(ibw_conn *conn, void *buf, void *key, int n); + +/* + * Retrieves the last error + * result: always non-NULL, mustn't be freed (static) + */ +const char *ibw_getLastError(); diff --git a/ctdb/ib/ibwrapper_internal.h b/ctdb/ib/ibwrapper_internal.h new file mode 100644 index 00000000000..dbf11f6273c --- /dev/null +++ b/ctdb/ib/ibwrapper_internal.h @@ -0,0 +1,81 @@ +/* + * Unix SMB/CIFS implementation. + * Wrap Infiniband calls. + * + * Copyright (C) Sven Oehme 2006 + * + * Major code contributions by Peter Somogyi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +typedef struct _ibw_mr { + struct ibv_mr *mr; + struct _ibw_mr *next, *prev; +} ibw_mr; + +typedef struct _ibw_opts { + char *dev_name; + int rx_depth; + int mtu; + int ib_port; +} ibw_opts; + +typedef struct _ibw_ctx_priv { + ibw_mr *avail_first; + ibw_mr *avail_last; + ibw_mr *used_first; + ibw_mr *used_last; + + struct event_context *ectx; + + ibw_opts opts; + + struct ibv_context *context; + struct ibv_pd *pd; + + struct rdma_event_channel *cm_channel; + struct fd_event *cm_channel_event; + struct rdma_cm_id *cm_id; /* connection on client side,*/ + + ibw_connstate_fn_t connstate_func; + ibw_receive_fn_t receive_func; +} ibw_ctx_priv; + +typedef struct _ibw_conn_priv { + struct ibv_cq *cq; + struct ibv_qp *qp; + struct ib_cm_id *cm_id; +} ibw_conn_priv; + +typedef enum { + IBWET_CM, + IBWET_VERBS +} ibw_event_type; + +typedef struct _ibw_event_ud { + ibw_ctx *ctx; + ibw_event_type id; +} ibw_event_ud; + +/* + * Must be called in all cases after selecting/polling + * for FDs set via ibw_add_event_fn_t. + * + * fd_index: fd identifier passed in ibw_add_event_fn_t + * with the same fd was set there. + */ +//int ibw_process_event(ibw_ctx *ctx, int fd_index); +