From: Andrew Tridgell Date: Fri, 13 Jul 2007 01:31:18 +0000 (+1000) Subject: - merge from ronnie X-Git-Tag: tevent-0.9.20~348^2~2452 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=1e14ecd1766d5de7829bb8d37d1b8bcfb35183a9;p=thirdparty%2Fsamba.git - merge from ronnie - cleaner handling of system capture socket (This used to be ctdb commit d194a41a71b8466d0726dcbae3970a86386fcb3c) --- 1e14ecd1766d5de7829bb8d37d1b8bcfb35183a9 diff --cc ctdb/common/ctdb_util.c index 4890a141ea7,219b31f25f7..54b1e4e7ff5 --- a/ctdb/common/ctdb_util.c +++ b/ctdb/common/ctdb_util.c @@@ -192,42 -192,20 +192,42 @@@ struct ctdb_rec_data *ctdb_marshall_rec /* if possible, make this task real time */ -void ctdb_set_realtime(bool enable) +void ctdb_set_scheduler(struct ctdb_context *ctdb) { -#if HAVE_SCHED_SETSCHEDULER +#if HAVE_SCHED_SETSCHEDULER struct sched_param p; - + if (ctdb->saved_scheduler_param == NULL) { + ctdb->saved_scheduler_param = talloc_size(ctdb, sizeof(p)); + } + + if (sched_getparam(0, (struct sched_param *)ctdb->saved_scheduler_param) == -1) { + DEBUG(0,("Unable to get old scheduler params\n")); + return; + } + + p = *(struct sched_param *)ctdb->saved_scheduler_param; p.sched_priority = 1; - if (enable) { - if (sched_setscheduler(getpid(), SCHED_FIFO, &p) == -1) { - DEBUG(0,("Unable to set scheduler to SCHED_FIFO (%s)\n", strerror(errno))); - } else { - DEBUG(0,("Set scheduler to SCHED_FIFO\n")); - } + if (sched_setscheduler(0, SCHED_FIFO, &p) == -1) { - DEBUG(0,("Unable to set scheduler to SCHED_FIFO (%s)\n", strerror(errno))); ++ DEBUG(0,("Unable to set scheduler to SCHED_FIFO (%s)\n", ++ strerror(errno))); } else { - sched_setscheduler(getpid(), SCHED_OTHER, &p); + DEBUG(0,("Set scheduler to SCHED_FIFO\n")); + } +#endif +} + +/* + restore previous scheduler parameters + */ +void ctdb_restore_scheduler(struct ctdb_context *ctdb) +{ +#if HAVE_SCHED_SETSCHEDULER + if (ctdb->saved_scheduler_param == NULL) { + ctdb_fatal(ctdb, "No saved scheduler parameters\n"); + } + if (sched_setscheduler(0, SCHED_OTHER, (struct sched_param *)ctdb->saved_scheduler_param) == -1) { + ctdb_fatal(ctdb, "Unable to restore old scheduler parameters\n"); } #endif } diff --cc ctdb/common/system.c index 1e536f5e8a4,2fe827d73e6..f4f12a168ba --- a/ctdb/common/system.c +++ b/ctdb/common/system.c @@@ -234,7 -219,7 +219,7 @@@ int ctdb_sys_send_tcp(int s pkt.tcp.rst = 1; } pkt.tcp.doff = sizeof(pkt.tcp)/4; -- pkt.tcp.window = htons(1234); ++ pkt.tcp.window = htons(1234); /* this makes it easier to spot in a sniffer */ pkt.tcp.check = tcp_checksum((uint16_t *)&pkt.tcp, sizeof(pkt.tcp), &pkt.ip); ret = sendto(s, &pkt, sizeof(pkt), 0, dest, sizeof(*dest)); @@@ -273,113 -256,128 +256,115 @@@ bool ctdb_sys_have_ip(const char *ip return ret == 0; } - static void ctdb_wait_handler(struct event_context *ev, struct timed_event *te, - struct timeval yt, void *p) -/* This function is used to open a raw socket to capture from ++/* ++ This function is used to open a raw socket to capture from + */ + int ctdb_sys_open_capture_socket(void) { - uint32_t *timed_out = (uint32_t *)p; - (*timed_out) = 1; - } + int s; + + /* Open a socket to capture all traffic */ - s=socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); - if (s == -1){ ++ s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); ++ if (s == -1) { + DEBUG(0,(__location__ " failed to open raw socket\n")); + return -1; + } - /* This function is used to kill (RST) the specified tcp connection. + set_nonblocking(s); + set_close_on_exec(s); - This function is not asynchronous and will block until the operation - was successful or it timesout. + return s; + } + -/* This function is used to open a raw socket to send tickles from ++/* ++ This function is used to open a raw socket to send tickles from */ - int ctdb_sys_kill_tcp(struct event_context *ev, - const struct sockaddr_in *dst, - const struct sockaddr_in *src) + int ctdb_sys_open_sending_socket(void) { int s, ret; - uint32_t timedout; - TALLOC_CTX *tmp_ctx = talloc_new(NULL); + uint32_t one = 1; + + s = socket(AF_INET, SOCK_RAW, htons(IPPROTO_RAW)); + if (s == -1) { + DEBUG(0,(__location__ " failed to open raw socket (%s)\n", + strerror(errno))); + return -1; + } + + ret = setsockopt(s, SOL_IP, IP_HDRINCL, &one, sizeof(one)); + if (ret != 0) { + DEBUG(0,(__location__ " failed to setup IP headers (%s)\n", + strerror(errno))); + close(s); + return -1; + } + + set_nonblocking(s); + set_close_on_exec(s); + + return s; + } + - -int ctdb_sys_read_tcp_packet(struct ctdb_kill_tcp *killtcp) ++/* ++ called when the raw socket becomes readable ++ */ ++int ctdb_sys_read_tcp_packet(int s, struct sockaddr_in *src, struct sockaddr_in *dst, ++ uint32_t *ack_seq, uint32_t *seq) + { + int ret; #define RCVPKTSIZE 100 char pkt[RCVPKTSIZE]; struct ether_header *eth; struct iphdr *ip; struct tcphdr *tcp; - struct ctdb_killtcp_connection *conn; - /* Open a socket to capture all traffic */ - s=socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); - if (s == -1){ - DEBUG(0,(__location__ " failed to open raw socket\n")); - ret = recv(killtcp->capture_fd, pkt, RCVPKTSIZE, MSG_TRUNC); ++ ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC); + if (ret < sizeof(*eth)+sizeof(*ip)) { return -1; } - /* We wait for up to 1 second for the ACK coming back */ - timedout = 0; - event_add_timed(ev, tmp_ctx, timeval_current_ofs(1, 0), ctdb_wait_handler, &timedout); + /* Ethernet */ + eth = (struct ether_header *)pkt; + - /* Send a tickle ack to probe what the real seq/ack numbers are */ - ctdb_sys_send_tcp(dst, src, 0, 0, 0); - - /* Wait until we either time out or we succeeds in sending the RST */ - while (timedout==0) { - event_loop_once(ev); - - ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC); - if (ret < sizeof(*eth)+sizeof(*ip)) { - continue; - } - - /* Ethernet */ - eth = (struct ether_header *)pkt; - /* We only want IP packets */ - if (ntohs(eth->ether_type) != ETHERTYPE_IP) { - continue; - } + /* We only want IP packets */ + if (ntohs(eth->ether_type) != ETHERTYPE_IP) { + return -1; + } - /* IP */ - ip = (struct iphdr *)(eth+1); - /* We only want IPv4 packets */ - if (ip->version != 4) { - continue; - } - /* Dont look at fragments */ - if ((ntohs(ip->frag_off)&0x1fff) != 0) { - continue; - } - /* we only want TCP */ - if (ip->protocol != IPPROTO_TCP) { - continue; - } - - /* We only want packets sent from the guy we tickled */ - if (ip->saddr != dst->sin_addr.s_addr) { - continue; - } - /* We only want packets sent to us */ - if (ip->daddr != src->sin_addr.s_addr) { - continue; - } - - /* make sure its not a short packet */ - if (offsetof(struct tcphdr, ack_seq) + 4 + - (ip->ihl*4) + sizeof(*eth) > ret) { - continue; - } - - /* TCP */ - tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip); - - /* We only want replies from the port we tickled */ - if (tcp->source != dst->sin_port) { - continue; - } - if (tcp->dest != src->sin_port) { - continue; - } - - ctdb_sys_send_tcp(dst, src, tcp->ack_seq, tcp->seq, 1); + /* IP */ + ip = (struct iphdr *)(eth+1); - close(s); - talloc_free(tmp_ctx); + /* We only want IPv4 packets */ + if (ip->version != 4) { + return -1; + } + /* Dont look at fragments */ + if ((ntohs(ip->frag_off)&0x1fff) != 0) { + return -1; + } + /* we only want TCP */ + if (ip->protocol != IPPROTO_TCP) { + return -1; + } + - return 0; + /* make sure its not a short packet */ + if (offsetof(struct tcphdr, ack_seq) + 4 + + (ip->ihl*4) + sizeof(*eth) > ret) { + return -1; } + - close(s); - talloc_free(tmp_ctx); - DEBUG(0,(__location__ " timedout waiting for tickle ack reply\n")); + /* TCP */ + tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip); - - - /* loop over all connections and see if we find one that matches */ - for(conn = killtcp->connections; conn; conn = conn->next) { - /* We only want packets sent from a guy we have tickled */ - if (ip->saddr != conn->dst.sin_addr.s_addr) { - continue; - } - /* We only want packets sent to us */ - if (ip->daddr != conn->src.sin_addr.s_addr) { - continue; - } - /* We only want replies from a port we tickled */ - if (tcp->source != conn->dst.sin_port) { - continue; - } - if (tcp->dest != conn->src.sin_port) { - continue; - } - - /* This one has been tickled ! - now reset him and remove him from the list. - */ - ctdb_sys_send_tcp(killtcp->sending_fd, &conn->dst, &conn->src, tcp->ack_seq, tcp->seq, 1); - talloc_free(conn); - return -1; - return 0; - } ++ /* tell the caller which one we've found */ ++ src->sin_addr.s_addr = ip->saddr; ++ src->sin_port = tcp->source; ++ dst->sin_addr.s_addr = ip->daddr; ++ dst->sin_port = tcp->dest; ++ *ack_seq = tcp->ack_seq; ++ *seq = tcp->seq; + - return -1; ++ return 0; } + + diff --cc ctdb/include/ctdb_private.h index be3b832832a,505e97d9567..9c33747dcd6 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@@ -307,8 -307,7 +307,9 @@@ struct ctdb_context struct ctdb_takeover takeover; struct ctdb_tcp_list *tcp_list; struct ctdb_client_ip *client_ip_list; + bool do_setsched; + void *saved_scheduler_param; + struct ctdb_kill_tcp *killtcp; }; struct ctdb_db_context { @@@ -1035,4 -1041,37 +1044,15 @@@ void ctdb_start_freeze(struct ctdb_cont bool parse_ip_port(const char *s, struct sockaddr_in *ip); - -/* - list of tcp connections to kill - */ -struct ctdb_killtcp_connection { - struct ctdb_killtcp_connection *prev, *next; - struct ctdb_context *ctdb; - struct sockaddr_in src; - struct sockaddr_in dst; - int count; -}; - -/* structure containing the listening socket and the list of tcp connections - that the ctdb daemon is to kill -*/ -struct ctdb_kill_tcp { - struct ctdb_context *ctdb; - int capture_fd; - int sending_fd; - struct fd_event *fde; - struct ctdb_killtcp_connection *connections; -}; + int ctdb_sys_open_capture_socket(void); + int ctdb_sys_open_sending_socket(void); -int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, struct sockaddr_in *src, struct sockaddr_in *dst); -int ctdb_sys_read_tcp_packet(struct ctdb_kill_tcp *killtcp); ++int ctdb_sys_read_tcp_packet(int s, struct sockaddr_in *src, struct sockaddr_in *dst, ++ uint32_t *ack_seq, uint32_t *seq); + + int ctdb_ctrl_killtcp(struct ctdb_context *ctdb, + struct timeval timeout, + uint32_t destnode, + struct ctdb_control_killtcp *killtcp); + + #endif diff --cc ctdb/server/ctdb_takeover.c index 42a23808ddb,8f84bb7d0a7..a4536e1c9dc --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@@ -679,6 -686,22 +686,7 @@@ int32_t ctdb_control_tcp_add(struct ctd return 0; } + -/* - called by a daemon to inform us of a TCP connection that one of its - clients managing that should tickled with an ACK when IP takeover is - done - */ -int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata) -{ - struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr; - - ctdb_killtcp_add_connection(ctdb, &killtcp->src, &killtcp->dst); - - return 0; -} - - /* called by a daemon to inform us of a TCP connection that one of its clients managing that should tickled with an ACK when IP takeover is @@@ -795,12 -818,12 +803,14 @@@ void ctdb_release_all_ips(struct ctdb_c /* get list of public IPs */ --int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA *outdata) ++int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, ++ struct ctdb_req_control *c, TDB_DATA *outdata) { int i, len; struct ctdb_all_public_ips *ips; -- len = offsetof(struct ctdb_all_public_ips, ips) + ctdb->num_nodes*sizeof(struct ctdb_public_ip); ++ len = offsetof(struct ctdb_all_public_ips, ips) + ++ ctdb->num_nodes*sizeof(struct ctdb_public_ip); ips = talloc_zero_size(outdata, len); CTDB_NO_MEMORY(ctdb, ips); @@@ -814,9 -837,155 +824,233 @@@ ips->ips[i].takeover_vnn = ctdb->nodes[i]->takeover_vnn; ips->ips[i].sin.sin_family = AF_INET; if (ctdb->nodes[i]->public_address) { -- inet_aton(ctdb->nodes[i]->public_address, &ips->ips[i].sin.sin_addr); ++ inet_aton(ctdb->nodes[i]->public_address, ++ &ips->ips[i].sin.sin_addr); + } + } + + return 0; + } + + + ++ ++/* ++ list of tcp connections to kill ++ */ ++struct ctdb_killtcp_connection { ++ struct ctdb_killtcp_connection *prev, *next; ++ struct ctdb_context *ctdb; ++ struct sockaddr_in src; ++ struct sockaddr_in dst; ++ int count; ++}; ++ ++/* ++ structure containing the listening socket and the list of tcp connections ++ that the ctdb daemon is to kill ++*/ ++struct ctdb_kill_tcp { ++ struct ctdb_context *ctdb; ++ int capture_fd; ++ int sending_fd; ++ struct fd_event *fde; ++ struct ctdb_killtcp_connection *connections; ++}; ++ ++/* ++ called when we get a read event on the raw socket ++ */ + static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, - uint16_t flags, void *private_data) ++ uint16_t flags, void *private_data) + { + struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp); ++ struct sockaddr_in src, dst; ++ struct ctdb_killtcp_connection *conn; ++ uint32_t ack_seq, seq; + - if (flags & EVENT_FD_READ) { - ctdb_sys_read_tcp_packet(killtcp); ++ if (!(flags & EVENT_FD_READ)) { ++ return; ++ } ++ ++ if (ctdb_sys_read_tcp_packet(killtcp->capture_fd, &src, &dst, ++ &ack_seq, &seq) != 0) { ++ /* probably a non-tcp ACK packet */ ++ return; ++ } ++ ++ /* loop over all connections and see if we find one that matches */ ++ for (conn = killtcp->connections; conn; conn = conn->next) { ++ /* We only want packets sent from a guy we have tickled */ ++ if (src.sin_addr.s_addr != conn->dst.sin_addr.s_addr) { ++ continue; ++ } ++ /* We only want packets sent to us */ ++ if (dst.sin_addr.s_addr != conn->src.sin_addr.s_addr) { ++ continue; ++ } ++ /* We only want replies from a port we tickled */ ++ if (src.sin_port != conn->dst.sin_port) { ++ continue; ++ } ++ if (dst.sin_port != conn->src.sin_port) { ++ continue; ++ } ++ ++ /* This one has been tickled ! ++ now reset him and remove him from the list. ++ */ ++ ctdb_sys_send_tcp(killtcp->sending_fd, &conn->dst, ++ &conn->src, ack_seq, seq, 1); ++ talloc_free(conn); ++ break; + } + } + + + /* called every second until all sentenced connections have been reset + */ + static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, - struct timeval t, void *private_data) ++ struct timeval t, void *private_data) + { + struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp); - struct ctdb_killtcp_connection *conn, tmpcon; - ++ struct ctdb_killtcp_connection *conn, *next; + - /* loop over all connections and see if we find one that matches */ - for(conn = killtcp->connections; conn; conn = conn->next) { ++ /* loop over all connections sending tickle ACKs */ ++ for (conn = killtcp->connections; conn; conn = next) { ++ next = conn->next; + conn->count++; + if (conn->count > 5) { - tmpcon.next=conn->next; + talloc_free(conn); - conn=&tmpcon; - + continue; } + ctdb_sys_send_tcp(killtcp->sending_fd, &conn->dst, &conn->src, 0, 0, 0); } + /* If there are no more connections to kill we can remove the + entire killtcp structure + */ + if (killtcp->connections == NULL) { + talloc_free(killtcp); + return; + } + + /* try tickling them again in a seconds time + */ + event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), + ctdb_tickle_sentenced_connections, killtcp); + } + + /* + destroy the killtcp structure + */ + static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp) + { - if (killtcp->capture_fd != -1) { - close(killtcp->capture_fd); - killtcp->capture_fd = -1; - } + if (killtcp->sending_fd != -1) { + close(killtcp->sending_fd); - killtcp->sending_fd = -1; ++ killtcp->sending_fd = -1; + } + killtcp->ctdb->killtcp = NULL; - + return 0; + } + + /* + destroy a killtcp connection structure + */ + static int ctdb_killtcp_connection_destructor(struct ctdb_killtcp_connection *conn) + { + DLIST_REMOVE(conn->ctdb->killtcp->connections, conn); + return 0; + } + -int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, struct sockaddr_in *src, struct sockaddr_in *dst) ++/* ++ add a tcp socket to the list of connections we will kill on failover ++ */ ++static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, ++ struct sockaddr_in *src, struct sockaddr_in *dst) + { - struct ctdb_kill_tcp *killtcp=ctdb->killtcp;; ++ struct ctdb_kill_tcp *killtcp = ctdb->killtcp; + struct ctdb_killtcp_connection *conn; + + /* If this is the first connection to kill we must allocate + a new structure + */ + if (killtcp == NULL) { + killtcp = talloc(ctdb, struct ctdb_kill_tcp); + CTDB_NO_MEMORY(ctdb, killtcp); + + killtcp->ctdb = ctdb; + killtcp->capture_fd = -1; + killtcp->sending_fd = -1; + killtcp->connections = NULL; + ctdb->killtcp = killtcp; + talloc_set_destructor(killtcp, ctdb_killtcp_destructor); + } + - /* If we dont have a socket to listen on yet we must create it - */ - if (killtcp->capture_fd == -1) { - killtcp->capture_fd = ctdb_sys_open_capture_socket(); - if (killtcp->capture_fd == -1) { - DEBUG(0,(__location__ " Failed to open capturing socket for killtcp\n")); - goto failed; - } - } ++ conn = talloc(killtcp, struct ctdb_killtcp_connection); ++ CTDB_NO_MEMORY(ctdb, conn); ++ conn->src = *src; ++ conn->dst = *dst; ++ conn->ctdb = ctdb; ++ conn->count = 0; ++ talloc_set_destructor(conn, ctdb_killtcp_connection_destructor); ++ DLIST_ADD(killtcp->connections, conn); + - /* If we dont have a socket to send from yet we must create it ++ /* ++ If we dont have a socket to send from yet we must create it + */ + if (killtcp->sending_fd == -1) { + killtcp->sending_fd = ctdb_sys_open_sending_socket(); + if (killtcp->sending_fd == -1) { + DEBUG(0,(__location__ " Failed to open sending socket for killtcp\n")); + goto failed; + } + } + - conn = talloc(killtcp, struct ctdb_killtcp_connection); - CTDB_NO_MEMORY(ctdb, conn); - conn->src = *src; - conn->dst = *dst; - conn->ctdb = ctdb; - conn->count = 0; - talloc_set_destructor(conn, ctdb_killtcp_connection_destructor); - DLIST_ADD(killtcp->connections, conn); - ++ /* ++ If we dont have a socket to listen on yet we must create it ++ */ ++ if (killtcp->capture_fd == -1) { ++ killtcp->capture_fd = ctdb_sys_open_capture_socket(); ++ if (killtcp->capture_fd == -1) { ++ DEBUG(0,(__location__ " Failed to open capturing socket for killtcp\n")); ++ goto failed; ++ } ++ } + - killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, - EVENT_FD_READ|EVENT_FD_AUTOCLOSE, - capture_tcp_handler, killtcp); + ++ if (killtcp->fde == NULL) { ++ killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, ++ EVENT_FD_READ | EVENT_FD_AUTOCLOSE, ++ capture_tcp_handler, killtcp); + - /* We also need to set up some events to tickle all these connections - until they are all reset - */ - event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(0, 0), - ctdb_tickle_sentenced_connections, killtcp); ++ /* We also need to set up some events to tickle all these connections ++ until they are all reset ++ */ ++ event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(0, 0), ++ ctdb_tickle_sentenced_connections, killtcp); ++ } + ++ /* tickle him once now */ ++ ctdb_sys_send_tcp(killtcp->sending_fd, &conn->dst, &conn->src, 0, 0, 0); + + return 0; + + failed: + talloc_free(ctdb->killtcp); + ctdb->killtcp = NULL; + return -1; + } ++ ++/* ++ called by a daemon to inform us of a TCP connection that one of its ++ clients managing that should reset when IP takeover is done ++ */ ++int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata) ++{ ++ struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr; ++ ++ ctdb_killtcp_add_connection(ctdb, &killtcp->src, &killtcp->dst); ++ + return 0; +}