From: David Hankins Date: Mon, 21 Jan 2008 19:53:21 +0000 (+0000) Subject: - Multiple (up to "delayed-ack x;" maximum) DHCPv4 packets are now queued and X-Git-Tag: v4_1_0a1~16 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=6368a1bd72881aa88238969a6f1f9310bff5348f;p=thirdparty%2Fdhcp.git - Multiple (up to "delayed-ack x;" maximum) DHCPv4 packets are now queued and released in bursts after single fsync() events when the upper limit is reached or if the receiving sockets go dry. The practical upshot is that fsync-coupled server performance is now multiplicatively increased. The default delayed ack limit is 28. Thanks entirely to a patch from Christof Chen. --- diff --git a/RELNOTES b/RELNOTES index d959f0469..e397a0284 100644 --- a/RELNOTES +++ b/RELNOTES @@ -141,6 +141,13 @@ suggested fixes to . - MINUS tokens should be parseable again. +- Multiple (up to "delayed-ack x;" maximum) DHCPv4 packets are now queued and + released in bursts after single fsync() events when the upper limit is + reached or if the receiving sockets go dry. The practical upshot is + that fsync-coupled server performance is now multiplicatively increased. + The default delayed ack limit is 28. Thanks entirely to a patch from + Christof Chen. 
+ Changes since 4.0.0b3 - The reverse dns name for PTR updates on IPv6 addresses has been fixed to diff --git a/includes/dhcpd.h b/includes/dhcpd.h index 6afe74ab3..62f8915c8 100644 --- a/includes/dhcpd.h +++ b/includes/dhcpd.h @@ -621,11 +621,16 @@ struct lease_state { #define SV_DHCPV6_LEASE_FILE_NAME 54 #define SV_DHCPV6_PID_FILE_NAME 55 #define SV_LIMIT_ADDRS_PER_IA 56 +#define SV_DELAYED_ACK 57 #if !defined (DEFAULT_PING_TIMEOUT) # define DEFAULT_PING_TIMEOUT 1 #endif +#if !defined (DEFAULT_DELAYED_ACK) +# define DEFAULT_DELAYED_ACK 28 /* default SO_SNDBUF size / 576 bytes */ +#endif + #if !defined (DEFAULT_DEFAULT_LEASE_TIME) # define DEFAULT_DEFAULT_LEASE_TIME 43200 #endif @@ -1138,6 +1143,12 @@ struct hardware_link { struct hardware address; }; +struct leasequeue { + struct leasequeue *prev; + struct leasequeue *next; + struct lease *lease; +}; + typedef void (*tvref_t)(void *, void *, const char *, int); typedef void (*tvunref_t)(void *, const char *, int); struct timeout { @@ -1149,6 +1160,11 @@ struct timeout { tvunref_t unref; }; +struct eventqueue { + struct eventqueue *next; + void (*handler)(void *); +}; + struct protocol { struct protocol *next; int fd; @@ -1653,6 +1669,7 @@ extern const char *path_dhcpd_db; extern const char *path_dhcpd_pid; extern int dhcp_max_agent_option_packet_length; +extern struct eventqueue *rw_queue_empty; int main(int, char **); void postconf_initialization(int); @@ -1912,6 +1929,7 @@ int data_string_sprintfa(struct data_string *ds, const char *fmt, ...); /* dhcp.c */ extern int outstanding_pings; +extern int max_outstanding_acks; void dhcp PROTO ((struct packet *)); void dhcpdiscover PROTO ((struct packet *, int)); @@ -1923,6 +1941,9 @@ void dhcpleasequery PROTO ((struct packet *, int)); void nak_lease PROTO ((struct packet *, struct iaddr *cip)); void ack_lease PROTO ((struct packet *, struct lease *, unsigned int, TIME, char *, int, struct host_decl *)); +void delayed_ack_enqueue(struct lease *); +void 
commit_leases_readerdry(void *); +void flush_ackqueue(void *); void dhcp_reply PROTO ((struct lease *)); int find_lease PROTO ((struct lease **, struct packet *, struct shared_network *, int *, int *, struct lease *, @@ -2537,6 +2558,7 @@ isc_result_t write_named_billing_class(const void *, unsigned, void *); void write_billing_classes (void); int write_billing_class PROTO ((struct class *)); void commit_leases_timeout PROTO ((void *)); +void commit_leases_readerdry(void *); int commit_leases PROTO ((void)); void db_startup PROTO ((int)); int new_lease_file PROTO ((void)); @@ -2966,6 +2988,10 @@ isc_result_t binding_scope_get_value (omapi_value_t **, isc_result_t binding_scope_stuff_values (omapi_object_t *, struct binding_scope *); +void register_eventhandler(struct eventqueue **, void (*handler)(void *)); +void unregister_eventhandler(struct eventqueue **, void (*handler)(void *)); +void trigger_event(struct eventqueue **); + /* mdb.c */ extern struct subnet *subnets; diff --git a/omapip/dispatch.c b/omapip/dispatch.c index 2f33a8a8b..838375afb 100644 --- a/omapip/dispatch.c +++ b/omapip/dispatch.c @@ -40,11 +40,71 @@ static omapi_io_object_t omapi_io_states; struct timeval cur_tv; +struct eventqueue *rw_queue_empty; + OMAPI_OBJECT_ALLOC (omapi_io, omapi_io_object_t, omapi_type_io_object) OMAPI_OBJECT_ALLOC (omapi_waiter, omapi_waiter_object_t, omapi_type_waiter) +void +register_eventhandler(struct eventqueue **queue, void (*handler)(void *)) +{ + struct eventqueue *t, *q; + + /* traverse to end of list */ + t = NULL; + for (q = *queue ; q ; q = q->next) { + if (q->handler == handler) + return; /* handler already registered */ + t = q; + } + + q = ((struct eventqueue *)dmalloc(sizeof(struct eventqueue), MDL)); + if (!q) + log_fatal("register_eventhandler: no memory!"); + memset(q, 0, sizeof *q); + if (t) + t->next = q; + else + *queue = q; + q->handler = handler; + return; +} + +void +unregister_eventhandler(struct eventqueue **queue, void (*handler)(void *)) +{ 
+ struct eventqueue *t, *q; + + /* traverse to end of list */ + t= NULL; + for (q = *queue ; q ; q = q->next) { + if (q->handler == handler) { + if (t) + t->next = q->next; + else + *queue = q->next; + dfree(q, MDL); /* Don't access q after this!*/ + break; + } + t = q; + } + return; +} + +void +trigger_event(struct eventqueue **queue) +{ + struct eventqueue *q; + + for (q=*queue ; q ; q=q->next) { + if (q->handler) + (*q->handler)(NULL); + } +} + + /* Register an I/O handle so that we can do asynchronous I/O on it. */ isc_result_t omapi_register_io_object (omapi_object_t *h, @@ -208,7 +268,7 @@ isc_result_t omapi_wait_for_completion (omapi_object_t *object, isc_result_t omapi_one_dispatch (omapi_object_t *wo, struct timeval *t) { - fd_set r, w, x; + fd_set r, w, x, rr, ww, xx; int max = 0; int count; int desc; @@ -284,16 +344,21 @@ isc_result_t omapi_one_dispatch (omapi_object_t *wo, } } - /* Wait for a packet or a timeout... XXX */ -#if 0 -#if defined (__linux__) -#define fds_bits __fds_bits -#endif - log_error ("dispatch: %d %lx %lx", max, - (unsigned long)r.fds_bits [0], - (unsigned long)w.fds_bits [0]); -#endif - count = select (max + 1, &r, &w, &x, t ? &to : (struct timeval *)0); + /* poll if all reader are dry */ + now.tv_sec = 0; + now.tv_usec = 0; + rr=r; + ww=w; + xx=x; + + /* poll once */ + count = select(max + 1, &r, &w, &x, &now); + if (!count) { + /* We are dry now */ + trigger_event(&rw_queue_empty); + /* Wait for a packet or a timeout... XXX */ + count = select(max + 1, &rr, &ww, &xx, t ? &to : NULL); + } /* Get the current time... 
*/ gettimeofday (&cur_tv, (struct timezone *)0); @@ -317,11 +382,6 @@ isc_result_t omapi_one_dispatch (omapi_object_t *wo, if (io -> readfd && io -> inner && (desc = (*(io -> readfd)) (io -> inner)) >= 0) { FD_SET (desc, &r); -#if 0 - log_error ("read check: %d %lx %lx", max, - (unsigned long)r.fds_bits [0], - (unsigned long)w.fds_bits [0]); -#endif count = select (desc + 1, &r, &w, &x, &t0); bogon: if (count < 0) { diff --git a/server/db.c b/server/db.c index 424a3abfe..8dca0e465 100644 --- a/server/db.c +++ b/server/db.c @@ -1048,6 +1048,9 @@ int commit_leases () log_info ("commit_leases: unable to commit: %m"); return 0; } + + /* send out all deferred ACKs now*/ + flush_ackqueue(NULL); /* If we haven't rewritten the lease database in over an hour, rewrite it now. (The length of time should probably diff --git a/server/dhcp.c b/server/dhcp.c index 3050c8fa4..6a54c2a49 100644 --- a/server/dhcp.c +++ b/server/dhcp.c @@ -38,6 +38,12 @@ int outstanding_pings; +struct leasequeue *ackqueue_head, *ackqueue_tail; +static struct leasequeue *free_ackqueue; +TIME next_fsync; +int outstanding_acks; +int max_outstanding_acks = DEFAULT_DELAYED_ACK; + static char dhcp_message [256]; static int site_code_min; @@ -2409,13 +2415,15 @@ void ack_lease (packet, lease, offer, when, msg, ms_nulltp, hp) packet -> raw -> chaddr, sizeof packet -> raw -> chaddr); /* XXX */ } else { - /* Install the new information about this lease in the - database. If this is a DHCPACK or a dynamic BOOTREPLY - and we can't write the lease, don't ACK it (or BOOTREPLY - it) either. */ - - if (!supersede_lease (lease, lt, !offer || offer == DHCPACK, - offer == DHCPACK, offer == DHCPACK)) { + /* Install the new information on 'lt' onto the lease at + * 'lease'.  We will not 'commit' this information to disk + * yet (fsync()), we will 'propogate' the information if + * this is BOOTP or a DHCPACK, but we will not 'pimmediate'ly + * transmit failover binding updates (this is delayed until + * after the fsync()). 
+ */ + if (!supersede_lease(lease, lt, 0, !offer || offer == DHCPACK, + 0)) { log_info ("%s: database update failed", msg); free_lease_state (state, MDL); lease_dereference (&lt, MDL); @@ -2804,11 +2812,104 @@ void ack_lease (packet, lease, offer, when, msg, ms_nulltp, hp) (tvunref_t)lease_dereference); ++outstanding_pings; } else { - lease->cltt = cur_time; - dhcp_reply(lease); + lease->cltt = cur_time; + if (!offer || (offer == DHCPACK)) + delayed_ack_enqueue(lease); + else + dhcp_reply(lease); + } +} + +/* CC: queue single ACK: + - write the lease (but do not fsync it yet) + - add to doubly linked list + - commit if more than xx ACKs pending + - Not yet: schedule a fsync at the next interval (1 second?) + */ + +void +delayed_ack_enqueue(struct lease *lease) +{ + struct leasequeue *q; + if (!write_lease(lease)) + return; + if (free_ackqueue) { + q = free_ackqueue; + free_ackqueue = q->next; + } else { + q = ((struct leasequeue *) + dmalloc(sizeof(struct leasequeue), MDL)); + if (!q) + log_fatal("delayed_ack_enqueue: no memory!"); + } + memset(q, 0, sizeof *q); + /* prepend to ackqueue*/ + q->lease = lease; + q->next = ackqueue_head; + ackqueue_head = q; + if (!ackqueue_tail) + ackqueue_tail = q; + else + q->next->prev = q; + + outstanding_acks++; + if (outstanding_acks > max_outstanding_acks) + commit_leases(); + + /* If necessary, schedule a fsync in 1 second */ + /* + if (next_fsync < cur_time + 1) { + next_fsync = cur_time + 1; + add_timeout(next_fsync, commit_leases_readerdry, NULL, + (tvref_t) NULL, (tvunref_t) NULL); + } + */ +} + +void +commit_leases_readerdry(void *foo) +{ + if (outstanding_acks) + commit_leases(); +} + +/* CC: process the delayed ACK responses: + - send out the ACK packets + - move the queue slots to the free list + */ +void +flush_ackqueue(void *foo) +{ + struct leasequeue *ack, *p; + /* process from bottom to retain packet order */ + for (ack = ackqueue_tail ; ack ; ack = p) { + p = ack->prev; + dhcp_reply(ack->lease); + ack->next = 
free_ackqueue; + free_ackqueue = ack; } + ackqueue_head = NULL; + ackqueue_tail = NULL; + outstanding_acks = 0; } +#if defined (DEBUG_MEMORY_LEAKAGE_ON_EXIT) +void +relinquish_ackqueue(void) +{ + struct leasequeue *q, *n; + + for (q = ackqueue ; q ; q = n) { + n = q->next; + dfree(q, MDL); + } + for (q = free_ackqueue ; q ; q = n) { + n = q->next; + dfree(q, MDL); + } +} +#endif + void dhcp_reply (lease) struct lease *lease; { diff --git a/server/dhcpd.c b/server/dhcpd.c index e4ab86f21..bc73e81f9 100644 --- a/server/dhcpd.c +++ b/server/dhcpd.c @@ -729,6 +729,8 @@ main(int argc, char **argv) { omapi_set_int_value ((omapi_object_t *)dhcp_control_object, (omapi_object_t *)0, "state", server_running); + register_eventhandler(&rw_queue_empty,commit_leases_readerdry); + /* Receive packets and dispatch them... */ dispatch (); @@ -963,6 +965,18 @@ void postconf_initialization (int quiet) data_string_forget (&db, MDL); } } + + oc = lookup_option(&server_universe, options, SV_DELAYED_ACK); + if (oc && + evaluate_option_cache(&db, NULL, NULL, NULL, options, NULL, + &global_scope, oc, MDL)) { + if (db.len == 2) { + max_outstanding_acks = htons(getUShort(db.data)); + } else { + log_fatal("invalid max delayed ACK count "); + } + data_string_forget(&db, MDL); + } /* Don't need the options anymore. */ option_state_dereference (&options, MDL); diff --git a/server/dhcpd.conf.5 b/server/dhcpd.conf.5 index 15e4e0104..a5864eba9 100644 --- a/server/dhcpd.conf.5 +++ b/server/dhcpd.conf.5 @@ -28,7 +28,7 @@ .\" see ``http://www.vix.com''. To learn more about Nominum, Inc., see .\" ``http://www.nominum.com''. .\" -.\" $Id: dhcpd.conf.5,v 1.91 2007/11/20 18:34:37 dhankins Exp $ +.\" $Id: dhcpd.conf.5,v 1.92 2008/01/21 19:53:21 dhankins Exp $ .\" .TH dhcpd.conf 5 .SH NAME @@ -2043,6 +2043,25 @@ as the "valid lifetime" in DHCPv6). 
.RE .PP The +.I delayed-ack +statement +.RS 0.25i +.PP +.B delayed-ack \fInumber\fR\fB;\fR +.PP +.I Number +should be an integer value from zero to 2^16-1, and defaults to 28. The +number is the maximum number of DHCPv4 replies that will be queued pending +transmission until after a database commit event. If this number is +reached, a database commit event (commonly resulting in fsync() and +representing a performance penalty) will be made, and the reply packets +will be transmitted in a batch afterwards. This preserves the RFC2131 +direction that "stable storage" be updated prior to replying to clients. +Should the DHCPv4 sockets "go dry" (select() returns immediately with no +readable sockets), the commit is made and any queued packets are transmitted. +.RE +.PP +The .I do-forward-updates statement .RS 0.25i diff --git a/server/mdb.c b/server/mdb.c index ba42b6e90..55e65d543 100644 --- a/server/mdb.c +++ b/server/mdb.c @@ -2824,6 +2824,7 @@ void free_everything () cancel_all_timeouts (); relinquish_timeouts (); + relinquish_ackqueue(); trace_free_all (); group_dereference (&root_group, MDL); executable_statement_dereference (&default_classification_rules, MDL); diff --git a/server/stables.c b/server/stables.c index a2d76c793..bec3a744d 100644 --- a/server/stables.c +++ b/server/stables.c @@ -238,6 +238,7 @@ static struct option server_options[] = { { "dhcpv6-lease-file-name", "t", &server_universe, 54, 1 }, { "dhcpv6-pid-file-name", "t", &server_universe, 55, 1 }, { "limit-addrs-per-ia", "L", &server_universe, 56, 1 }, + { "delayed-ack", "S", &server_universe, 57, 1 }, { NULL, NULL, NULL, 0, 0 } };