- MINUS tokens should be parseable again.
+- Multiple (up to "delayed-ack x;" maximum) DHCPv4 packets are now queued and
+ released in bursts after single fsync() events when the upper limit is
+ reached or if the receiving sockets go dry. The practical upshot is
+ that fsync-coupled server performance is now multiplicitively increased.
+ The default delayed ack limit is 28. Thanks entirely to a patch from
+ Christof Chen.
+
Changes since 4.0.0b3
- The reverse dns name for PTR updates on IPv6 addresses has been fixed to
#define SV_DHCPV6_LEASE_FILE_NAME 54
#define SV_DHCPV6_PID_FILE_NAME 55
#define SV_LIMIT_ADDRS_PER_IA 56
+#define SV_DELAYED_ACK 57
#if !defined (DEFAULT_PING_TIMEOUT)
# define DEFAULT_PING_TIMEOUT 1
#endif
+#if !defined (DEFAULT_DELAYED_ACK)
+# define DEFAULT_DELAYED_ACK 28 /* default SO_SNDBUF size / 576 bytes */
+#endif
+
#if !defined (DEFAULT_DEFAULT_LEASE_TIME)
# define DEFAULT_DEFAULT_LEASE_TIME 43200
#endif
struct hardware address;
};
+struct leasequeue {
+ struct leasequeue *prev;
+ struct leasequeue *next;
+ struct lease *lease;
+};
+
typedef void (*tvref_t)(void *, void *, const char *, int);
typedef void (*tvunref_t)(void *, const char *, int);
struct timeout {
tvunref_t unref;
};
+struct eventqueue {
+ struct eventqueue *next;
+ void (*handler)(void *);
+};
+
struct protocol {
struct protocol *next;
int fd;
extern const char *path_dhcpd_pid;
extern int dhcp_max_agent_option_packet_length;
+extern struct eventqueue *rw_queue_empty;
int main(int, char **);
void postconf_initialization(int);
/* dhcp.c */
extern int outstanding_pings;
+extern int max_outstanding_acks;
void dhcp PROTO ((struct packet *));
void dhcpdiscover PROTO ((struct packet *, int));
void nak_lease PROTO ((struct packet *, struct iaddr *cip));
void ack_lease PROTO ((struct packet *, struct lease *,
unsigned int, TIME, char *, int, struct host_decl *));
+void delayed_ack_enqueue(struct lease *);
+void commit_leases_readerdry(void *);
+void flush_ackqueue(void *);
void dhcp_reply PROTO ((struct lease *));
int find_lease PROTO ((struct lease **, struct packet *,
struct shared_network *, int *, int *, struct lease *,
void write_billing_classes (void);
int write_billing_class PROTO ((struct class *));
void commit_leases_timeout PROTO ((void *));
+void commit_leases_readerdry(void *);
int commit_leases PROTO ((void));
void db_startup PROTO ((int));
int new_lease_file PROTO ((void));
isc_result_t binding_scope_stuff_values (omapi_object_t *,
struct binding_scope *);
+void register_eventhandler(struct eventqueue **, void (*handler)(void *));
+void unregister_eventhandler(struct eventqueue **, void (*handler)(void *));
+void trigger_event(struct eventqueue **);
+
/* mdb.c */
extern struct subnet *subnets;
static omapi_io_object_t omapi_io_states;
struct timeval cur_tv;
+struct eventqueue *rw_queue_empty;
+
OMAPI_OBJECT_ALLOC (omapi_io,
omapi_io_object_t, omapi_type_io_object)
OMAPI_OBJECT_ALLOC (omapi_waiter,
omapi_waiter_object_t, omapi_type_waiter)
+void
+register_eventhandler(struct eventqueue **queue, void (*handler)(void *))
+{
+ struct eventqueue *t, *q;
+
+ /* traverse to end of list */
+ t = NULL;
+ for (q = *queue ; q ; q = q->next) {
+ if (q->handler == handler)
+ return; /* handler already registered */
+ t = q;
+ }
+
+ q = ((struct eventqueue *)dmalloc(sizeof(struct eventqueue), MDL));
+ if (!q)
+ log_fatal("register_eventhandler: no memory!");
+ memset(q, 0, sizeof *q);
+ if (t)
+ t->next = q;
+ else
+ *queue = q;
+ q->handler = handler;
+ return;
+}
+
+void
+unregister_eventhandler(struct eventqueue **queue, void (*handler)(void *))
+{
+ struct eventqueue *t, *q;
+
+ /* traverse to end of list */
+ t= NULL;
+ for (q = *queue ; q ; q = q->next) {
+ if (q->handler == handler) {
+ if (t)
+ t->next = q->next;
+ else
+ *queue = q->next;
+ dfree(q, MDL); /* Don't access q after this!*/
+ break;
+ }
+ t = q;
+ }
+ return;
+}
+
+void
+trigger_event(struct eventqueue **queue)
+{
+ struct eventqueue *q;
+
+ for (q=*queue ; q ; q=q->next) {
+ if (q->handler)
+ (*q->handler)(NULL);
+ }
+}
+
+
/* Register an I/O handle so that we can do asynchronous I/O on it. */
isc_result_t omapi_register_io_object (omapi_object_t *h,
isc_result_t omapi_one_dispatch (omapi_object_t *wo,
struct timeval *t)
{
- fd_set r, w, x;
+ fd_set r, w, x, rr, ww, xx;
int max = 0;
int count;
int desc;
}
}
- /* Wait for a packet or a timeout... XXX */
-#if 0
-#if defined (__linux__)
-#define fds_bits __fds_bits
-#endif
- log_error ("dispatch: %d %lx %lx", max,
- (unsigned long)r.fds_bits [0],
- (unsigned long)w.fds_bits [0]);
-#endif
- count = select (max + 1, &r, &w, &x, t ? &to : (struct timeval *)0);
+ /* poll if all reader are dry */
+ now.tv_sec = 0;
+ now.tv_usec = 0;
+ rr=r;
+ ww=w;
+ xx=x;
+
+ /* poll once */
+ count = select(max + 1, &r, &w, &x, &now);
+ if (!count) {
+ /* We are dry now */
+ trigger_event(&rw_queue_empty);
+ /* Wait for a packet or a timeout... XXX */
+ count = select(max + 1, &rr, &ww, &xx, t ? &to : NULL);
+ }
/* Get the current time... */
gettimeofday (&cur_tv, (struct timezone *)0);
if (io -> readfd && io -> inner &&
(desc = (*(io -> readfd)) (io -> inner)) >= 0) {
FD_SET (desc, &r);
-#if 0
- log_error ("read check: %d %lx %lx", max,
- (unsigned long)r.fds_bits [0],
- (unsigned long)w.fds_bits [0]);
-#endif
count = select (desc + 1, &r, &w, &x, &t0);
bogon:
if (count < 0) {
log_info ("commit_leases: unable to commit: %m");
return 0;
}
+
+ /* send out all deferred ACKs now*/
+ flush_ackqueue(NULL);
/* If we haven't rewritten the lease database in over an
hour, rewrite it now. (The length of time should probably
int outstanding_pings;
+struct leasequeue *ackqueue_head, *ackqueue_tail;
+static struct leasequeue *free_ackqueue;
+TIME next_fsync;
+int outstanding_acks;
+int max_outstanding_acks = DEFAULT_DELAYED_ACK;
+
static char dhcp_message [256];
static int site_code_min;
packet -> raw -> chaddr,
sizeof packet -> raw -> chaddr); /* XXX */
} else {
- /* Install the new information about this lease in the
- database. If this is a DHCPACK or a dynamic BOOTREPLY
- and we can't write the lease, don't ACK it (or BOOTREPLY
- it) either. */
-
- if (!supersede_lease (lease, lt, !offer || offer == DHCPACK,
- offer == DHCPACK, offer == DHCPACK)) {
+ /* Install the new information on 'lt' onto the lease at
+ * 'lease'. We will not 'commit' this information to disk
+ * yet (fsync()), we will 'propogate' the information if
+ * this is BOOTP or a DHCPACK, but we will not 'pimmediate'ly
+ * transmit failover binding updates (this is delayed until
+ * after the fsync()).
+ */
+ if (!supersede_lease(lease, lt, 0, !offer || offer == DHCPACK,
+ 0)) {
log_info ("%s: database update failed", msg);
free_lease_state (state, MDL);
lease_dereference (<, MDL);
(tvunref_t)lease_dereference);
++outstanding_pings;
} else {
- lease->cltt = cur_time;
- dhcp_reply(lease);
+ lease->cltt = cur_time;
+ if (!offer || (offer == DHCPACK))
+ delayed_ack_enqueue(lease);
+ else
+ dhcp_reply(lease);
+ }
+}
+
+/* CC: queue single ACK:
+ - write the lease (but do not fsync it yet)
+ - add to double linked list
+ - commit if more than xx ACKs pending
+ - Not yet: schedule a fsync at the next interval (1 second?)
+ */
+
+void
+delayed_ack_enqueue(struct lease *lease)
+{
+ struct leasequeue *q;
+ if (!write_lease(lease))
+ return;
+ if (free_ackqueue) {
+ q = free_ackqueue;
+ free_ackqueue = q->next;
+ } else {
+ q = ((struct leasequeue *)
+ dmalloc(sizeof(struct leasequeue), MDL));
+ if (!q)
+ log_fatal("delayed_ack_enqueue: no memory!");
+ }
+ memset(q, 0, sizeof *q);
+ /* prepend to ackqueue*/
+ q->lease = lease;
+ q->next = ackqueue_head;
+ ackqueue_head = q;
+ if (!ackqueue_tail)
+ ackqueue_tail = q;
+ else
+ q->next->prev = q;
+
+ outstanding_acks++;
+ if (outstanding_acks > max_outstanding_acks)
+ commit_leases();
+
+ /* If neccessary, schedule a fsync in 1 second */
+ /*
+ if (next_fsync < cur_time + 1) {
+ next_fsync = cur_time + 1;
+ add_timeout(next_fsync, commit_leases_readerdry, NULL,
+ (tvref_t) NULL, (tvunref_t) NULL);
+ }
+ */
+}
+
+void
+commit_leases_readerdry(void *foo)
+{
+ if (outstanding_acks)
+ commit_leases();
+}
+
+/* CC: process the delayed ACK responses:
+ - send out the ACK packets
+ - move the queue slots to the free list
+ */
+void
+flush_ackqueue(void *foo)
+{
+ struct leasequeue *ack, *p;
+ /* process from bottom to retain packet order */
+ for (ack = ackqueue_tail ; ack ; ack = p) {
+ p = ack->prev;
+ dhcp_reply(ack->lease);
+ ack->next = free_ackqueue;
+ free_ackqueue = ack;
}
+ ackqueue_head = NULL;
+ ackqueue_tail = NULL;
+ outstanding_acks = 0;
}
+#if defined (DEBUG_MEMORY_LEAKAGE_ON_EXIT)
+void
+relinquish_ackqueue(void)
+{
+ struct leasequeue *q, *n;
+
+ for (q = ackqueue ; q ; q = n) {
+ n = q->next;
+ dfree(q, MDL);
+ }
+ for (q = free_ackqueue ; q ; q = n) {
+ n = q->next;
+ dfree(q, MDL);
+ }
+}
+#endif
+
void dhcp_reply (lease)
struct lease *lease;
{
omapi_set_int_value ((omapi_object_t *)dhcp_control_object,
(omapi_object_t *)0, "state", server_running);
+ register_eventhandler(&rw_queue_empty,commit_leases_readerdry);
+
/* Receive packets and dispatch them... */
dispatch ();
data_string_forget (&db, MDL);
}
}
+
+ oc = lookup_option(&server_universe, options, SV_DELAYED_ACK);
+ if (oc &&
+ evaluate_option_cache(&db, NULL, NULL, NULL, options, NULL,
+ &global_scope, oc, MDL)) {
+ if (db.len == 2) {
+ max_outstanding_acks = htons(getUShort(db.data));
+ } else {
+ log_fatal("invalid max delayed ACK count ");
+ }
+ data_string_forget(&db, MDL);
+ }
/* Don't need the options anymore. */
option_state_dereference (&options, MDL);
.\" see ``http://www.vix.com''. To learn more about Nominum, Inc., see
.\" ``http://www.nominum.com''.
.\"
-.\" $Id: dhcpd.conf.5,v 1.91 2007/11/20 18:34:37 dhankins Exp $
+.\" $Id: dhcpd.conf.5,v 1.92 2008/01/21 19:53:21 dhankins Exp $
.\"
.TH dhcpd.conf 5
.SH NAME
.RE
.PP
The
+.I delayed-ack
+statement
+.RS 0.25i
+.PP
+.B delayed-ack \fInumber\fR\fB;\fR
+.PP
+.I Number
+should be an integer value from zero to 2^16-1, and defaults to 28. The
+number represents how many DHCPv4 replies maximum will be queued pending
+transmission until after a database commit event. If this number is
+reached, a database commit event (commonly resulting in fsync() and
+representing a performance penalty) will be made, and the reply packets
+will be transmitted in a batch afterwards. This preserves the RFC2131
+direction that "stable storage" be updated prior to replying to clients.
+Should the DHCPv4 sockets "go dry" (select() returns immediately with no
+read sockets), the commit is made and any queued packets are transmitted.
+.RE
+.PP
+The
.I do-forward-updates
statement
.RS 0.25i
cancel_all_timeouts ();
relinquish_timeouts ();
+ relinquish_ackqueue();
trace_free_all ();
group_dereference (&root_group, MDL);
executable_statement_dereference (&default_classification_rules, MDL);
{ "dhcpv6-lease-file-name", "t", &server_universe, 54, 1 },
{ "dhcpv6-pid-file-name", "t", &server_universe, 55, 1 },
{ "limit-addrs-per-ia", "L", &server_universe, 56, 1 },
+ { "delayed-ack", "S", &server_universe, 57, 1 },
{ NULL, NULL, NULL, 0, 0 }
};