]> git.ipfire.org Git - thirdparty/bind9.git/commitdiff
prepare final release of 9.4.2-P2-W1 v9.4.2-P2-W1
authorEvan Hunt <each@isc.org>
Thu, 4 Sep 2008 05:47:09 +0000 (05:47 +0000)
committerEvan Hunt <each@isc.org>
Thu, 4 Sep 2008 05:47:09 +0000 (05:47 +0000)
CHANGES
lib/isc/include/isc/socket.h
lib/isc/win32/errno2result.c
lib/isc/win32/include/isc/mutex.h
lib/isc/win32/socket.c
lib/isc/win32/time.c

diff --git a/CHANGES b/CHANGES
index 0956017aa4899f3da1c4c22e776012bc84c9e919..33e85e9f9b79c2de708f52e6da135f83af4f9fdf 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -1,11 +1,19 @@
        --- 9.4.2-P2-W1 released ---
 
-2420.  [bug]           Windows socket handling cleanup.  Let the IO
+2432.  [bug]           More Windows socket handling improvements.  Stop
+                       using I/O events and use IO Completion Ports
+                       throughout.  Rewrite the receive path logic to make
+                       it easier to support multiple simultaneous
+                       requests in the future.  Add stricter consistency
+                        checking as a compile-time option (define
+                        ISC_SOCKET_CONSISTENCY_CHECKS; defaults to off).
+
+2420.  [bug]           Windows socket handling cleanup.  Let the io
                        completion event send out cancelled read/write
-                       done events, which keeps us from writing to memory
-                       we no longer own.  Add debugging socket_log()
-                        function.  Rework TCP socket handling to avoid
-                        leaking sockets.
+                       done events, which keeps us from writing to memory
+                       we no longer have ownership of.  Add debugging
+                       socket_log() function.  Rework TCP socket handling
+                       to not leak sockets.
 
        --- 9.4.2-P2 released ---
 
index 951a06316ed2cc11a85b0b595cc551b25da00961..26393a0b100941f01820ca8fd7054d1642a4180c 100644 (file)
@@ -15,7 +15,7 @@
  * PERFORMANCE OF THIS SOFTWARE.
  */
 
-/* $Id: socket.h,v 1.57.18.6.46.4 2008/07/23 23:16:43 marka Exp $ */
+/* $Id: socket.h,v 1.57.18.6.46.4.6.1 2008/09/04 05:47:09 each Exp $ */
 
 #ifndef ISC_SOCKET_H
 #define ISC_SOCKET_H 1
@@ -165,6 +165,8 @@ typedef enum {
 /*@{*/
 /*!
  * What I/O events to cancel in isc_socket_cancel() calls.
+ * ISC_SOCKCANCEL_ALL *must* contain all the possible bits,
+ * and only those bits.
  */
 #define ISC_SOCKCANCEL_RECV    0x00000001      /*%< cancel recv */
 #define ISC_SOCKCANCEL_SEND    0x00000002      /*%< cancel send */
index 1836270be3cd6866958840ed20607263de53c37f..6f4279ddad1986c9b73276915946d46bbf5907ba 100644 (file)
@@ -15,7 +15,7 @@
  * PERFORMANCE OF THIS SOFTWARE.
  */
 
-/* $Id: errno2result.c,v 1.9.18.3.62.1 2008/08/21 00:50:09 each Exp $ */
+/* $Id: errno2result.c,v 1.9.18.3.62.2 2008/09/04 05:47:09 each Exp $ */
 
 #include <config.h>
 
@@ -64,29 +64,36 @@ isc__errno2resultx(int posixerrno, const char *file, int line) {
        case ERROR_CANCELLED:
                return (ISC_R_CANCELED);
        case ERROR_CONNECTION_REFUSED:
+       case WSAECONNREFUSED:
                return (ISC_R_CONNREFUSED);
+       case WSAENOTCONN:
        case ERROR_CONNECTION_INVALID:
                return (ISC_R_NOTCONNECTED);
        case ERROR_HOST_UNREACHABLE:
+       case WSAEHOSTUNREACH:
                return (ISC_R_HOSTUNREACH);
        case ERROR_NETWORK_UNREACHABLE:
+       case WSAENETUNREACH:
                return (ISC_R_NETUNREACH);
        case ERROR_NO_NETWORK:
                return (ISC_R_NETUNREACH);
-       case ERROR_OPERATION_ABORTED:
-               return (ISC_R_CONNECTIONRESET);
        case ERROR_PORT_UNREACHABLE:
                return (ISC_R_HOSTUNREACH);
+       case WSAECONNRESET:
+       case WSAENETRESET:
+       case WSAECONNABORTED:
+       case WSAEDISCON:
+       case ERROR_OPERATION_ABORTED:
+       case ERROR_CONNECTION_ABORTED:
        case ERROR_REQUEST_ABORTED:
                return (ISC_R_CONNECTIONRESET);
        case WSAEADDRNOTAVAIL:
                return (ISC_R_ADDRNOTAVAIL);
-       case WSAEHOSTUNREACH:
-               return (ISC_R_HOSTUNREACH);
+       case ERROR_NETNAME_DELETED:
+       case WSAENETDOWN:
+               return (ISC_R_NETUNREACH);
        case WSAEHOSTDOWN:
                return (ISC_R_HOSTUNREACH);
-       case WSAENETUNREACH:
-               return (ISC_R_NETUNREACH);
        case WSAENOBUFS:
                return (ISC_R_NORESOURCES);
        default:
index bae0ed38e7b2d9b8cb4f33c3b37a67949843f868..86830b66c10df9b80ef145b0322d07f011438ded 100644 (file)
@@ -15,7 +15,7 @@
  * PERFORMANCE OF THIS SOFTWARE.
  */
 
-/* $Id: mutex.h,v 1.17 2004/03/05 05:12:05 marka Exp $ */
+/* $Id: mutex.h,v 1.17.956.1 2008/09/04 05:47:09 each Exp $ */
 
 #ifndef ISC_MUTEX_H
 #define ISC_MUTEX_H 1
 
 typedef CRITICAL_SECTION isc_mutex_t;
 
-/* This definition is here since WINBASE.H omits it for some reason */
-
+/* 
+ * This definition is here since some versions of WINBASE.H
+ * omit it for some reason
+ */
+#if(_WIN32_WINNT < 0x0400)
 WINBASEAPI BOOL WINAPI
 TryEnterCriticalSection(LPCRITICAL_SECTION lpCriticalSection);
+#endif /* _WIN32_WINNT < 0x0400 */
 
 #define isc_mutex_init(mp) \
        (InitializeCriticalSection((mp)), ISC_R_SUCCESS)
@@ -46,6 +50,6 @@ TryEnterCriticalSection(LPCRITICAL_SECTION lpCriticalSection);
 /*
  * This is a placeholder for now since we are not keeping any mutex stats
  */
-#define isc_mutex_stats(fp)
+#define isc_mutex_stats(fp) do {} while (0)
 
 #endif /* ISC_MUTEX_H */
index fe368ec6ff8290e194f3e121eb03065ad174274c..cb7022f8a59e629cae7c74771ad31773e16bd792 100644 (file)
  * PERFORMANCE OF THIS SOFTWARE.
  */
 
-/* $Id: socket.c,v 1.30.18.20.12.5.6.1 2008/08/21 00:50:09 each Exp $ */
+/* $Id: socket.c,v 1.30.18.20.12.5.6.2 2008/09/04 05:47:09 each Exp $ */
 
-/* This code has been rewritten to take advantage of Windows Sockets
- * I/O Completion Ports and Events. I/O Completion Ports is ONLY
- * available on Windows NT, Windows 2000 and Windows XP series of
- * the Windows Operating Systems. In CANNOT run on Windows 95, Windows 98
- * or the follow-ons to those Systems.
+/* This code uses functions which are only available on Server 2003 and
+ * higher, and Windows XP and higher.
  *
  * This code is by nature multithreaded and takes advantage of various
  * features to pass on information through the completion port for
- * when I/O is completed.  All sends and receives are completed through
- * the completion port. Due to an implementation bug in Windows 2000,
- * Service Pack 2 must installed on the system for this code to run correctly.
- * For details on this problem see Knowledge base article Q263823.
- * The code checks for this. The number of Completion Port Worker threads
- * used is the total number of CPU's + 1. This increases the likelihood that
- * a Worker Thread is available for processing a completed request.
+ * when I/O is completed.  All sends, receives, accepts, and connects are
+ * completed through the completion port.
  *
- * All accepts and connects are accomplished through the WSAEventSelect()
- * function and the event_wait loop. Events are added to and deleted from
- * each event_wait thread via a common event_update stack owned by the socket
- * manager. If the event_wait thread runs out of array space in the events
- * array it will look for another event_wait thread to add the event. If it
- * fails to find another one it will create a new thread to handle the
- * outstanding event.
- *
- * A future enhancement is to use AcceptEx to take avantage of Overlapped
- * I/O which allows for enhanced performance of TCP connections.
- * This will also reduce the number of events that are waited on by the
- * event_wait threads to just the connect sockets and reduce the number
- * additional threads required.
+ * The number of Completion Port Worker threads used is the total number
+ * of CPUs + 1. This increases the likelihood that a Worker Thread is
+ * available for processing a completed request.
  *
  * XXXPDM 5 August, 2002
  */
 #include <isc/util.h>
 #include <isc/win32os.h>
 
+#include <mswsock.h>
+
 #include "errno2result.h"
 
+/*
+ * How in the world can Microsoft exist with APIs like this?
+ * We can't actually call this directly, because it turns out
+ * no library exports this function.  Instead, we need to
+ * issue a runtime call to get the address.
+ */
+LPFN_CONNECTEX ISCConnectEx;
+LPFN_ACCEPTEX ISCAcceptEx;
+LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs;
+
 /*
  * 0 = no debugging, 1 = write to file "socket.log" in working directory.
  */
 #define XXXMLG_DEBUG 0
-
 #if XXXMLG_DEBUG
 FILE *logfile = NULL;
 #endif
 
+/*
+ * Run expensive internal consistency checks.
+ */
+#ifdef ISC_SOCKET_CONSISTENCY_CHECKS
+#define CONSISTENT(sock) consistent(sock)
+#else
+#define CONSISTENT(sock) do {} while (0)
+#endif
+static void consistent(isc_socket_t *sock);
+
 /*
  * Define this macro to control the behavior of connection
  * resets on UDP sockets.  See Microsoft KnowledgeBase Article Q263823
@@ -142,6 +145,7 @@ FILE *logfile = NULL;
 #define DOIO_HARD        2       /* i/o error, event sent */
 #define DOIO_EOF         3       /* EOF, no event sent */
 #define DOIO_PENDING     4       /* status when i/o is in process */
+#define DOIO_NEEDMORE    5       /* IO was processed, but we need more due to minimum */
 
 #define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)
 
@@ -166,6 +170,19 @@ FILE *logfile = NULL;
 
 typedef isc_event_t intev_t;
 
+/*
+ * Socket State
+ */
+enum {
+  SOCK_INITIALIZED,    /* Socket Initialized */
+  SOCK_OPEN,           /* Socket opened but nothing yet to do */
+  SOCK_DATA,           /* Socket sending or receiving data */
+  SOCK_LISTEN,         /* TCP Socket listening for connects */
+  SOCK_ACCEPT,         /* TCP socket is waiting to accept */
+  SOCK_CONNECT,                /* TCP Socket connecting */
+  SOCK_CLOSED,         /* Socket has been closed */
+};
+
 #define SOCKET_MAGIC           ISC_MAGIC('I', 'O', 'i', 'o')
 #define VALID_SOCKET(t)                ISC_MAGIC_VALID(t, SOCKET_MAGIC)
 
@@ -190,16 +207,13 @@ typedef isc_event_t intev_t;
  * Message header for recvmsg and sendmsg calls.
  * Used value-result for recvmsg, value only for sendmsg.
  */
-
-
 struct msghdr {
-        void   *msg_name;              /* optional address */
-        u_int   msg_namelen;            /* size of address */
+       SOCKADDR to_addr;               /* UDP send/recv address */
+       int      to_addr_len;           /* length of the address */
         WSABUF  *msg_iov;              /* scatter/gather array */
         u_int   msg_iovlen;             /* # elements in msg_iov */
         void   *msg_control;           /* ancillary data, see below */
         u_int   msg_controllen;         /* ancillary data buffer len */
-        int     msg_flags;              /* flags on received message */
        int     msg_totallen;           /* total length of this message */
 } msghdr;
        
@@ -220,45 +234,63 @@ struct isc_socket {
        isc_socketmgr_t        *manager;
        isc_mutex_t             lock;
        isc_sockettype_t        type;
-       OVERLAPPED              overlapped;
+
        /* Pointers to scatter/gather buffers */
        WSABUF                  iov[ISC_SOCKET_MAXSCATTERGATHER];
-       WSAEVENT                hEvent;         /* Event Handle */
-       long                    wait_type;      /* Events to wait on */
-       WSAEVENT                hAlert;         /* Alert Event Handle */
-       DWORD                   evthread_id;    /* Event Thread Id for socket */
 
        /* Locked by socket lock. */
        ISC_LINK(isc_socket_t)  link;
-       unsigned int            references;
-       SOCKET                  fd;
-       int                     pf;
+       unsigned int            references; /* EXTERNAL references */
+       SOCKET                  fd;     /* file handle */
+       int                     pf;     /* protocol family */
+
+       /*
+        * Each recv() call uses this buffer.  It is a per-socket receive
+        * buffer that allows us to decouple the system recv() from the
+        * recv_list done events.  This means the items on the recv_list
+        * can be removed without having to cancel pending system recv()
+        * calls.  It also allows us to read-ahead in some cases.
+        */
+       struct {
+               SOCKADDR        from_addr;         // UDP send/recv address
+               int             from_addr_len;     // length of the address
+               char            *base;             // the base of the buffer
+               char            *consume_position; // where to start copying data from next
+               unsigned int    len;               // the actual size of this buffer
+               unsigned int    remaining;         // the number of bytes remaining
+       } recvbuf;
 
        ISC_LIST(isc_socketevent_t)             send_list;
        ISC_LIST(isc_socketevent_t)             recv_list;
        ISC_LIST(isc_socket_newconnev_t)        accept_list;
        isc_socket_connev_t                    *connect_ev;
 
-       /*
-        * Internal events.  Posted when a descriptor is readable or
-        * writable.  These are statically allocated and never freed.
-        * They will be set to non-purgable before use.
-        */
-       intev_t                 readable_ev;
-       intev_t                 writable_ev;
-
        isc_sockaddr_t          address;  /* remote address */
 
-       unsigned int            pending_close : 1,
-                               pending_accept : 1,
-                               iocp : 1,       /* I/O Completion Port */
-                               listener : 1,   /* listener socket */
+       unsigned int            listener : 1,   /* listener socket */
                                connected : 1,
-                               connecting : 1, /* connect pending */
-                               bound : 1,      /* bound to local addr */
-                               pending_free: 1;
-       unsigned int            pending_recv;
-       unsigned int            pending_send;
+                               pending_connect : 1, /* connect pending */
+                               bound : 1;      /* bound to local addr */
+
+       unsigned int            pending_iocp;  /* Should equal the counters below. Debug. */
+       unsigned int            pending_recv;  /* Number of outstanding recv() calls. */
+       unsigned int            pending_send;  /* Number of outstanding send() calls. */
+       unsigned int            pending_accept; /* Number of outstanding accept() calls. */
+       unsigned int            state; /* Socket state. Debugging and consistency checking. */
+       int                     state_lineno;  /* line which last touched state */
+};
+
+#define _set_state(sock, _state) do { (sock)->state = (_state); (sock)->state_lineno = __LINE__; } while (0)
+
+/*
+ * Buffer structure
+ */
+typedef struct buflist buflist_t;
+
+struct buflist {
+       void                    *buf;
+       unsigned int            buflen;
+       ISC_LINK(buflist_t)     link;
 };
 
 /*
@@ -267,10 +299,15 @@ struct isc_socket {
 
 static HANDLE hHeapHandle = NULL;
 typedef struct IoCompletionInfo {
-       OVERLAPPED               overlapped;
-       isc_socketevent_t       *dev;
-       int                      request_type;
-       struct msghdr            messagehdr;
+       OVERLAPPED              overlapped;
+       isc_socketevent_t       *dev;  /* send()/recv() done event */
+       isc_socket_connev_t     *cdev; /* connect() done event */
+       isc_socket_newconnev_t  *adev; /* accept() done event */
+       void                    *acceptbuffer;
+       DWORD                   received_bytes;
+       int                     request_type;
+       struct msghdr           messagehdr;
+       ISC_LIST(buflist_t)     bufferlist;     /*%< list of buffers */
 } IoCompletionInfo;
 
 /*
@@ -280,52 +317,6 @@ typedef struct IoCompletionInfo {
  */
 #define MAX_IOCPTHREADS 20
 
-/*
- * event_change structure to handle adds and deletes from the list of
- * events in the Wait
- */
-typedef struct event_change event_change_t;
-
-struct event_change {
-       isc_socket_t                    *sock;
-       WSAEVENT                        hEvent;
-       DWORD                           evthread_id;
-       SOCKET                          fd;
-       unsigned int                    action;
-       ISC_LINK(event_change_t)        link;
-};
-
-/*
- * Note: We are using an array here since *WaitForMultiple* wants an array
- * WARNING: This value may not be greater than 64 since the
- * WSAWaitForMultipleEvents function is limited to 64 events.
- */
-
-#define MAX_EVENTS 64
-
-/*
- * List of events being waited on and their associated sockets
- */
-typedef struct sock_event_list {
-       int max_event;
-       int total_events;
-       isc_socket_t                    *aSockList[MAX_EVENTS];
-       WSAEVENT                        aEventList[MAX_EVENTS];
-} sock_event_list;
-
-/*
- * Thread Event structure for managing the threads handling events
- */
-typedef struct events_thread events_thread_t;
-
-struct events_thread {
-       isc_thread_t                    thread_handle;  /* Thread's handle */
-       DWORD                           thread_id;      /* Thread's id */
-       sock_event_list                 sockev_list;
-       isc_socketmgr_t                 *manager;
-       ISC_LINK(events_thread_t)       link;
-};
-
 #define SOCKET_MANAGER_MAGIC   ISC_MAGIC('I', 'O', 'm', 'g')
 #define VALID_MANAGER(m)       ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)
 
@@ -335,21 +326,27 @@ struct isc_socketmgr {
        isc_mem_t                      *mctx;
        isc_mutex_t                     lock;
        /* Locked by manager lock. */
-       ISC_LIST(event_change_t)        event_updates;
        ISC_LIST(isc_socket_t)          socklist;
-       int                             event_written;
-       WSAEVENT                        prime_alert;
        isc_boolean_t                   bShutdown;
-       ISC_LIST(events_thread_t)       ev_threads;
        isc_condition_t                 shutdown_ok;
        HANDLE                          hIoCompletionPort;
        int                             maxIOCPThreads;
        HANDLE                          hIOCPThreads[MAX_IOCPTHREADS];
        DWORD                           dwIOCPThreadIds[MAX_IOCPTHREADS];
-       unsigned int                    totalHandles;
-       unsigned int                    totalSockets;
-       unsigned int                    totalHandleRequests;
-       unsigned int                    iocp_total;
+
+       /*
+        * Debugging.
+        * Modified by InterlockedIncrement() and InterlockedDecrement()
+        */
+       LONG                            totalSockets;
+       LONG                            iocp_total;
+};
+
+enum {
+       SOCKET_RECV,
+       SOCKET_SEND,
+       SOCKET_ACCEPT,
+       SOCKET_CONNECT
 };
 
 /*
@@ -358,21 +355,20 @@ struct isc_socketmgr {
 #define MAXSCATTERGATHER_SEND  (ISC_SOCKET_MAXSCATTERGATHER)
 #define MAXSCATTERGATHER_RECV  (ISC_SOCKET_MAXSCATTERGATHER)
 
-static isc_threadresult_t WINAPI event_wait(void *uap);
 static isc_threadresult_t WINAPI SocketIoThread(LPVOID ThreadContext);
-static void free_socket(isc_socket_t **);
-
-enum {
-       SOCKET_RECV,
-       SOCKET_SEND,
-};
-
-enum {
-       EVENT_ADD,
-       EVENT_DELETE
-};
+static void maybe_free_socket(isc_socket_t **, int);
+static void free_socket(isc_socket_t **, int);
+static isc_boolean_t senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev);
+static isc_boolean_t acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev);
+static isc_boolean_t connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev);
+static void send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev);
+static void send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev);
+static void send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev);
+static void send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev);
+static void send_recvdone_abort(isc_socket_t *sock, isc_result_t result);
+static void queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev);
+static void queue_receive_request(isc_socket_t *sock);
 
-#if defined(ISC_SOCKET_DEBUG)
 /*
  * This is used to dump the contents of the sock structure
  * You should make sure that the sock is locked before
@@ -383,26 +379,27 @@ void
 sock_dump(isc_socket_t *sock) {
        isc_socketevent_t *ldev;
        isc_socket_newconnev_t *ndev;
+
+#if 0
        isc_sockaddr_t addr;
        char socktext[256];
 
-
        isc_socket_getpeername(sock, &addr);
        isc_sockaddr_format(&addr, socktext, sizeof(socktext));
        printf("Remote Socket: %s\n", socktext);
        isc_socket_getsockname(sock, &addr);
        isc_sockaddr_format(&addr, socktext, sizeof(socktext));
        printf("This Socket: %s\n", socktext);
+#endif
 
        printf("\n\t\tSock Dump\n");
        printf("\t\tfd: %u\n", sock->fd);
        printf("\t\treferences: %d\n", sock->references);
        printf("\t\tpending_accept: %d\n", sock->pending_accept);
-       printf("\t\tpending_close: %d\n", sock->pending_close);
-       printf("\t\tconnecting: %d\n", sock->connecting);
+       printf("\t\tconnecting: %d\n", sock->pending_connect);
        printf("\t\tconnected: %d\n", sock->connected);
        printf("\t\tbound: %d\n", sock->bound);
-       printf("\t\tiocp: %d\n", sock->iocp);
+       printf("\t\tpending_iocp: %d\n", sock->pending_iocp);
        printf("\t\tsocket type: %d\n", sock->type);
 
        printf("\n\t\tSock Recv List\n");
@@ -411,12 +408,14 @@ sock_dump(isc_socket_t *sock) {
                printf("\t\tdev: %p\n", ldev);
                ldev = ISC_LIST_NEXT(ldev, ev_link);
        }
+
        printf("\n\t\tSock Send List\n");
        ldev = ISC_LIST_HEAD(sock->send_list);
        while (ldev != NULL) {
                printf("\t\tdev: %p\n", ldev);
                ldev = ISC_LIST_NEXT(ldev, ev_link);
        }
+
        printf("\n\t\tSock Accept List\n");
        ndev = ISC_LIST_HEAD(sock->accept_list);
        while (ndev != NULL) {
@@ -424,7 +423,6 @@ sock_dump(isc_socket_t *sock) {
                ndev = ISC_LIST_NEXT(ndev, ev_link);
        }
 }
-#endif
 
 static void
 socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
@@ -474,7 +472,7 @@ iocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) {
                manager->hIOCPThreads[i] = CreateThread(NULL, 0, SocketIoThread,
                                                manager, 0,
                                                &manager->dwIOCPThreadIds[i]);
-               if(manager->hIOCPThreads[i] == NULL) {
+               if (manager->hIOCPThreads[i] == NULL) {
                        errval = GetLastError();
                        isc__strerror(errval, strbuf, sizeof(strbuf));
                        FATAL_ERROR(__FILE__, __LINE__,
@@ -482,6 +480,7 @@ iocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) {
                                ISC_MSG_FAILED,
                                "Can't create IOCP thread: %s"),
                                strbuf);
+                       exit(1);
                }
        }
 }
@@ -499,14 +498,25 @@ iocompletionport_init(isc_socketmgr_t *manager) {
         * Create a private heap to handle the socket overlapped structure
         * The miniumum number of structures is 10, there is no maximum
         */
-       hHeapHandle = HeapCreate(0, 10*sizeof(IoCompletionInfo), 0);
-       manager->maxIOCPThreads = min(isc_os_ncpus() + 1,
-                                       MAX_IOCPTHREADS);
+       hHeapHandle = HeapCreate(0, 10 * sizeof(IoCompletionInfo), 0);
+       if (hHeapHandle == NULL) {
+               errval = GetLastError();
+               isc__strerror(errval, strbuf, sizeof(strbuf));
+               FATAL_ERROR(__FILE__, __LINE__,
+                           isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
+                                          ISC_MSG_FAILED,
+                                          "HeapCreate() failed during "
+                                          "initialization: %s"),
+                           strbuf);
+               exit(1);
+       }
+
+       manager->maxIOCPThreads = min(isc_os_ncpus() + 1, MAX_IOCPTHREADS);
 
        /* Now Create the Completion Port */
        manager->hIoCompletionPort = CreateIoCompletionPort(
-                                    INVALID_HANDLE_VALUE, NULL,
-                                    0, manager->maxIOCPThreads);
+                       INVALID_HANDLE_VALUE, NULL,
+                       0, manager->maxIOCPThreads);
        if (manager->hIoCompletionPort == NULL) {
                errval = GetLastError();
                isc__strerror(errval, strbuf, sizeof(strbuf));
@@ -532,571 +542,257 @@ iocompletionport_init(isc_socketmgr_t *manager) {
 void
 iocompletionport_update(isc_socket_t *sock) {
        HANDLE hiocp;
+       char strbuf[ISC_STRERRORSIZE];
+
        REQUIRE(VALID_SOCKET(sock));
-       INSIST(sock->iocp == 0);
 
-       sock->iocp = 1;
        hiocp = CreateIoCompletionPort((HANDLE)sock->fd,
-               sock->manager->hIoCompletionPort, (DWORD)sock,
-               sock->manager->maxIOCPThreads);
-       InterlockedIncrement(&sock->manager->iocp_total);
-}
-
-isc_result_t
-socket_event_minit(sock_event_list *evlist) {
-       BOOL bReset;
-       int i;
-       int stat;
-       WSAEVENT hEvent;
-       char strbuf[ISC_STRERRORSIZE];
-
-       REQUIRE(evlist != NULL);
-       /* Initialize the Event List */
-       evlist->max_event = 0;
-       evlist->total_events = 0;
-       for (i = 0; i < MAX_EVENTS; i++) {
-               evlist->aSockList[i] = NULL;
-               evlist->aEventList[i] = (WSAEVENT) 0;
-       }
+               sock->manager->hIoCompletionPort, (ULONG_PTR)sock, 0);
 
-       /*
-        * The event list needs its own event handle so that when we
-        * want to change the list the event loop can be notified.
-        */
-       hEvent = WSACreateEvent();
-       if (hEvent == WSA_INVALID_EVENT) {
-               stat = WSAGetLastError();
-               isc__strerror(stat, strbuf, sizeof(strbuf));
+       if (hiocp == NULL) {
+               DWORD errval = GetLastError();
+               isc__strerror(errval, strbuf, sizeof(strbuf));
                isc_log_iwrite(isc_lctx,
                                ISC_LOGCATEGORY_GENERAL,
                                ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
                                isc_msgcat, ISC_MSGSET_SOCKET,
                                ISC_MSG_TOOMANYHANDLES,
-                               "%s: too many open WSA event handles: %s",
-                               "WSACreateEvent", strbuf);
-               return (ISC_R_UNEXPECTED);
+                               "iocompletionport_update: failed to open"
+                               " io completion port: %s",
+                               strbuf);
+
+               /* XXXMLG temporary hack to make failures detected.
+                * This function should return errors to the caller, not
+                * exit here.
+                */
+               FATAL_ERROR(__FILE__, __LINE__,
+                               isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
+                               ISC_MSG_FAILED,
+                               "CreateIoCompletionPort() failed "
+                               "during initialization: %s"),
+                               strbuf);
+               exit(1);
        }
 
-       evlist->aEventList[0] = hEvent;
-       (evlist->max_event)++;
-       bReset = WSAResetEvent(evlist->aEventList[0]);
-       return (ISC_R_SUCCESS);
+       InterlockedIncrement(&sock->manager->iocp_total);
 }
+
 /*
- * Event Thread Initialization
+ * Routine to cleanup and then close the socket.
+ * Only close the socket here if it is NOT associated
+ * with an event, otherwise the WSAWaitForMultipleEvents
+ * may fail due to the fact that the Wait should not
+ * be running while closing an event or a socket.
+ * The socket is locked before calling this function
  */
-isc_result_t
-event_thread_create(events_thread_t **evthreadp, isc_socketmgr_t *manager) {
-       events_thread_t *evthread;
+void
+socket_close(isc_socket_t *sock) {
 
-       REQUIRE(VALID_MANAGER(manager));
-       REQUIRE(evthreadp != NULL && *evthreadp == NULL);
+       REQUIRE(sock != NULL);
 
-       evthread = isc_mem_get(manager->mctx, sizeof(*evthread));
-       if (socket_event_minit(&evthread->sockev_list) != ISC_R_SUCCESS) {
-               isc_mem_put(manager->mctx, evthread, sizeof(*evthread));
-               return (ISC_R_UNEXPECTED);
+       if (sock->fd != INVALID_SOCKET) {
+               closesocket(sock->fd);
+               sock->fd = INVALID_SOCKET;
+               _set_state(sock, SOCK_CLOSED);
+               InterlockedDecrement(&sock->manager->totalSockets);
        }
-       ISC_LINK_INIT(evthread, link);
-       evthread->manager = manager;
+}
 
-       ISC_LIST_APPEND(manager->ev_threads, evthread, link);
+static isc_once_t initialise_once = ISC_ONCE_INIT;
+static isc_boolean_t initialised = ISC_FALSE;
 
-       /*
-        * Start up the event wait thread.
-        */
-       if (isc_thread_create(event_wait, evthread, &evthread->thread_handle) !=
-           ISC_R_SUCCESS) {
-               isc_mem_put(manager->mctx, evthread, sizeof(*evthread));
-               UNEXPECTED_ERROR(__FILE__, __LINE__,
-                                "isc_thread_create() %s",
-                                isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
-                                               ISC_MSG_FAILED, "failed"));
-               return (ISC_R_UNEXPECTED);
-       }
-       *evthreadp = evthread;
-       return (ISC_R_SUCCESS);
-}
-/*
- * Locate a thread with space for additional events or create one if
- * necessary. The manager is locked at this point so the information
- * cannot be changed by another thread while we are searching.
- */
-void
-locate_available_thread(isc_socketmgr_t *manager) {
-       events_thread_t *evthread;
-       DWORD threadid = GetCurrentThreadId();
+static void
+initialise(void) {
+       WORD wVersionRequested;
+       WSADATA wsaData;
+       int err;
+       SOCKET sock;
+       GUID GUIDConnectEx = WSAID_CONNECTEX;
+       GUID GUIDAcceptEx = WSAID_ACCEPTEX;
+       GUID GUIDGetAcceptExSockaddrs = WSAID_GETACCEPTEXSOCKADDRS;
+       DWORD dwBytes;
 
-       evthread = ISC_LIST_HEAD(manager->ev_threads);
-       while (evthread != NULL) {
-               /*
-                * We need to find a thread with space to add an event
-                * If we find it, alert it to process the event change
-                * list
-                */
-               if(threadid != evthread->thread_id &&
-                       evthread->sockev_list.max_event < MAX_EVENTS) {
-                       WSASetEvent(evthread->sockev_list.aEventList[0]);
-                       return;
-               }
-               evthread = ISC_LIST_NEXT(evthread, link);
+       /* Need Winsock 2.2 or better */
+       wVersionRequested = MAKEWORD(2, 2);
+
+       err = WSAStartup(wVersionRequested, &wsaData);
+       if (err != 0) {
+               char strbuf[ISC_STRERRORSIZE];
+               isc__strerror(err, strbuf, sizeof(strbuf));
+               FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s",
+                           isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
+                                          ISC_MSG_FAILED, "failed"),
+                           strbuf);
+               exit(1);
        }
        /*
-        * We need to create a new thread as other threads are full.
-        * If we succeed in creating the thread, alert it to
-        * process the event change list since it will have space.
-        * If we are unable to create one, the event will stay on the
-        * list and the next event_wait thread will try again to add
-        * the event. It will call here again if it has no space.
+        * The following APIs do not exist as functions in a library, but we must
+        * ask winsock for them.  They are "extensions" -- but why they cannot be
+        * actual functions is beyond me.  So, ask winsock for the pointers to the
+        * functions we need.
         */
-       if (event_thread_create(&evthread, manager) == ISC_R_SUCCESS) {
-               WSASetEvent(evthread->sockev_list.aEventList[0]);
-       }
+       sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+       INSIST(sock != INVALID_SOCKET);
+       err = WSAIoctl(sock,  SIO_GET_EXTENSION_FUNCTION_POINTER,
+                &GUIDConnectEx, sizeof(GUIDConnectEx),
+                &ISCConnectEx, sizeof(ISCConnectEx),
+                &dwBytes, NULL, NULL);
+       INSIST(err == 0);
 
-}
+       err = WSAIoctl(sock,  SIO_GET_EXTENSION_FUNCTION_POINTER,
+                &GUIDAcceptEx, sizeof(GUIDAcceptEx),
+                &ISCAcceptEx, sizeof(ISCAcceptEx),
+                &dwBytes, NULL, NULL);
+       INSIST(err == 0);
 
-isc_boolean_t
-socket_eventlist_add(event_change_t *evchange, sock_event_list *evlist,
-                    isc_socketmgr_t *manager) {
-       int max_event;
-       isc_socket_t *sock;
-       REQUIRE(evchange != NULL);
+       err = WSAIoctl(sock,  SIO_GET_EXTENSION_FUNCTION_POINTER,
+                &GUIDGetAcceptExSockaddrs, sizeof(GUIDGetAcceptExSockaddrs),
+                &ISCGetAcceptExSockaddrs, sizeof(ISCGetAcceptExSockaddrs),
+                &dwBytes, NULL, NULL);
+       INSIST(err == 0);
 
-       sock = evchange->sock;
-       REQUIRE(sock != NULL);
-       REQUIRE(sock->hEvent != NULL);
-       REQUIRE(evlist != NULL);
+       closesocket(sock);
 
-       max_event = evlist->max_event;
-       if(max_event >= MAX_EVENTS) {
-               locate_available_thread(manager);
-               return (ISC_FALSE);
-       }
-       /*
-        * Lock the socket before updating
-        */
-       LOCK(&sock->lock);
-       evlist->aSockList[max_event] = sock;
-       evlist->aEventList[max_event] = sock->hEvent;
-       evlist->max_event++;
-       evlist->total_events++;
-       sock->hAlert = evlist->aEventList[0];
-       sock->evthread_id = GetCurrentThreadId();
-       UNLOCK(&sock->lock);
-       return (ISC_TRUE);
+       initialised = ISC_TRUE;
 }
 
 /*
- * Delete the event from the list
+ * Initialize socket services
  */
-isc_boolean_t
-eventlist_event_delete(isc_socket_t *sock, sock_event_list *evlist,
-                       isc_socketmgr_t *manager)
+void
+InitSockets(void) {
+       RUNTIME_CHECK(isc_once_do(&initialise_once,
+                                  initialise) == ISC_R_SUCCESS);
+       if (!initialised)
+               exit(1);
+}
+
+int
+internal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo,
+                struct msghdr *messagehdr, int flags, int *Error)
 {
-       int i;
-       WSAEVENT hEvent;
-       int iEvent = -1;
-       isc_boolean_t dofree = ISC_FALSE;
+       int Result;
+       DWORD BytesSent;
+       DWORD Flags = flags;
+       int total_sent;
 
-       REQUIRE(sock != NULL);
-       REQUIRE(evlist != NULL);
-       REQUIRE(manager != NULL);
-       REQUIRE(sock->hEvent != NULL);
-       hEvent = sock->hEvent;
-
-       /* Find the Event */
-       for (i = 1; i < evlist->max_event; i++) {
-               if (evlist->aEventList[i] == hEvent) {
-                       iEvent = i;
+       *Error = 0;
+       Result = WSASendTo(sock->fd, messagehdr->msg_iov,
+                          messagehdr->msg_iovlen, &BytesSent,
+                          Flags, &messagehdr->to_addr,
+                          messagehdr->to_addr_len, (LPWSAOVERLAPPED)lpo,
+                          NULL);
+
+       total_sent = (int)BytesSent;
+
+       /* Check for errors.*/
+       if (Result == SOCKET_ERROR) {
+               *Error = WSAGetLastError();
+
+               switch (*Error) {
+               case WSA_IO_INCOMPLETE:
+               case WSA_WAIT_IO_COMPLETION:
+               case WSA_IO_PENDING:
+               case NO_ERROR:          /* Strange, but okay */
+                       sock->pending_iocp++;
+                       sock->pending_send++;
+                       break;
+
+               default:
+                       return (-1);
                        break;
                }
+       } else {
+               sock->pending_iocp++;
+               sock->pending_send++;
        }
 
+       if (lpo != NULL)
+               return (0);
+       else
+               return (total_sent);
+}
+
+static void
+queue_receive_request(isc_socket_t *sock) {
+       DWORD Flags = 0;
+       DWORD NumBytes = 0;
+       int total_bytes = 0;
+       int Result;
+       int Error;
+       WSABUF iov[1];
+       IoCompletionInfo *lpo;
+       isc_result_t isc_result;
+
        /*
-        * Actual event start at 1
-        * event at 0 is the thread wakeup
+        * If we already have a receive pending, do nothing.
         */
-       if (iEvent < 1)
-               return (ISC_FALSE);
+       if (sock->pending_recv > 0)
+               return;
 
-       for(i = iEvent; i < (evlist->max_event - 1); i++) {
-               evlist->aEventList[i] = evlist->aEventList[i + 1];
-               evlist->aSockList[i] = evlist->aSockList[i + 1];
-       }
+       /*
+        * If no one is waiting, do nothing.
+        */
+       if (ISC_LIST_EMPTY(sock->recv_list))
+               return;
 
-       evlist->aEventList[evlist->max_event - 1] = 0;
-       evlist->aSockList[evlist->max_event - 1] = NULL;
+       INSIST(sock->recvbuf.remaining == 0);
+       INSIST(sock->fd != INVALID_SOCKET);
 
-       /* Cleanup */
-       WSAEventSelect(sock->fd, hEvent, 0);
-       WSACloseEvent(hEvent);
-       InterlockedDecrement(&sock->manager->totalHandles);
+       iov[0].len = sock->recvbuf.len;
+       iov[0].buf = sock->recvbuf.base;
 
-       LOCK(&sock->lock);
-       sock->hEvent = NULL;
-       sock->hAlert = NULL;
-       sock->wait_type = 0;
+       lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
+                                           HEAP_ZERO_MEMORY,
+                                           sizeof(IoCompletionInfo));
+       RUNTIME_CHECK(lpo != NULL);
+       lpo->request_type = SOCKET_RECV;
 
-       if (sock->pending_close) {
-               sock->pending_close = 0;
-               closesocket(sock->fd);
-               sock->fd = INVALID_SOCKET;
-               InterlockedDecrement(&sock->manager->totalSockets);
-       }
+       sock->recvbuf.from_addr_len = sizeof(sock->recvbuf.from_addr);
 
-       UNLOCK(&sock->lock);
-       evlist->max_event--;
-       evlist->total_events--;
+       Error = 0;
+       Result = WSARecvFrom((SOCKET)sock->fd, iov, 1,
+                            &NumBytes, &Flags,
+                            &sock->recvbuf.from_addr,
+                            &sock->recvbuf.from_addr_len,
+                            (LPWSAOVERLAPPED)lpo, NULL);
 
-       return (ISC_TRUE);
-}
+       /* Check for errors. */
+       if (Result == SOCKET_ERROR) {
+               Error = WSAGetLastError();
 
-/*
- * Note that the eventLock is locked before calling this function.
- */
-isc_boolean_t
-socket_eventlist_delete(event_change_t *evchange, sock_event_list *evlist,
-                       isc_socketmgr_t *manager)
-{
+               switch (Error) {
+               case WSA_IO_PENDING:
+                       sock->pending_iocp++;
+                       sock->pending_recv++;
+                       break;
 
-       REQUIRE(evchange != NULL);
-       /*  Make sure this is the right thread from which to delete the event */
-       if (evchange->evthread_id != GetCurrentThreadId())
-               return (ISC_FALSE);
+               default:
+                       /*
+                        * The request was rejected outright, so no completion
+                        * will be delivered for it.  Map the Winsock error
+                        * code (Result is only SOCKET_ERROR here) and release
+                        * the overlapped structure ourselves.
+                        */
+                       isc_result = isc__errno2result(Error);
+                       if (isc_result == ISC_R_UNEXPECTED)
+                               UNEXPECTED_ERROR(__FILE__, __LINE__,
+                                       "WSARecvFrom: Windows error code: %d, isc result %d",
+                                       Error, isc_result);
+                       send_recvdone_abort(sock, isc_result);
+                       HeapFree(hHeapHandle, 0, lpo);
+                       lpo = NULL;
+                       break;
+               }
+       } else {
+               /*
+                * The recv() finished immediately, but we will still get
+                * a completion event.  Rather than duplicate code, let
+                * that thread handle sending the data along its way.
+                */
+               sock->pending_iocp++;
+               sock->pending_recv++;
+       }
+
+       socket_log(__LINE__, sock, NULL, IOEVENT,
+                  isc_msgcat, ISC_MSGSET_SOCKET,
+                  ISC_MSG_DOIORECV,
+                  "queue_io_request: fd %d result %d error %d",
+                  sock->fd, Result, Error);
 
-       return (eventlist_event_delete(evchange->sock, evlist, manager));
-}
-/*
- * Get the event changes off of the list and apply the
- * requested changes. The manager lock is taken out at
- * the start of this function to prevent other event_wait
- * threads processing the same information at the same
- * time. The queue may not be empty on exit since other
- * threads may be involved in processing the queue.
- *
- * The deletes are done first in order that there be space
- * available for the events being added in the same thread
- * in case the event list is almost full. This reduces the
- * probability of having to create another thread which would
- * increase overhead costs.
- */
-isc_result_t
-process_eventlist(sock_event_list *evlist, isc_socketmgr_t *manager) {
-       event_change_t *evchange;
-       event_change_t *next;
-       isc_boolean_t del;
-
-       REQUIRE(evlist != NULL);
-
-       LOCK(&manager->lock);
-
-       /*
-        * First the deletes.
-        */
-       evchange = ISC_LIST_HEAD(manager->event_updates);
-       while (evchange != NULL) {
-               next = ISC_LIST_NEXT(evchange, link);
-               del = ISC_FALSE;
-               if (evchange->action == EVENT_DELETE) {
-                       del = socket_eventlist_delete(evchange, evlist,
-                                                     manager);
-
-                       /*
-                        * Delete only if this thread's socket list was
-                        * updated.
-                        */
-                       if (del) {
-                               ISC_LIST_DEQUEUE(manager->event_updates,
-                                                evchange, link);
-                               HeapFree(hHeapHandle, 0, evchange);
-                               manager->event_written--;
-                       }
-               }
-               evchange = next;
-       }
-
-       /*
-        * Now the adds.
-        */
-       evchange = ISC_LIST_HEAD(manager->event_updates);
-       while (evchange != NULL) {
-               next = ISC_LIST_NEXT(evchange, link);
-               del = ISC_FALSE;
-               if (evchange->action == EVENT_ADD) {
-                       del = socket_eventlist_add(evchange, evlist, manager);
-
-                       /*
-                        * Delete only if this thread's socket list was
-                        * updated.
-                        */
-                       if (del) {
-                               ISC_LIST_DEQUEUE(manager->event_updates,
-                                                evchange, link);
-                               HeapFree(hHeapHandle, 0, evchange);
-                               manager->event_written--;
-                       }
-               }
-               evchange = next;
-       }
-       UNLOCK(&manager->lock);
-       return (ISC_R_SUCCESS);
-}
-
-/*
- * Add the event list changes to the queue and notify the
- * event loop
- */
-static void
-notify_eventlist(isc_socket_t *sock, isc_socketmgr_t *manager,
-                unsigned int action)
-{
-
-       event_change_t *evchange;
-
-       REQUIRE(VALID_MANAGER(manager));
-       REQUIRE(sock != NULL);
-
-       evchange = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY,
-                            sizeof(event_change_t));
-       evchange->sock = sock;
-       evchange->action = action;
-       evchange->hEvent = sock->hEvent;
-       evchange->fd = sock->fd;
-       evchange->evthread_id = sock->evthread_id;
-
-       LOCK(&manager->lock);
-       ISC_LIST_APPEND(manager->event_updates, evchange, link);
-       sock->manager->event_written++;
-       UNLOCK(&manager->lock);
-
-       /* Alert the Wait List */
-       if (sock->hAlert != NULL)
-               WSASetEvent(sock->hAlert);
-       else
-               WSASetEvent(manager->prime_alert);
-}
-
-/*
- * Note that the socket is already locked before calling this function
- */
-isc_result_t
-socket_event_add(isc_socket_t *sock, long type) {
-       int stat;
-       WSAEVENT hEvent;
-       char strbuf[ISC_STRERRORSIZE];
-       const char *msg;
-
-       REQUIRE(sock != NULL);
-
-       hEvent = WSACreateEvent();
-       if (hEvent == WSA_INVALID_EVENT) {
-               stat = WSAGetLastError();
-               isc__strerror(stat, strbuf, sizeof(strbuf));
-               isc_log_iwrite(isc_lctx,
-                               ISC_LOGCATEGORY_GENERAL,
-                               ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
-                               isc_msgcat, ISC_MSGSET_SOCKET,
-                               ISC_MSG_TOOMANYHANDLES,
-                               "%s: too many open WSA event handles: %s",
-                               "WSACreateEvent", strbuf);
-               return (ISC_R_UNEXPECTED);
-       }
-       if (WSAEventSelect(sock->fd, hEvent, type) != 0) {
-               stat = WSAGetLastError();
-               isc__strerror(stat, strbuf, sizeof(strbuf));
-               msg = isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
-                                    ISC_MSG_FAILED, "failed");
-               UNEXPECTED_ERROR(__FILE__, __LINE__, "WSAEventSelect: %s: %s",
-                                msg, strbuf);
-               WSACloseEvent(hEvent);
-               return (ISC_R_UNEXPECTED);
-       }
-       sock->hEvent = hEvent;
-       InterlockedIncrement(&sock->manager->totalHandles);
-       InterlockedIncrement(&sock->manager->totalHandleRequests);
-
-       sock->wait_type = type;
-       notify_eventlist(sock, sock->manager, EVENT_ADD);
-       return (ISC_R_SUCCESS);
-}
-
-/*
- * Note that the socket is locked before calling this function
- * Note also that we cannot close the socket here or event handle being
- * used since the event is being waited upon and any change to either
- * will signal the change. The notify_eventlist will take care of
- * these details.
- */
-void
-socket_event_delete(isc_socket_t *sock) {
-
-       REQUIRE(sock != NULL);
-       REQUIRE(sock->hEvent != NULL);
-
-       sock->wait_type = 0;
-       sock->pending_close = 1;
-       notify_eventlist(sock, sock->manager, EVENT_DELETE);
-       sock->evthread_id = 0;
-}
-
-/*
- * Routine to cleanup and then close the socket.
- * Only close the socket here if it is NOT associated
- * with an event, otherwise the WSAWaitForMultipleEvents
- * may fail due to the fact that the the Wait should not
- * be running while closing an event or a socket.
- * The socket is locked before calling this function
- */
-void
-socket_close(isc_socket_t *sock) {
-
-       REQUIRE(sock != NULL);
-
-       if (sock->fd != INVALID_SOCKET) {
-               sock->pending_close = 0;
-               if (sock->hEvent != NULL) {
-                       socket_event_delete(sock);
-               } else  {
-                       closesocket(sock->fd);
-                       sock->fd = INVALID_SOCKET;
-                       InterlockedDecrement(&sock->manager->totalSockets);
-               }
-       }
-}
-
-static isc_once_t initialise_once = ISC_ONCE_INIT;
-static isc_boolean_t initialised = ISC_FALSE;
-
-static void
-initialise(void) {
-       WORD wVersionRequested;
-       WSADATA wsaData;
-       int err;
-
-       /* Need Winsock 2.0 or better */
-       wVersionRequested = MAKEWORD(2, 0);
-
-       err = WSAStartup(wVersionRequested, &wsaData);
-       if (err != 0) {
-               char strbuf[ISC_STRERRORSIZE];
-               isc__strerror(err, strbuf, sizeof(strbuf));
-               FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s",
-                           isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
-                                          ISC_MSG_FAILED, "failed"),
-                           strbuf);
-       } else
-               initialised = ISC_TRUE;
-}
-
-/*
- * Initialize socket services
- */
-void
-InitSockets(void) {
-       RUNTIME_CHECK(isc_once_do(&initialise_once,
-                                  initialise) == ISC_R_SUCCESS);
-       if (!initialised)
-               exit(1);
-}
-
-int
-internal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo,
-                struct msghdr *messagehdr, int flags, int *Error)
-{
-       int Result;
-       DWORD BytesSent;
-       DWORD Flags = flags;
-       int total_sent;
-
-       *Error = 0;
-       Result = WSASendTo(sock->fd, messagehdr->msg_iov,
-                          messagehdr->msg_iovlen, &BytesSent,
-                          Flags, messagehdr->msg_name,
-                          messagehdr->msg_namelen, (LPOVERLAPPED) lpo,
-                          NULL);
-
-       total_sent = (int) BytesSent;
-
-       /* Check for errors.*/
-       if (Result == SOCKET_ERROR) {
-
-               *Error = WSAGetLastError();
-
-               switch (*Error) {
-               case WSA_IO_INCOMPLETE :
-               case WSA_WAIT_IO_COMPLETION :
-               case WSA_IO_PENDING :
-               case NO_ERROR :         /* Strange, but okay */
-                       sock->pending_send++;
-                       break;
-
-               default :
-                       return (-1);
-                       break;
-               }
-       } else
-               sock->pending_send++;
-       if (lpo != NULL)
-               return (0);
-       else
-               return (total_sent);
-}
-
-int
-internal_recvmsg(isc_socket_t *sock, IoCompletionInfo *lpo,
-                struct msghdr *messagehdr, int flags, int *Error)
-{
-       DWORD Flags = 0;
-       DWORD NumBytes = 0;
-       int total_bytes = 0;
-       int Result;
-
-       *Error = 0;
-       Result = WSARecvFrom((SOCKET) sock->fd,
-                            messagehdr->msg_iov,
-                            messagehdr->msg_iovlen,
-                            &NumBytes,
-                            &Flags,
-                            messagehdr->msg_name,
-                            (int *)&(messagehdr->msg_namelen),
-                            (LPOVERLAPPED) lpo,
-                            NULL);
-
-       total_bytes = (int) NumBytes;
-
-       socket_log(__LINE__, sock, NULL, IOEVENT,
-                  isc_msgcat, ISC_MSGSET_SOCKET,
-                  ISC_MSG_DOIORECV,
-                  "internal_recvmsg: fd %d result %d error %d %d bytes",
-                  sock->fd, Result, *Error, total_bytes);
-
-       /* Check for errors. */
-       if (Result == SOCKET_ERROR) {
-
-               *Error = WSAGetLastError();
-
-               switch (*Error) {
-               case WSA_IO_INCOMPLETE:
-               case WSA_WAIT_IO_COMPLETION:
-               case WSA_IO_PENDING:
-               case NO_ERROR :         /* Strange, but okay */
-                       sock->pending_recv++;
-                       break;
-
-               default :
-                       return (-1);
-                       break;
-               }
-       } else {
-               sock->pending_recv++;
-       }
-
-       /* Return the flags received in header */
-       messagehdr->msg_flags = Flags;
-       if (lpo != NULL)
-               return (-1);
-       else
-               return (total_bytes);
+       CONSISTENT(sock);
 }
 
 static void
@@ -1150,20 +846,26 @@ socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
                               msgcat, msgset, message,
                               "socket %p line %d: %s", sock, lineno, msgbuf);
 #if XXXMLG_DEBUG
-               fprintf(logfile, "%s socket %p line %d: %s:\n", timebuf, sock, lineno, msgbuf);
+               if (logfile)
+                       fprintf(logfile, "%s socket %p line %d: %s:\n",
+                               timebuf, sock, lineno, msgbuf);
 #endif
        } else {
                isc_sockaddr_format(address, peerbuf, sizeof(peerbuf));
                isc_log_iwrite(isc_lctx, category, module, level,
                               msgcat, msgset, message,
-                                  "socket %p line %d: %s: %s", sock, lineno, peerbuf, msgbuf);
+                                  "socket %p line %d peer %s: %s", sock, lineno,
+                                  peerbuf, msgbuf);
 #if XXXMLG_DEBUG
-               fprintf(logfile, "%s socket %p line %d: %s: %s\n", timebuf, sock, lineno, peerbuf, msgbuf);
+               if (logfile)
+                       fprintf(logfile, "%s socket %p line %d: %s: %s\n",
+                       timebuf, sock, lineno, peerbuf, msgbuf);
 #endif
        }
 
 #if XXXMLG_DEBUG
-       fflush(logfile);
+       if (logfile)
+               fflush(logfile);
 #endif
 }
 
@@ -1207,7 +909,7 @@ connection_reset_fix(SOCKET fd) {
        BOOL  bNewBehavior = FALSE;
        DWORD status;
 
-       if(isc_win32os_majorversion() < 5)
+       if (isc_win32os_majorversion() < 5)
                return (ISC_R_SUCCESS); /*  NT 4.0 has no problem */
 
        /* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */
@@ -1236,24 +938,21 @@ connection_reset_fix(SOCKET fd) {
  */
 static void
 build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
-                 struct msghdr *msg, char *cmsg, WSABUF *iov)
+                 struct msghdr *msg, char *cmsg, WSABUF *iov,
+                 IoCompletionInfo  *lpo)
 {
        unsigned int iovcount;
        isc_buffer_t *buffer;
+       buflist_t  *cpbuffer;
        isc_region_t used;
        size_t write_count;
        size_t skip_count;
 
        memset(msg, 0, sizeof(*msg));
 
-       if (sock->type == isc_sockettype_udp) {
-               msg->msg_name = (void *)&dev->address.type.sa;
-               msg->msg_namelen = dev->address.length;
-       } else {
-               msg->msg_name = NULL;
-               msg->msg_namelen = 0;
-       }
-
+       memcpy(&msg->to_addr, &dev->address.type, dev->address.length);
+       msg->to_addr_len = dev->address.length;
+       
        buffer = ISC_LIST_HEAD(dev->bufferlist);
        write_count = 0;
        iovcount = 0;
@@ -1263,7 +962,20 @@ build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
         */
        if (buffer == NULL) {
                write_count = dev->region.length - dev->n;
-               iov[0].buf = (void *)(dev->region.base + dev->n);
+               cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
+               RUNTIME_CHECK(cpbuffer != NULL);
+               cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, write_count);
+               RUNTIME_CHECK(cpbuffer->buf != NULL);
+
+               socket_log(__LINE__, sock, NULL, TRACE,
+                  isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
+                  "alloc_buffer %p %d %p %d", cpbuffer, (int)sizeof(buflist_t),
+                  cpbuffer->buf, (int)write_count); /* %d takes int, not size_t */
+
+               memcpy(cpbuffer->buf,(dev->region.base + dev->n), write_count);
+               cpbuffer->buflen = write_count;
+               ISC_LIST_ENQUEUE(lpo->bufferlist, cpbuffer, link);
+               iov[0].buf = cpbuffer->buf;
                iov[0].len = write_count;
                iovcount = 1;
 
@@ -1289,10 +1001,22 @@ build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
                isc_buffer_usedregion(buffer, &used);
 
                if (used.length > 0) {
-                       iov[iovcount].buf = (void *)(used.base
-                                                         + skip_count);
+                       int uselen = used.length - skip_count;
+                       cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
+                       RUNTIME_CHECK(cpbuffer != NULL);
+                       cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, uselen);
+                       RUNTIME_CHECK(cpbuffer->buf != NULL);
+
+                       socket_log(__LINE__, sock, NULL, TRACE,
+                          isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
+                          "alloc_buffer %p %d %p %d", cpbuffer, (int)sizeof(buflist_t),
+                          cpbuffer->buf, uselen); /* log this copy's length */
+
+                       memcpy(cpbuffer->buf,(used.base + skip_count), uselen);
+                       cpbuffer->buflen = uselen;
+                       iov[iovcount].buf = cpbuffer->buf;
                        iov[iovcount].len = used.length - skip_count;
-                       write_count += (used.length - skip_count);
+                       write_count += uselen;
                        skip_count = 0;
                        iovcount++;
                }
@@ -1307,99 +1031,14 @@ build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
        msg->msg_totallen = write_count;
 }
 
-/*
- * Construct an iov array and attach it to the msghdr passed in.  This is
- * the RECV constructor, which will use the available region of the buffer
- * (if using a buffer list) or will use the internal region (if a single
- * buffer I/O is requested).
- *
- * Nothing can be NULL, and the done event must list at least one buffer
- * on the buffer linked list for this function to be meaningful.
- */
-static void
-build_msghdr_recv(isc_socket_t *sock, isc_socketevent_t *dev,
-                 struct msghdr *msg, char *cmsg, WSABUF *iov)
-{
-       unsigned int iovcount;
-       isc_buffer_t *buffer;
-       isc_region_t available;
-       size_t read_count;
-
-       memset(msg, 0, sizeof(struct msghdr));
-
-       if (sock->type == isc_sockettype_udp) {
-               memset(&dev->address, 0, sizeof(dev->address));
-               msg->msg_name = (void *)&dev->address.type.sa;
-               msg->msg_namelen = sizeof(dev->address.type);
-       } else { /* TCP */
-               msg->msg_name = NULL;
-               msg->msg_namelen = 0;
-               dev->address = sock->address;
-       }
-
-       buffer = ISC_LIST_HEAD(dev->bufferlist);
-       read_count = 0;
-
-       /*
-        * Single buffer I/O?  Skip what we've done so far in this region.
-        */
-       if (buffer == NULL) {
-               read_count = dev->region.length - dev->n;
-               iov[0].buf = (void *)(dev->region.base + dev->n);
-               iov[0].len = read_count;
-               iovcount = 1;
-       } else {
-               /*
-                * Multibuffer I/O.
-                * Skip empty buffers.
-                */
-               while (buffer != NULL) {
-                       REQUIRE(ISC_BUFFER_VALID(buffer));
-                       if (isc_buffer_availablelength(buffer) != 0)
-                               break;
-                       buffer = ISC_LIST_NEXT(buffer, link);
-               }
-
-               iovcount = 0;
-               while (buffer != NULL) {
-                       INSIST(iovcount < MAXSCATTERGATHER_RECV);
-
-                       isc_buffer_availableregion(buffer, &available);
-
-                       if (available.length > 0) {
-                               iov[iovcount].buf = (void *)(available.base);
-                               iov[iovcount].len = available.length;
-                               read_count += available.length;
-                               iovcount++;
-                       }
-                       buffer = ISC_LIST_NEXT(buffer, link);
-               }
-       }
-
-       /*
-        * If needed, set up to receive that one extra byte.  Note that
-        * we know there is at least one iov left, since we stole it
-        * at the top of this function.
-        */
-
-       msg->msg_iov = iov;
-       msg->msg_iovlen = iovcount;
-       msg->msg_totallen = read_count;
-}
-
 static void
 set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock,
                isc_socketevent_t *dev)
 {
-       if (sock->type == isc_sockettype_udp) {
-               if (address != NULL)
-                       dev->address = *address;
-               else
-                       dev->address = sock->address;
-       } else if (sock->type == isc_sockettype_tcp) {
-               INSIST(address == NULL);
+       if (address != NULL)
+               dev->address = *address;
+       else
                dev->address = sock->address;
-       }
 }
 
 static void
@@ -1424,7 +1063,7 @@ allocate_socketevent(isc_socket_t *sock, isc_eventtype_t eventtype,
        if (ev == NULL)
                return (NULL);
 
-       ev->result = ISC_R_UNEXPECTED;
+       ev->result = ISC_R_IOERROR; // XXXMLG temporary change to detect failure to set
        ISC_LINK_INIT(ev, ev_link);
        ISC_LIST_INIT(ev->bufferlist);
        ev->region.base = NULL;
@@ -1452,70 +1091,128 @@ dump_msg(struct msghdr *msg, isc_socket_t *sock) {
 }
 #endif
 
-static int
-completeio_recv(isc_socket_t *sock, isc_socketevent_t *dev,
-               struct msghdr *messagehdr, int cc, int recv_errno)
-{
-       size_t actual_count;
-       isc_buffer_t *buffer;
-
-#define SOFT_OR_HARD(_system, _isc) \
-       if (recv_errno == _system) { \
-               if (sock->connected) { \
-                       dev->result = _isc; \
-                       return (DOIO_HARD); \
-               } \
-               return (DOIO_SOFT); \
+/*
+ * Map a Windows error code to an ISC result and a DOIO_SOFT/DOIO_HARD status.
+ */
+int
+map_socket_error(isc_socket_t *sock, int windows_errno, int *isc_errno,
+                char *errorstring, size_t bufsize) {
+
+       int doreturn;
+       switch (windows_errno) {
+       case WSAECONNREFUSED:
+               *isc_errno = ISC_R_CONNREFUSED;
+               if (sock->connected)
+                       doreturn = DOIO_HARD;
+               else
+                       doreturn = DOIO_SOFT;
+               break;
+       case WSAENETUNREACH:
+       case ERROR_NETWORK_UNREACHABLE:
+               *isc_errno = ISC_R_NETUNREACH;
+               if (sock->connected)
+                       doreturn = DOIO_HARD;
+               else
+                       doreturn = DOIO_SOFT;
+               break;
+       case ERROR_PORT_UNREACHABLE:
+       case ERROR_HOST_UNREACHABLE:
+       case WSAEHOSTUNREACH:
+               *isc_errno = ISC_R_HOSTUNREACH;
+               if (sock->connected)
+                       doreturn = DOIO_HARD;
+               else
+                       doreturn = DOIO_SOFT;
+               break;
+       case WSAENETDOWN:
+               *isc_errno = ISC_R_NETDOWN;
+               if (sock->connected)
+                       doreturn = DOIO_HARD;
+               else
+                       doreturn = DOIO_SOFT;
+               break;
+       case WSAEHOSTDOWN:
+               *isc_errno = ISC_R_HOSTDOWN;
+               if (sock->connected)
+                       doreturn = DOIO_HARD;
+               else
+                       doreturn = DOIO_SOFT;
+               break;
+       case WSAEACCES:
+               *isc_errno = ISC_R_NOPERM;
+               if (sock->connected)
+                       doreturn = DOIO_HARD;
+               else
+                       doreturn = DOIO_SOFT;
+               break;
+       case WSAECONNRESET:
+       case WSAENETRESET:
+       case WSAECONNABORTED:
+       case WSAEDISCON:
+               *isc_errno = ISC_R_CONNECTIONRESET;
+               if (sock->connected)
+                       doreturn = DOIO_HARD;
+               else
+                       doreturn = DOIO_SOFT;
+               break;
+       case WSAENOTCONN:
+               *isc_errno = ISC_R_NOTCONNECTED;
+               if (sock->connected)
+                       doreturn = DOIO_HARD;
+               else
+                       doreturn = DOIO_SOFT;
+               break;
+       case ERROR_OPERATION_ABORTED:
+       case ERROR_CONNECTION_ABORTED:
+       case ERROR_REQUEST_ABORTED:
+               *isc_errno = ISC_R_CONNECTIONRESET;
+               doreturn = DOIO_HARD;
+               break;
+       case WSAENOBUFS:
+               *isc_errno = ISC_R_NORESOURCES;
+               doreturn = DOIO_HARD;
+               break;
+       case WSAEAFNOSUPPORT:
+               *isc_errno = ISC_R_FAMILYNOSUPPORT;
+               doreturn = DOIO_HARD;
+               break;
+       case WSAEADDRNOTAVAIL:
+               *isc_errno = ISC_R_ADDRNOTAVAIL;
+               doreturn = DOIO_HARD;
+               break;
+       case WSAEDESTADDRREQ:
+               *isc_errno = ISC_R_BADADDRESSFORM;
+               doreturn = DOIO_HARD;
+               break;
+       case ERROR_NETNAME_DELETED:
+               *isc_errno = ISC_R_NETDOWN;
+               doreturn = DOIO_HARD;
+               break;
+       default:
+               *isc_errno = ISC_R_IOERROR;
+               doreturn = DOIO_HARD;
+               break;
        }
-
-#define ALWAYS_HARD(_system, _isc) \
-       if (recv_errno == _system) { \
-               dev->result = _isc; \
-               return (DOIO_HARD); \
+       if (doreturn == DOIO_HARD) {
+               isc__strerror(windows_errno, errorstring, bufsize);
        }
+       return (doreturn);
+}
 
-       if (recv_errno != 0) {
-
-               if (SOFT_ERROR(recv_errno))
-                       return (DOIO_SOFT);
+static void
+fill_recv(isc_socket_t *sock, isc_socketevent_t *dev) {
+       isc_region_t r;
+       int copylen;
+       isc_buffer_t *buffer;
 
-               SOFT_OR_HARD(WSAECONNREFUSED, ISC_R_CONNREFUSED);
-               SOFT_OR_HARD(WSAENETUNREACH, ISC_R_NETUNREACH);
-               SOFT_OR_HARD(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH);
-               SOFT_OR_HARD(WSAECONNRESET, ISC_R_CONNECTIONRESET);
-               SOFT_OR_HARD(WSAENETRESET, ISC_R_CONNECTIONRESET);
-               SOFT_OR_HARD(WSAECONNABORTED, ISC_R_CONNECTIONRESET);
-               SOFT_OR_HARD(WSAEDISCON, ISC_R_CONNECTIONRESET);
-               SOFT_OR_HARD(WSAENETDOWN, ISC_R_NETDOWN);
-               ALWAYS_HARD(ERROR_OPERATION_ABORTED, ISC_R_CONNECTIONRESET);
-               ALWAYS_HARD(ERROR_REQUEST_ABORTED, ISC_R_CONNECTIONRESET);
-               ALWAYS_HARD(ERROR_NETNAME_DELETED, ISC_R_CONNECTIONRESET);
-               ALWAYS_HARD(ERROR_PORT_UNREACHABLE, ISC_R_HOSTUNREACH);
-               ALWAYS_HARD(ERROR_HOST_UNREACHABLE, ISC_R_HOSTUNREACH);
-               ALWAYS_HARD(ERROR_NETWORK_UNREACHABLE, ISC_R_NETUNREACH);
-               ALWAYS_HARD(ERROR_NETNAME_DELETED, ISC_R_NETUNREACH);
-//             ALWAYS_HARD(WSA_OPERATION_ABORTED, ISC_R_CONNECTIONRESET);
-               ALWAYS_HARD(WSAENOBUFS, ISC_R_NORESOURCES);
-#undef SOFT_OR_HARD
-#undef ALWAYS_HARD
-
-               if (recv_errno == WSA_OPERATION_ABORTED) {
-                       return (DOIO_EOF);
-               }
-               dev->result = isc__errno2result(recv_errno);
-               return (DOIO_HARD);
-       }
-
-       /*
-        * On TCP, zero length reads indicate EOF, while on
-        * UDP, zero length reads are perfectly valid, although
-        * strange.
-        */
-       if ((sock->type == isc_sockettype_tcp) && (cc == 0))
-               return (DOIO_EOF);
+       INSIST(dev->n < dev->minimum);
+       INSIST(sock->recvbuf.remaining > 0);
+       INSIST(sock->pending_recv == 0);
 
        if (sock->type == isc_sockettype_udp) {
-               dev->address.length = messagehdr->msg_namelen;
+               dev->address.length = sock->recvbuf.from_addr_len;
+               memcpy(&dev->address.type, &sock->recvbuf.from_addr,
+                   sock->recvbuf.from_addr_len);
                if (isc_sockaddr_getport(&dev->address) == 0) {
                        if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
                                socket_log(__LINE__, sock, &dev->address, IOEVENT,
@@ -1523,122 +1220,84 @@ completeio_recv(isc_socket_t *sock, isc_socketevent_t *dev,
                                           ISC_MSG_ZEROPORT,
                                           "dropping source port zero packet");
                        }
-                       return (DOIO_SOFT);
+                       sock->recvbuf.remaining = 0;
+                       return;
                }
+       } else if (sock->type == isc_sockettype_tcp) {
+               dev->address = sock->address;
        }
 
-       socket_log(__LINE__, sock, &dev->address, IOEVENT,
-                  isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_PKTRECV,
-                  "packet received correctly");
-
-       /*
-        * Overflow bit detection.  If we received MORE bytes than we should,
-        * this indicates an overflow situation.  Set the flag in the
-        * dev entry and adjust how much we read by one.
-        */
-#ifdef ISC_NET_RECVOVERFLOW
-       if ((sock->type == isc_sockettype_udp) && ((size_t)cc > read_count)) {
-               dev->attributes |= ISC_SOCKEVENTATTR_TRUNC;
-               cc--;
-       }
-#endif
-
        /*
-        * update the buffers (if any) and the i/o count
+        * Run through the list of buffers we were given, and find the
+        * first one with space.  Once it is found, loop through, filling
+        * the buffers as much as possible.
         */
-       dev->n += cc;
-       actual_count = cc;
        buffer = ISC_LIST_HEAD(dev->bufferlist);
-       while (buffer != NULL && actual_count > 0) {
-               REQUIRE(ISC_BUFFER_VALID(buffer));
-               if (isc_buffer_availablelength(buffer) <= actual_count) {
-                       actual_count -= isc_buffer_availablelength(buffer);
-                       isc_buffer_add(buffer,
-                                      isc_buffer_availablelength(buffer));
-               } else {
-                       isc_buffer_add(buffer, actual_count);
-                       actual_count = 0;
-                       break;
-               }
-               buffer = ISC_LIST_NEXT(buffer, link);
-               if (buffer == NULL) {
-                       INSIST(actual_count == 0);
+       if (buffer != NULL) { // Multi-buffer receive
+               while (buffer != NULL && sock->recvbuf.remaining > 0) {
+                       REQUIRE(ISC_BUFFER_VALID(buffer));
+                       if (isc_buffer_availablelength(buffer) > 0) {
+                               isc_buffer_availableregion(buffer, &r);
+                               copylen = min(r.length, sock->recvbuf.remaining);
+                               memcpy(r.base, sock->recvbuf.consume_position, copylen);
+                               sock->recvbuf.consume_position += copylen;
+                               sock->recvbuf.remaining -= copylen;
+                               isc_buffer_add(buffer, copylen);
+                               dev->n += copylen;
+                       }
+                       buffer = ISC_LIST_NEXT(buffer, link);
                }
+       } else { // Single-buffer receive
+               copylen = min(dev->region.length - dev->n, sock->recvbuf.remaining);
+               memcpy(dev->region.base + dev->n, sock->recvbuf.consume_position, copylen);
+               sock->recvbuf.consume_position += copylen;
+               sock->recvbuf.remaining -= copylen;
+               dev->n += copylen;
        }
 
        /*
-        * If we read less than we expected, update counters,
-        * and let the upper layer handle it.
-        */
-       if ((cc != messagehdr->msg_totallen) && (dev->n < dev->minimum))
-               return (DOIO_SOFT);
-
-       /*
-        * Full reads are posted, or partials if partials are ok.
+        * UDP receives are all-consuming.  That is, if we have 4k worth of
+        * data in our receive buffer, and the caller only gave us
+        * 1k of space, we will toss the remaining 3k of data.  TCP
+        * will keep the extra data around and use it for later requests.
         */
-       dev->result = ISC_R_SUCCESS;
-       return (DOIO_SUCCESS);
+       if (sock->type == isc_sockettype_udp)
+               sock->recvbuf.remaining = 0;
 }
 
-static int
-startio_recv(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes,
-            int *recv_errno)
+/*
+ * Copy as much data as possible from the internal buffer into done events.
+ * As each done event is filled, send it along its way.
+ */
+static void
+completeio_recv(isc_socket_t *sock)
 {
-       char *cmsg = NULL;
-       char strbuf[ISC_STRERRORSIZE];
-       IoCompletionInfo *lpo;
-       int status;
-       struct msghdr *msghdr;
-
-       lpo = (IoCompletionInfo *) HeapAlloc(hHeapHandle,
-                                            HEAP_ZERO_MEMORY,
-                                            sizeof(IoCompletionInfo));
-       lpo->request_type = SOCKET_RECV;
-       lpo->dev = dev;
-       msghdr = &lpo->messagehdr;
-       memset(msghdr, 0, sizeof(struct msghdr));
-
-       build_msghdr_recv(sock, dev, msghdr, cmsg, sock->iov);
+       isc_socketevent_t *dev;
 
-#if defined(ISC_SOCKET_DEBUG)
-       dump_msg(msghdr, sock);
-#endif
+       /*
+        * If we are in the process of filling our buffer, we cannot
+        * touch it yet, so don't.
+        */
+       if (sock->pending_recv > 0)
+               return;
 
-       *nbytes = internal_recvmsg(sock, lpo, msghdr, 0, recv_errno);
+       while (sock->recvbuf.remaining > 0 && !ISC_LIST_EMPTY(sock->recv_list)) {
+               dev = ISC_LIST_HEAD(sock->recv_list);
 
-       if (*nbytes < 0) {
                /*
-                * I/O has been initiated
-                * return will be via the completion port
+                * See if we have sufficient data in our receive buffer
+                * to handle this.  If we do, copy out the data.
                 */
-               if (PENDING_ERROR(*recv_errno)) {
-                       status = DOIO_PENDING;
-                       goto done;
-               }
-               if (SOFT_ERROR(*recv_errno)) {
-                       status = DOIO_SOFT;
-                       goto done;
-               }
+               fill_recv(sock, dev);
 
                /*
-                * If we got this far something is wrong
+                * Did we satisfy it?
                 */
-               if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
-                       isc__strerror(*recv_errno, strbuf, sizeof(strbuf));
-                       socket_log(__LINE__, sock, NULL, IOEVENT,
-                                  isc_msgcat, ISC_MSGSET_SOCKET,
-                                  ISC_MSG_DOIORECV,
-                                 "startio_recv: recvmsg(%d) %d bytes, "
-                                 "err %d/%s",
-                                  sock->fd, *nbytes, *recv_errno, strbuf);
+               if (dev->n >= dev->minimum) {
+                       dev->result = ISC_R_SUCCESS;
+                       send_recvdone_event(sock, &dev);
                }
-               status = DOIO_HARD;
-               goto done;
        }
-       dev->result = ISC_R_SUCCESS;
-       status = DOIO_SOFT;
-done:
-       return (status);
 }
 
 /*
@@ -1661,52 +1320,12 @@ completeio_send(isc_socket_t *sock, isc_socketevent_t *dev,
        char addrbuf[ISC_SOCKADDR_FORMATSIZE];
        char strbuf[ISC_STRERRORSIZE];
 
-       if(send_errno != 0) {
-
-
+       if (send_errno != 0) {
                if (SOFT_ERROR(send_errno))
                        return (DOIO_SOFT);
 
-#define SOFT_OR_HARD(_system, _isc) \
-       if (send_errno == _system) { \
-               if (sock->connected) { \
-                       dev->result = _isc; \
-                       return (DOIO_HARD); \
-               } \
-               return (DOIO_SOFT); \
-       }
-#define ALWAYS_HARD(_system, _isc) \
-       if (send_errno == _system) { \
-               dev->result = _isc; \
-               return (DOIO_HARD); \
-       }
-
-               SOFT_OR_HARD(WSAEACCES, ISC_R_NOPERM);
-               SOFT_OR_HARD(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
-               SOFT_OR_HARD(WSAECONNREFUSED, ISC_R_CONNREFUSED);
-               SOFT_OR_HARD(WSAENOTCONN, ISC_R_CONNREFUSED);
-               SOFT_OR_HARD(WSAECONNRESET, ISC_R_CONNECTIONRESET);
-               SOFT_OR_HARD(WSAECONNABORTED, ISC_R_CONNECTIONRESET);
-               SOFT_OR_HARD(WSAENETRESET, ISC_R_CONNECTIONRESET);
-               SOFT_OR_HARD(WSAEDISCON, ISC_R_CONNECTIONRESET);
-               SOFT_OR_HARD(WSAENETDOWN, ISC_R_NETDOWN);
-               ALWAYS_HARD(ERROR_OPERATION_ABORTED, ISC_R_CONNECTIONRESET);
-               ALWAYS_HARD(ERROR_NETNAME_DELETED, ISC_R_CONNECTIONRESET);
-               ALWAYS_HARD(ERROR_PORT_UNREACHABLE, ISC_R_HOSTUNREACH);
-               ALWAYS_HARD(ERROR_HOST_UNREACHABLE, ISC_R_HOSTUNREACH);
-               ALWAYS_HARD(ERROR_NETWORK_UNREACHABLE, ISC_R_NETUNREACH);
-               ALWAYS_HARD(ERROR_REQUEST_ABORTED, ISC_R_CONNECTIONRESET);
-               ALWAYS_HARD(WSA_OPERATION_ABORTED, ISC_R_CONNECTIONRESET);
-               ALWAYS_HARD(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
-               ALWAYS_HARD(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH);
-               ALWAYS_HARD(WSAEHOSTDOWN, ISC_R_HOSTUNREACH);
-               ALWAYS_HARD(WSAENETUNREACH, ISC_R_NETUNREACH);
-               ALWAYS_HARD(WSAENOBUFS, ISC_R_NORESOURCES);
-               ALWAYS_HARD(EPERM, ISC_R_HOSTUNREACH);
-               ALWAYS_HARD(EPIPE, ISC_R_NOTCONNECTED);
-
-#undef SOFT_OR_HARD
-#undef ALWAYS_HARD
+               return (map_socket_error(sock, send_errno, &dev->result,
+                       strbuf, sizeof(strbuf)));
 
                /*
                 * The other error types depend on whether or not the
@@ -1750,19 +1369,20 @@ startio_send(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes,
        int status;
        struct msghdr *msghdr;
 
-       lpo = (IoCompletionInfo *) HeapAlloc(hHeapHandle,
-                                            HEAP_ZERO_MEMORY,
-                                            sizeof(IoCompletionInfo));
+       lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
+                                           HEAP_ZERO_MEMORY,
+                                           sizeof(IoCompletionInfo));
+       RUNTIME_CHECK(lpo != NULL);
        lpo->request_type = SOCKET_SEND;
        lpo->dev = dev;
        msghdr = &lpo->messagehdr;
        memset(msghdr, 0, sizeof(struct msghdr));
+       ISC_LIST_INIT(lpo->bufferlist);
 
-       build_msghdr_send(sock, dev, msghdr, cmsg, sock->iov);
+       build_msghdr_send(sock, dev, msghdr, cmsg, sock->iov, lpo);
 
        *nbytes = internal_sendmsg(sock, lpo, msghdr, 0, send_errno);
 
-
        if (*nbytes < 0) {
                /*
                 * I/O has been initiated
@@ -1795,40 +1415,10 @@ startio_send(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes,
        dev->result = ISC_R_SUCCESS;
        status = DOIO_SOFT;
  done:
+       _set_state(sock, SOCK_DATA);
        return (status);
 }
 
-/*
- * Kill.
- *
- * Caller must ensure that the socket is not locked and no external
- * references exist. Note that the socket structure does not get
- * freed here
- */
-static void
-destroy_socket(isc_socket_t **sockp) {
-       isc_socket_t *sock = *sockp;
-
-       REQUIRE(sock != NULL);
-
-       socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
-                  ISC_MSG_DESTROYING, "closing socket %d, %p", sock->fd, sock);
-
-       LOCK(&sock->lock);
-
-       INSIST(ISC_LIST_EMPTY(sock->accept_list));
-       INSIST(ISC_LIST_EMPTY(sock->recv_list));
-       INSIST(ISC_LIST_EMPTY(sock->send_list));
-       INSIST(sock->connect_ev == NULL);
-
-       socket_close(sock);
-       if (sock->pending_recv != 0 || sock->pending_send != 0 ||
-           sock->pending_close != 0 || sock->iocp == 1) {
-               sock->pending_free = 1;
-       } 
-       UNLOCK(&sock->lock);
-}
-
 static isc_result_t
 allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
                isc_socket_t **socketp) {
@@ -1840,8 +1430,6 @@ allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
        if (sock == NULL)
                return (ISC_R_NOMEMORY);
 
-       result = ISC_R_UNEXPECTED;
-
        sock->magic = 0;
        sock->references = 0;
 
@@ -1859,19 +1447,23 @@ allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
        ISC_LIST_INIT(sock->accept_list);
        sock->connect_ev = NULL;
        sock->pending_accept = 0;
-       sock->pending_close = 0;
        sock->pending_recv = 0;
        sock->pending_send = 0;
-       sock->pending_free = 0;
-       sock->iocp = 0;
+       sock->pending_iocp = 0;
        sock->listener = 0;
        sock->connected = 0;
-       sock->connecting = 0;
+       sock->pending_connect = 0;
        sock->bound = 0;
-       sock->hEvent = NULL;
-       sock->hAlert = NULL;
-       sock->evthread_id = 0;
-       sock->wait_type = 0;
+       _set_state(sock, SOCK_INITIALIZED);
+
+       sock->recvbuf.len = 65536;
+       sock->recvbuf.remaining = 0;
+       sock->recvbuf.base = isc_mem_get(manager->mctx, sock->recvbuf.len); // max buffer size
+       if (sock->recvbuf.base == NULL) {
+               result = ISC_R_NOMEMORY; /* report the allocation failure */
+               goto error;
+       }
+       sock->recvbuf.consume_position = sock->recvbuf.base; /* only valid once base is set */
 
        /*
         * initialize the lock
@@ -1879,18 +1471,13 @@ allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
        result = isc_mutex_init(&sock->lock);
        if (result != ISC_R_SUCCESS) {
                sock->magic = 0;
+               isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len);
+               sock->recvbuf.base = NULL;
                goto error;
        }
 
-       /*
-        * Initialize readable and writable events
-        */
-       ISC_EVENT_INIT(&sock->readable_ev, sizeof(intev_t),
-                      ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTR,
-                      NULL, sock, sock, NULL, NULL);
-       ISC_EVENT_INIT(&sock->writable_ev, sizeof(intev_t),
-                      ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTW,
-                      NULL, sock, sock, NULL, NULL);
+       socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
+                  "allocated");
 
        sock->magic = SOCKET_MAGIC;
        *socketp = sock;
@@ -1904,36 +1491,122 @@ allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
 }
 
 /*
- * This event requires that the various lists be empty, that the reference
- * count be 1, and that the magic number is valid.  The other socket bits,
- * like the lock, must be initialized as well.  The fd associated must be
- * marked as closed, by setting it to INVALID_SOCKET on close, or this
- * routine will also close the socket.
+ * Verify that the socket state is CONSISTENT.
+ */
+static void
+consistent(isc_socket_t *sock) {
+       /* Check the pending-I/O counters against each other and the event lists. */
+       isc_socketevent_t *dev;
+       isc_socket_newconnev_t *nev;
+       unsigned int count;
+       const char *crash_reason = NULL;
+       isc_boolean_t crash = ISC_FALSE;
+
+       REQUIRE(sock->pending_iocp == sock->pending_recv + sock->pending_send
+               + sock->pending_accept + sock->pending_connect);
+
+       dev = ISC_LIST_HEAD(sock->send_list);
+       count = 0;
+       while (dev != NULL) {
+               count++;
+               dev = ISC_LIST_NEXT(dev, ev_link);
+       }
+       if (count > sock->pending_send) { /* queued sends exceed pending count */
+               crash = ISC_TRUE;
+               crash_reason = "send_list > sock->pending_send";
+       }
+
+       nev = ISC_LIST_HEAD(sock->accept_list);
+       count = 0;
+       while (nev != NULL) {
+               count++;
+               nev = ISC_LIST_NEXT(nev, ev_link);
+       }
+       if (count > sock->pending_accept) { /* queued accepts exceed pending count */
+               crash = ISC_TRUE;
+               crash_reason = "accept_list > sock->pending_accept";
+       }
+
+       if (crash) {
+               socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
+                          ISC_MSG_DESTROYING, "SOCKET INCONSISTENT: %s",
+                          crash_reason);
+               sock_dump(sock);
+               INSIST(crash == ISC_FALSE);
+       }
+}
+
+/*
+ * Maybe free the socket.
+ *
+ * This function will verify that the socket is no longer in use in any way,
+ * either internally or externally.  This is the only place where this
+ * check is to be made; if some bit of code believes that IT is done with
+ * the socket (e.g., some reference counter reaches zero), it should call
+ * this function.
+ *
+ * When calling this function, the socket must be locked, and the manager
+ * must be unlocked.
+ *
+ * When this function returns, *socketp will be NULL.  No tricks to try
+ * to hold on to this pointer are allowed.
  */
 static void
-free_socket(isc_socket_t **socketp) {
+maybe_free_socket(isc_socket_t **socketp, int lineno) {
        isc_socket_t *sock = *socketp;
+       *socketp = NULL;
 
-       INSIST(sock->references == 0);
        INSIST(VALID_SOCKET(sock));
-       INSIST(!sock->connecting);
-       INSIST(!sock->pending_accept);
-       INSIST(ISC_LIST_EMPTY(sock->recv_list));
-       INSIST(ISC_LIST_EMPTY(sock->send_list));
-       INSIST(ISC_LIST_EMPTY(sock->accept_list));
-       INSIST(!ISC_LINK_LINKED(sock, link));
-       INSIST(sock->iocp == 0);
+       CONSISTENT(sock);
+
+       if (sock->pending_iocp > 0
+           || sock->pending_recv > 0
+           || sock->pending_send > 0
+           || sock->pending_accept > 0
+           || sock->references > 0
+           || sock->pending_connect == 1
+           || !ISC_LIST_EMPTY(sock->recv_list)
+           || !ISC_LIST_EMPTY(sock->send_list)
+           || !ISC_LIST_EMPTY(sock->accept_list)
+           || sock->fd != INVALID_SOCKET) {
+               UNLOCK(&sock->lock);
+               return;
+       }
+       UNLOCK(&sock->lock);
 
-       sock->magic = 0;
+       free_socket(&sock, lineno);
+}
 
-       DESTROYLOCK(&sock->lock);
+void
+free_socket(isc_socket_t **sockp, int lineno) {
+       isc_socketmgr_t *manager;
+       isc_socket_t *sock = *sockp;
+       *sockp = NULL;
 
+       manager = sock->manager;
+
+       /*
+        * Seems we can free the socket after all.
+        */
+       manager = sock->manager;
        socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
-                  ISC_MSG_DESTROYING, "freeing socket (fd %d) %p", sock->fd, sock);
+                  ISC_MSG_DESTROYING, "freeing socket line %d fd %d lock %p semaphore %p",
+                  lineno, sock->fd, &sock->lock, sock->lock.LockSemaphore);
 
-       isc_mem_put(sock->manager->mctx, sock, sizeof(*sock));
+       sock->magic = 0;
+       DESTROYLOCK(&sock->lock);
 
-       *socketp = NULL;
+       if (sock->recvbuf.base != NULL)
+               isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len);
+
+       LOCK(&manager->lock);
+       if (ISC_LINK_LINKED(sock, link))
+               ISC_LIST_UNLINK(manager->socklist, sock, link);
+       isc_mem_put(manager->mctx, sock, sizeof(*sock));
+
+       if (ISC_LIST_EMPTY(manager->socklist))
+               SIGNAL(&manager->shutdown_ok);
+       UNLOCK(&manager->lock);
 }
 
 /*
@@ -1971,9 +1644,14 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
                if (sock->fd != INVALID_SOCKET) {
                        result = connection_reset_fix(sock->fd);
                        if (result != ISC_R_SUCCESS) {
+                               socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
+                                       "closed %d %d %d con_reset_fix_failed",
+                                       sock->pending_recv, sock->pending_send,
+                                       sock->references);
                                closesocket(sock->fd);
+                               _set_state(sock, SOCK_CLOSED);
                                sock->fd = INVALID_SOCKET;
-                               free_socket(&sock);
+                               free_socket(&sock, __LINE__);
                                return (result);
                        }
                }
@@ -1985,7 +1663,7 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
 
        if (sock->fd == INVALID_SOCKET) {
                socket_errno = WSAGetLastError();
-               free_socket(&sock);
+               free_socket(&sock, __LINE__);
 
                switch (socket_errno) {
                case WSAEMFILE:
@@ -2012,9 +1690,13 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
 
        result = make_nonblock(sock->fd);
        if (result != ISC_R_SUCCESS) {
+               socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
+                       "closed %d %d %d make_nonblock_failed",
+                       sock->pending_recv, sock->pending_send,
+                       sock->references);
                closesocket(sock->fd);
                sock->fd = INVALID_SOCKET;
-               free_socket(&sock);
+               free_socket(&sock, __LINE__);
                return (result);
        }
 
@@ -2080,18 +1762,17 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
        }
 #endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */
 
+       _set_state(sock, SOCK_OPEN);
        sock->references = 1;
        *socketp = sock;
 
        iocompletionport_update(sock);
 
-       LOCK(&manager->lock);
-
        /*
         * Note we don't have to lock the socket like we normally would because
         * there are no external references to it yet.
         */
-
+       LOCK(&manager->lock);
        ISC_LIST_APPEND(manager->socklist, sock, link);
        InterlockedIncrement(&manager->totalSockets);
        UNLOCK(&manager->lock);
@@ -2111,6 +1792,7 @@ isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp) {
        REQUIRE(socketp != NULL && *socketp == NULL);
 
        LOCK(&sock->lock);
+       CONSISTENT(sock);
        sock->references++;
        UNLOCK(&sock->lock);
 
@@ -2131,37 +1813,23 @@ isc_socket_detach(isc_socket_t **socketp) {
        REQUIRE(VALID_SOCKET(sock));
 
        LOCK(&sock->lock);
+       CONSISTENT(sock);
        REQUIRE(sock->references > 0);
        sock->references--;
 
-#if XXXMLG_DEBUG
-       printf("Detaching socket %p %d (%d %d %d %d %d)\n",
-               sock, sock->fd, sock->pending_recv, sock->pending_send, sock->pending_close,
-               sock->pending_free, sock->references);
-#endif
-
-       if (sock->references == 0 && sock->pending_recv == 0 && sock->pending_send == 0)
-               kill_socket = ISC_TRUE;
-
-       UNLOCK(&sock->lock);
+       socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
+               "detach_socket %d %d %d",
+               sock->pending_recv, sock->pending_send,
+               sock->references);
 
-       if (kill_socket) {
-               isc_socket_t *s = sock;
-               destroy_socket(&sock);
-               sock = s;
-               if (sock->pending_free) {
-                       isc_socketmgr_t *manager = sock->manager;
-                       LOCK(&manager->lock);
-                       ISC_LIST_UNLINK(manager->socklist, sock, link);
-                       InterlockedDecrement(&manager->iocp_total);
-                       sock->iocp = 0;
-                       free_socket(&sock);
-                       if (ISC_LIST_EMPTY(manager->socklist))
-                               SIGNAL(&manager->shutdown_ok);
-                       UNLOCK(&manager->lock);
-               }
+       if (sock->references == 0 && sock->fd != INVALID_SOCKET) {
+               closesocket(sock->fd);
+               sock->fd = INVALID_SOCKET;
+               _set_state(sock, SOCK_CLOSED);
        }
 
+       maybe_free_socket(&sock, __LINE__);
+
        *socketp = NULL;
 }
 
@@ -2171,7 +1839,7 @@ isc_socket_detach(isc_socket_t **socketp) {
  * destined for.
  *
  * If the event to be sent is on a list, remove it before sending.  If
- * asked to, send and detach from the socket as well.
+ * asked to, send and detach from the task as well.
  *
  * Caller must have the socket locked if the event is attached to the socket.
  */
@@ -2180,24 +1848,22 @@ send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
        isc_task_t *task;
 
        task = (*dev)->ev_sender;
-
        (*dev)->ev_sender = sock;
 
-       if (ISC_LINK_LINKED(*dev, ev_link)) {
+       if (ISC_LINK_LINKED(*dev, ev_link))
                ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link);
-       }
 
        if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
            == ISC_SOCKEVENTATTR_ATTACHED)
                isc_task_sendanddetach(&task, (isc_event_t **)dev);
        else
                isc_task_send(task, (isc_event_t **)dev);
+
+       CONSISTENT(sock);
 }
 
 /*
  * See comments for send_recvdone_event() above.
- *
- * Caller must have the socket locked if the event is attached to the socket.
  */
 static void
 send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
@@ -2208,289 +1874,154 @@ send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
        task = (*dev)->ev_sender;
        (*dev)->ev_sender = sock;
 
-       if (ISC_LINK_LINKED(*dev, ev_link)) {
-               ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link);
-       }
+       if (ISC_LINK_LINKED(*dev, ev_link))
+               ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link);
+
+       if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
+           == ISC_SOCKEVENTATTR_ATTACHED)
+               isc_task_sendanddetach(&task, (isc_event_t **)dev);
+       else
+               isc_task_send(task, (isc_event_t **)dev);
+
+       CONSISTENT(sock);
+}
+
+/*
+ * See comments for send_recvdone_event() above.
+ */
+static void
+send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev) {
+       isc_task_t *task;
+
+       INSIST(adev != NULL && *adev != NULL);
+
+       task = (*adev)->ev_sender;
+       (*adev)->ev_sender = sock;
+
+       if (ISC_LINK_LINKED(*adev, ev_link))
+               ISC_LIST_DEQUEUE(sock->accept_list, *adev, ev_link);
+
+       isc_task_sendanddetach(&task, (isc_event_t **)adev);
+
+       CONSISTENT(sock);
+}
+
+/*
+ * See comments for send_recvdone_event() above.
+ */
+static void
+send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev) {
+       isc_task_t *task;
+
+       INSIST(cdev != NULL && *cdev != NULL);
+
+       task = (*cdev)->ev_sender;
+       (*cdev)->ev_sender = sock;
+
+       sock->connect_ev = NULL;
+
+       isc_task_sendanddetach(&task, (isc_event_t **)cdev);
 
-       if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
-           == ISC_SOCKEVENTATTR_ATTACHED)
-               isc_task_sendanddetach(&task, (isc_event_t **)dev);
-       else
-               isc_task_send(task, (isc_event_t **)dev);
+       CONSISTENT(sock);
 }
 
 /*
- * Call accept() on a socket, to get the new file descriptor.  The listen
- * socket is used as a prototype to create a new isc_socket_t.  The new
- * socket has one outstanding reference.  The task receiving the event
- * will be detached from just after the event is delivered.
- *
  * On entry to this function, the event delivered is the internal
  * readable event, and the first item on the accept_list should be
  * the done event we want to send.  If the list is empty, this is a no-op,
- * so just unlock and return.
+ * so just close the new connection, unlock, and return.
  *
  * Note the the socket is locked before entering here
  */
 static void
-internal_accept(isc_socket_t *sock, int accept_errno) {
-       isc_socketmgr_t *manager;
-       isc_socket_newconnev_t *dev;
-       isc_task_t *task;
-       ISC_SOCKADDR_LEN_T addrlen;
-       SOCKET fd;
+internal_accept(isc_socket_t *sock, IoCompletionInfo *lpo, int accept_errno) {
+       isc_socket_newconnev_t *adev;
        isc_result_t result = ISC_R_SUCCESS;
-       char strbuf[ISC_STRERRORSIZE];
+       isc_socket_t *nsock;
+       struct sockaddr_in *localaddr;
+       int localaddr_len = sizeof(*localaddr);
+       struct sockaddr_in *remoteaddr;
+       int remoteaddr_len = sizeof(*remoteaddr);
 
        INSIST(VALID_SOCKET(sock));
+       LOCK(&sock->lock);
+       CONSISTENT(sock);
 
        socket_log(__LINE__, sock, NULL, TRACE,
                   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
-                  "internal_accept called, locked socket");
-
-       manager = sock->manager;
-       INSIST(VALID_MANAGER(manager));
+                  "internal_accept called");
 
        INSIST(sock->listener);
-       INSIST(sock->hEvent != NULL);
-       INSIST(sock->pending_accept == 1);
-       sock->pending_accept = 0;
 
-       /*
-        * Check any possible error status from the event notification here.
-        * Note that we don't take any action since it was only
-        * Windows that was notifying about a network event, not the
-        * application.
-        * PDMXXX: Should we care about any of the possible event errors
-        *         signalled? The only possible valid errors are:
-        *         WSAENETDOWN, WSAECONNRESET, & WSAECONNABORTED
-        */
-       if (accept_errno != 0) {
-               switch (accept_errno) {
-               case WSAENETDOWN:
-               case WSAECONNRESET:
-               case WSAECONNABORTED:
-                       break;          /* Expected errors */
-               default:
-                       isc__strerror(accept_errno, strbuf, sizeof(strbuf));
-                       UNEXPECTED_ERROR(__FILE__, __LINE__,
-                                        "internal_accept: from event wait: %s",
-                                        strbuf);
-                       break;
-               }
-               return;
-       }
+       INSIST(sock->pending_iocp > 0);
+       sock->pending_iocp--;
+       INSIST(sock->pending_accept > 0);
+       sock->pending_accept--;
 
-       /*
-        * Get the first item off the accept list.
-        * If it is empty, unlock the socket and return.
-        */
-       dev = ISC_LIST_HEAD(sock->accept_list);
-       if (dev == NULL) {
-               isc_sockaddr_t from;
-               /*
-                * This should only happen if WSAEventSelect() fails
-                * or when cancelling a specific event, when we can do that
-                * again.
-                */
-               addrlen = sizeof(from.type);
-               fd = accept(sock->fd, &from.type.sa, &addrlen);
-               if (fd == INVALID_SOCKET) {
-                       accept_errno = WSAGetLastError();
-                       if (accept_errno == WSAEMFILE) {
-                               isc_log_iwrite(isc_lctx,
-                                       ISC_LOGCATEGORY_GENERAL,
-                                       ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
-                                       isc_msgcat, ISC_MSGSET_SOCKET,
-                                       ISC_MSG_TOOMANYFDS,
-                                       "%s: too many open file descriptors",
-                                       "accept");
-                               goto soft_error;
-                       } else if (SOFT_ERROR(accept_errno) ||
-                                  accept_errno == WSAECONNRESET) {
-                               goto soft_error;
-                       } else {
-                               isc__strerror(accept_errno, strbuf, 
-                                             sizeof(strbuf));
-                               UNEXPECTED_ERROR(__FILE__, __LINE__,
-                                        "internal_accept: accept() %s: %s",
-                                        isc_msgcat_get(isc_msgcat,
-                                                       ISC_MSGSET_GENERAL,
-                                                       ISC_MSG_FAILED,
-                                                       "failed"),
-                                        strbuf);
-                               fd = INVALID_SOCKET;
-                               result = ISC_R_UNEXPECTED;
-                       }
-               } else {
-                       char addrbuf[ISC_SOCKADDR_FORMATSIZE];
-                       isc_sockaddr_format(&from, addrbuf, sizeof(addrbuf));
-                       UNEXPECTED_ERROR(__FILE__, __LINE__,
-                                        "sock->accept_list empty: "
-                                        "dropping TCP request from %s",
-                                        addrbuf);
-                       (void)closesocket(fd);
-                       sock->fd = INVALID_SOCKET;
-               }
-               return;
-       }
+       adev = lpo->adev;
 
        /*
-        * Try to accept the new connection.  If the accept fails with
-        * WSAEINTR, the event wait will be notified again since
-        * the event will be reset on return to caller.
-        */
-       addrlen = sizeof(dev->newsocket->address.type);
-       memset(&dev->newsocket->address.type.sa, 0, addrlen);
-       fd = accept(sock->fd, &dev->newsocket->address.type.sa,
-                   (void *)&addrlen);
-       if (fd == INVALID_SOCKET) {
-               accept_errno = WSAGetLastError();
-               if (accept_errno == WSAEMFILE) {
-                       isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL,
-                                      ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
-                                      isc_msgcat, ISC_MSGSET_SOCKET,
-                                      ISC_MSG_TOOMANYFDS,
-                                      "%s: too many open file descriptors",
-                                      "accept");
-                       goto soft_error;
-               } else if (SOFT_ERROR(accept_errno) ||
-                          accept_errno == WSAECONNRESET) {
-                       goto soft_error;
-               } else {
-                       isc__strerror(accept_errno, strbuf, sizeof(strbuf));
-                       UNEXPECTED_ERROR(__FILE__, __LINE__,
-                                        "internal_accept: accept() %s: %s",
-                                        isc_msgcat_get(isc_msgcat,
-                                                       ISC_MSGSET_GENERAL,
-                                                       ISC_MSG_FAILED,
-                                                       "failed"),
-                                        strbuf);
-                       fd = INVALID_SOCKET;
-                       result = ISC_R_UNEXPECTED;
-               }
-       } else {
-               if (addrlen == 0) {
-                       UNEXPECTED_ERROR(__FILE__, __LINE__,
-                                        "internal_accept(): "
-                                        "accept() failed to return "
-                                        "remote address");
-
-                       (void)closesocket(fd);
-                       dev->newsocket->fd = INVALID_SOCKET;
-                       goto soft_error;
-               } else if (dev->newsocket->address.type.sa.sa_family !=
-                          sock->pf)
-               {
-                       UNEXPECTED_ERROR(__FILE__, __LINE__,
-                                        "internal_accept(): "
-                                        "accept() returned peer address "
-                                        "family %u (expected %u)",
-                                        dev->newsocket->address.
-                                        type.sa.sa_family,
-                                        sock->pf);
-                       (void)closesocket(fd);
-                       dev->newsocket->fd = INVALID_SOCKET;
-                       goto soft_error;
-               }
-       }
+        * If the event is no longer in the list we can just return.
+        */
+       if (!acceptdone_is_active(sock, adev))
+               goto done;
 
-       if (fd != INVALID_SOCKET) {
-               dev->newsocket->address.length = addrlen;
-               dev->newsocket->pf = sock->pf;
-       }
+       nsock = adev->newsocket;
 
        /*
         * Pull off the done event.
         */
-       ISC_LIST_UNLINK(sock->accept_list, dev, ev_link);
+       ISC_LIST_UNLINK(sock->accept_list, adev, ev_link);
 
        /*
-        * Stop listening for connects.
+        * Extract the addresses from the socket, copy them into the structure,
+        * and return the new socket.
         */
-       if (ISC_LIST_EMPTY(sock->accept_list) &&
-           WSAEventSelect(sock->fd, sock->hEvent, FD_CLOSE) != 0) {
-               int stat;
-               const char *msg;
-               stat = WSAGetLastError();
-               isc__strerror(stat, strbuf, sizeof(strbuf));
-               msg = isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
-                                    ISC_MSG_FAILED, "failed");
-               UNEXPECTED_ERROR(__FILE__, __LINE__, "WSAEventSelect: %s: %s",
-                                msg, strbuf);
-       }
+       ISCGetAcceptExSockaddrs(lpo->acceptbuffer, 0,
+               sizeof(SOCKADDR) + 16, sizeof(SOCKADDR) + 16,
+               (LPSOCKADDR *)&localaddr, &localaddr_len,
+               (LPSOCKADDR *)&remoteaddr, &remoteaddr_len);
+       memcpy(&adev->address.type, remoteaddr, remoteaddr_len);
+       adev->address.length = remoteaddr_len;
+       nsock->address = adev->address;
+       nsock->pf = adev->address.type.sa.sa_family;
 
+       socket_log(__LINE__, nsock, &nsock->address, TRACE,
+                  isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
+                  "internal_accept parent %p", sock);
 
-       if (fd != INVALID_SOCKET) {
-               isc_result_t tresult;
-               tresult = make_nonblock(fd);
-               if (tresult != ISC_R_SUCCESS) {
-                       closesocket(fd);
-                       sock->fd = INVALID_SOCKET;
-                       fd = INVALID_SOCKET;
-                       result = tresult;
-               }
-       }
+       result = make_nonblock(adev->newsocket->fd);
+       INSIST(result == ISC_R_SUCCESS);
+
+       INSIST(setsockopt(nsock->fd, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT, 
+              (char *)&sock->fd, sizeof(sock->fd)) == 0);
 
        /*
-        * INVALID_SOCKET means the new socket didn't happen.
+        * Hook it up into the manager.
         */
-       if (fd != INVALID_SOCKET) {
-               LOCK(&manager->lock);
-               ISC_LIST_APPEND(manager->socklist, dev->newsocket, link);
-
-               dev->newsocket->fd = fd;
-               dev->newsocket->bound = 1;
-               dev->newsocket->connected = 1;
-               InterlockedIncrement(&manager->totalSockets);
+       nsock->bound = 1;
+       nsock->connected = 1;
+       _set_state(nsock, SOCK_OPEN);
 
-               /*
-                * The accept socket inherits the listen socket's
-                * selected events. Remove this socket from all events
-                * as it is handled by IOCP. (Joe Quanaim, lucent.com)
-                */
-               if (WSAEventSelect(dev->newsocket->fd, 0, 0) != 0) {
-                       /* this is an unlikely but non-fatal error */
-                       int stat;
-                       const char *msg;
-                       stat = WSAGetLastError();
-                       isc__strerror(stat, strbuf, sizeof(strbuf));
-                       msg = isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
-                                            ISC_MSG_FAILED, "failed");
-                       UNEXPECTED_ERROR(__FILE__, __LINE__,
-                                        "WSAEventSelect: %s: %s", msg, strbuf);
-               }
-
-               /*
-                * Save away the remote address
-                */
-               dev->address = dev->newsocket->address;
-
-               socket_log(__LINE__, sock, &dev->newsocket->address, CREATION,
-                          isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
-                          "accepted connection, new socket %p",
-                          dev->newsocket);
-
-               iocompletionport_update(dev->newsocket);
+       LOCK(&nsock->manager->lock);
+       ISC_LIST_APPEND(nsock->manager->socklist, nsock, link);
+       InterlockedIncrement(&nsock->manager->totalSockets);
+       UNLOCK(&nsock->manager->lock);
 
-               UNLOCK(&manager->lock);
-       } else {
-               dev->newsocket->references--;
-               free_socket(&dev->newsocket);
-       }
+       socket_log(__LINE__, sock, &nsock->address, CREATION,
+                  isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
+                  "accepted_connection new_socket %p fd %d",
+                  nsock, nsock->fd);
 
-       /*
-        * Fill in the done event details and send it off.
-        */
-       dev->result = result;
-       task = dev->ev_sender;
-       dev->ev_sender = sock;
+       adev->result = result;
+       send_acceptdone_event(sock, &adev);
 
-       isc_task_sendanddetach(&task, (isc_event_t **)&dev);
-       return;
+done:
+       CONSISTENT(sock);
+       UNLOCK(&sock->lock);
 
- soft_error:
-       return;
+       HeapFree(hHeapHandle, 0, lpo->acceptbuffer);
+       lpo->acceptbuffer = NULL;
 }
 
 /*
@@ -2498,25 +2029,35 @@ internal_accept(isc_socket_t *sock, int accept_errno) {
  * Note that the socket is locked before entering.
  */
 static void
-internal_connect(isc_socket_t *sock, int connect_errno) {
-       isc_socket_connev_t *dev;
-       isc_task_t *task;
+internal_connect(isc_socket_t *sock, IoCompletionInfo *lpo, int connect_errno) {
+       isc_socket_connev_t *cdev;
        char strbuf[ISC_STRERRORSIZE];
 
        INSIST(VALID_SOCKET(sock));
 
+       LOCK(&sock->lock);
+
+       INSIST(sock->pending_iocp > 0);
+       sock->pending_iocp--;
+       INSIST(sock->pending_connect == 1);
+       sock->pending_connect = 0;
+
        /*
         * Has this event been canceled?
         */
-       dev = sock->connect_ev;
-       if (dev == NULL) {
-               INSIST(!sock->connecting);
+       cdev = lpo->cdev;
+       if (!connectdone_is_active(sock, cdev)) {
+               sock->pending_connect = 0;
+               if (sock->fd != INVALID_SOCKET) {
+                       closesocket(sock->fd);
+                       sock->fd = INVALID_SOCKET;
+                       _set_state(sock, SOCK_CLOSED);
+               }
+               CONSISTENT(sock);
+               UNLOCK(&sock->lock);
                return;
        }
 
-       INSIST(sock->connecting);
-       sock->connecting = 0;
-
        /*
         * Check possible Windows network event error status here.
         */
@@ -2526,9 +2067,10 @@ internal_connect(isc_socket_t *sock, int connect_errno) {
                 * fd and pretend nothing strange happened.
                 */
                if (SOFT_ERROR(connect_errno) ||
-                   connect_errno == WSAEINPROGRESS)
-               {
-                       sock->connecting = 1;
+                   connect_errno == WSAEINPROGRESS) {
+                       sock->pending_connect = 1;
+                       CONSISTENT(sock);
+                       UNLOCK(&sock->lock);
                        return;
                }
 
@@ -2536,7 +2078,7 @@ internal_connect(isc_socket_t *sock, int connect_errno) {
                 * Translate other errors into ISC_R_* flavors.
                 */
                switch (connect_errno) {
-#define ERROR_MATCH(a, b) case a: dev->result = b; break;
+#define ERROR_MATCH(a, b) case a: cdev->result = b; break;
                        ERROR_MATCH(WSAEACCES, ISC_R_NOPERM);
                        ERROR_MATCH(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
                        ERROR_MATCH(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
@@ -2551,89 +2093,105 @@ internal_connect(isc_socket_t *sock, int connect_errno) {
                        ERROR_MATCH(WSAETIMEDOUT, ISC_R_TIMEDOUT);
 #undef ERROR_MATCH
                default:
-                       dev->result = ISC_R_UNEXPECTED;
+                       cdev->result = ISC_R_UNEXPECTED;
                        isc__strerror(connect_errno, strbuf, sizeof(strbuf));
                        UNEXPECTED_ERROR(__FILE__, __LINE__,
                                         "internal_connect: connect() %s",
                                         strbuf);
                }
        } else {
-               dev->result = ISC_R_SUCCESS;
+               INSIST(setsockopt(sock->fd, SOL_SOCKET, SO_UPDATE_CONNECT_CONTEXT, NULL, 0) == 0);
+               cdev->result = ISC_R_SUCCESS;
                sock->connected = 1;
                sock->bound = 1;
+               socket_log(__LINE__, sock, &sock->address, IOEVENT,
+                          isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
+                          "internal_connect: success");
        }
 
-       sock->connect_ev = NULL;
+       send_connectdone_event(sock, &cdev);
+
+       UNLOCK(&sock->lock);
+}
 
-       task = dev->ev_sender;
-       dev->ev_sender = sock;
-       isc_task_sendanddetach(&task, (isc_event_t **)&dev);
+/*
+ * Send every done event pending on the socket's recv_list with 'result'.
+ */
+static void
+send_recvdone_abort(isc_socket_t *sock, isc_result_t result) {
+       isc_socketevent_t *dev;
+
+       while (!ISC_LIST_EMPTY(sock->recv_list)) {
+               dev = ISC_LIST_HEAD(sock->recv_list);
+               dev->result = result;
+               send_recvdone_event(sock, &dev);
+       }
 }
 
+/*
+ * Take the data we received in our private buffer, and if any recv() calls on
+ * our list are satisfied, send the corresponding done event.
+ *
+ * If we need more data (there are still items on the recv_list after we consume all
+ * our data) then arrange for another system recv() call to fill our buffers.
+ */
 static void
-internal_recv(isc_socket_t *sock, isc_socketevent_t *dev,
-             struct msghdr *messagehdr, int nbytes, int recv_errno)
+internal_recv(isc_socket_t *sock, int nbytes)
 {
-       isc_socketevent_t *ldev;
-       int io_state;
-       int cc;
-
        INSIST(VALID_SOCKET(sock));
 
        LOCK(&sock->lock);
+       CONSISTENT(sock);
+
        socket_log(__LINE__, sock, NULL, IOEVENT,
                   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV,
-                  "internal_recv: task got socket event %p", dev);
+                  "internal_recv: %d bytes received", nbytes);
 
+       /*
+        * If we got here, the I/O operation succeeded.  However, we might still have removed this
+        * event from our notification list (or never placed it on it due to immediate completion.)
+        * Handle the reference counting here, and handle the cancellation event just after.
+        */
+       INSIST(sock->pending_iocp > 0);
+       sock->pending_iocp--;
        INSIST(sock->pending_recv > 0);
        sock->pending_recv--;
 
-       /* If the event is no longer in the list we can just return */
-       ldev = ISC_LIST_HEAD(sock->recv_list);
-       while (ldev != NULL && ldev != dev) {
-               ldev = ISC_LIST_NEXT(ldev, ev_link);
+       /*
+        * The only way we could have gotten here is that our I/O has successfully completed.
+        * Update our pointers, and move on.  The only odd case here is that we might not
+        * have received enough data on a TCP stream to satisfy the minimum requirements.  If
+        * this is the case, we will re-issue the recv() call for what we need.
+        *
+        * We do check for a recv() of 0 bytes on a TCP stream.  This means the remote end
+        * has closed.
+        */
+       if (nbytes == 0) {
+               send_recvdone_abort(sock, ISC_R_EOF);
+               maybe_free_socket(&sock, __LINE__);
+               return;
        }
-       if (ldev == NULL)
-               goto done;
+       sock->recvbuf.remaining = nbytes;
+       sock->recvbuf.consume_position = sock->recvbuf.base;
+       completeio_recv(sock);
 
        /*
-        * Try to do as much I/O as possible on this socket.  There are no
-        * limits here, currently.
+        * If there are more receivers waiting for data, queue another receive
+        * here.
         */
-       switch (completeio_recv(sock, dev, messagehdr, nbytes, recv_errno)) {
-       case DOIO_SOFT:
-               cc = 0;
-               recv_errno = 0;
-               io_state = startio_recv(sock, dev, &cc, &recv_errno);
-               goto done;
-
-       case DOIO_EOF:
-               /*
-                * read of 0 means the remote end was closed.
-                * Run through the event queue and dispatch all
-                * the events with an EOF result code.
-                */
-               do {
-                       dev->result = ISC_R_EOF;
-                       send_recvdone_event(sock, &dev);
-                       dev = ISC_LIST_HEAD(sock->recv_list);
-               } while (dev != NULL);
-               goto done;
+       queue_receive_request(sock);
 
-       case DOIO_SUCCESS:
-       case DOIO_HARD:
-               send_recvdone_event(sock, &dev);
-               break;
-       }
- done:
-       UNLOCK(&sock->lock);
+       /*
+        * Unlock and/or destroy if we are the last thing this socket has left to do.
+        */
+       maybe_free_socket(&sock, __LINE__);
 }
 
 static void
 internal_send(isc_socket_t *sock, isc_socketevent_t *dev,
-             struct msghdr *messagehdr, int nbytes, int send_errno)
+             struct msghdr *messagehdr, int nbytes, int send_errno, IoCompletionInfo *lpo)
 {
-       isc_socketevent_t *ldev;
+       buflist_t *buffer;
 
        /*
         * Find out what socket this is and lock it.
@@ -2641,23 +2199,36 @@ internal_send(isc_socket_t *sock, isc_socketevent_t *dev,
        INSIST(VALID_SOCKET(sock));
 
        LOCK(&sock->lock);
+       CONSISTENT(sock);
+
        socket_log(__LINE__, sock, NULL, IOEVENT,
                   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND,
                   "internal_send: task got socket event %p", dev);
 
+       buffer = ISC_LIST_HEAD(lpo->bufferlist);
+       while (buffer != NULL) {
+               ISC_LIST_DEQUEUE(lpo->bufferlist, buffer, link);
+
+               socket_log(__LINE__, sock, NULL, TRACE,
+                  isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
+                  "free_buffer %p %p", buffer, buffer->buf);
+
+               HeapFree(hHeapHandle, 0, buffer->buf);
+               HeapFree(hHeapHandle, 0, buffer);
+               buffer = ISC_LIST_HEAD(lpo->bufferlist);
+       }
+
+       INSIST(sock->pending_iocp > 0);
+       sock->pending_iocp--;
        INSIST(sock->pending_send > 0);
        sock->pending_send--;
 
        /* If the event is no longer in the list we can just return */
-       ldev = ISC_LIST_HEAD(sock->send_list);
-       while (ldev != NULL && ldev != dev) {
-               ldev = ISC_LIST_NEXT(ldev, ev_link);
-       }
-       if (ldev == NULL)
+       if (!senddone_is_active(sock, dev))
                goto done;
+
        /*
-        * Try to do as much I/O as possible on this socket.  There are no
-        * limits here, currently.
+        * Set the error code and send things on its way.
         */
        switch (completeio_send(sock, dev, messagehdr, nbytes, send_errno)) {
        case DOIO_SOFT:
@@ -2669,7 +2240,42 @@ internal_send(isc_socket_t *sock, isc_socketevent_t *dev,
        }
 
  done:
-       UNLOCK(&sock->lock);
+       maybe_free_socket(&sock, __LINE__);
+}
+
+/*
+ * These return whether the done event passed in is on the list (or, for
+ * connect, is the one we're waiting for).  Using these ensures we will not
+ * double-send an event.
+ */
+static isc_boolean_t
+senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev)
+{
+       isc_socketevent_t *ldev;
+
+       ldev = ISC_LIST_HEAD(sock->send_list);
+       while (ldev != NULL && ldev != dev)
+               ldev = ISC_LIST_NEXT(ldev, ev_link);
+
+       return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
+}
+
+static isc_boolean_t
+acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev)
+{
+       isc_socket_newconnev_t *ldev;
+
+       ldev = ISC_LIST_HEAD(sock->accept_list);
+       while (ldev != NULL && ldev != dev)
+               ldev = ISC_LIST_NEXT(ldev, ev_link);
+
+       return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
+}
+
+static isc_boolean_t
+connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev)
+{
+       return (sock->connect_ev == dev ? ISC_TRUE : ISC_FALSE);
 }
 
 /*
@@ -2685,7 +2291,6 @@ SocketIoThread(LPVOID ThreadContext) {
        IoCompletionInfo *lpo = NULL;
        isc_socket_t *sock = NULL;
        int request;
-       isc_socketevent_t *dev = NULL;
        struct msghdr *messagehdr = NULL;
        int errval;
        char strbuf[ISC_STRERRORSIZE];
@@ -2693,13 +2298,13 @@ SocketIoThread(LPVOID ThreadContext) {
 
        REQUIRE(VALID_MANAGER(manager));
 
-       /*      Set the thread priority high enough so I/O will
-        *      preempt normal recv packet processing, but not
-        *      higher than the timer sync thread.
+       /*
+        * Set the thread priority high enough so I/O will
+        * preempt normal recv packet processing, but not
+        * higher than the timer sync thread.
         */
        if (!SetThreadPriority(GetCurrentThread(),
-                              THREAD_PRIORITY_ABOVE_NORMAL))
-       {
+                              THREAD_PRIORITY_ABOVE_NORMAL)) {
                errval = GetLastError();
                isc__strerror(errval, strbuf, sizeof(strbuf));
                FATAL_ERROR(__FILE__, __LINE__,
@@ -2714,86 +2319,106 @@ SocketIoThread(LPVOID ThreadContext) {
         */
        while (TRUE) {
                bSuccess = GetQueuedCompletionStatus(manager->hIoCompletionPort,
-                                                    &nbytes, (LPDWORD) &sock,
-                                                    (LPOVERLAPPED *)&lpo,
+                                                    &nbytes, (LPDWORD)&sock,
+                                                    (LPWSAOVERLAPPED *)&lpo,
                                                     INFINITE);
                if (lpo == NULL) /* Received request to exit */
                        break;
                
-               dev = lpo->dev;
-               lpo->dev = NULL;
+               REQUIRE(VALID_SOCKET(sock));
+
                request = lpo->request_type;
 
                errstatus = 0;
                if (!bSuccess) {
-                       isc_boolean_t dofree = ISC_FALSE;
-                       REQUIRE(VALID_SOCKET(sock));
+                       isc_result_t isc_result;
+
                        /*
-                        * Was this the socket closed under us?
+                        * Did the I/O operation complete?
                         */
-                       errstatus = GetLastError();
-                       if (errstatus == WSA_OPERATION_ABORTED) {
-                               LOCK(&sock->lock);
-                               switch (request) {
-                               case SOCKET_RECV:
-                                       INSIST(sock->pending_recv > 0);
-                                       sock->pending_recv--;
-                                       dev->result = ISC_R_CANCELED;
-#if XXXMLG_DEBUG
-                                       printf("Sending recvdone socket %p %d (%d %d %d %d %d)\n",
-                                               sock, sock->fd, sock->pending_recv, sock->pending_send, sock->pending_close,
-                                               sock->pending_free, sock->references);
-#endif
-                                       send_recvdone_event(sock, &dev);
+                       errstatus = WSAGetLastError();
+                       isc_result = isc__errno2resultx(errstatus, __FILE__, __LINE__);
+
+                       LOCK(&sock->lock);
+                       CONSISTENT(sock);
+                       switch (request) {
+                       case SOCKET_RECV:
+                               INSIST(sock->pending_iocp > 0);
+                               sock->pending_iocp--;
+                               INSIST(sock->pending_recv > 0);
+                               sock->pending_recv--;
+                               send_recvdone_abort(sock, isc_result);
+                               if (isc_result == ISC_R_UNEXPECTED) {
+                                       UNEXPECTED_ERROR(__FILE__, __LINE__,
+                                               "SOCKET_RECV: Windows error code: %d, returning ISC error %d",
+                                               errstatus, isc_result);
+                               }
+                               break;
 
-                                       break;
-                               case SOCKET_SEND:
-                                       INSIST(sock->pending_send > 0);
-                                       sock->pending_send--;
-                                       dev->result = ISC_R_CANCELED;
-#if XXXMLG_DEBUG
-                                       printf("Sending senddone socket %p %d (%d %d %d %d %d)\n",
-                                               sock, sock->fd, sock->pending_recv, sock->pending_send, sock->pending_close,
-                                               sock->pending_free, sock->references);
-#endif
-                                       send_senddone_event(sock, &dev);
-                                       break;
+                       case SOCKET_SEND:
+                               INSIST(sock->pending_iocp > 0);
+                               sock->pending_iocp--;
+                               INSIST(sock->pending_send > 0);
+                               sock->pending_send--;
+                               if (senddone_is_active(sock, lpo->dev)) {
+                                       lpo->dev->result = isc_result;
+                                       socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
+                                               "cancelled_send");
+                                       send_senddone_event(sock, &lpo->dev);
                                }
-                               if (sock->pending_recv == 0 &&
-                                   sock->pending_send == 0 &&
-                                   sock->pending_close == 0 &&
-                                   sock->pending_free == 1 &&
-                                       sock->references == 0) {
-                                       sock->pending_free = 0;
-                                       dofree = ISC_TRUE;
+                               break;
+
+                       case SOCKET_ACCEPT:
+                               INSIST(sock->pending_iocp > 0);
+                               sock->pending_iocp--;
+                               INSIST(sock->pending_accept > 0);
+                               sock->pending_accept--;
+                               if (acceptdone_is_active(sock, lpo->adev)) {
+                                       closesocket(lpo->adev->newsocket->fd);
+                                       lpo->adev->newsocket->fd = INVALID_SOCKET;
+                                       lpo->adev->newsocket->references--;
+                                       free_socket(&lpo->adev->newsocket, __LINE__);
+                                       lpo->adev->result = isc_result;
+                                       socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
+                                               "cancelled_accept");
+                                       send_acceptdone_event(sock, &lpo->adev);
                                }
+                               break;
 
-                               UNLOCK(&sock->lock);
-
-                               if (dofree) {
-                                       InterlockedDecrement(&manager->iocp_total);
-                                       sock->iocp = 0;
-                                       LOCK(&manager->lock);
-                                       ISC_LIST_UNLINK(manager->socklist, sock, link);
-                                       free_socket(&sock);
-                                       if (ISC_LIST_EMPTY(manager->socklist))
-                                               SIGNAL(&manager->shutdown_ok);
-                                       UNLOCK(&manager->lock);
+                       case SOCKET_CONNECT:
+                               INSIST(sock->pending_iocp > 0);
+                               sock->pending_iocp--;
+                               INSIST(sock->pending_connect == 1);
+                               sock->pending_connect = 0;
+                               if (connectdone_is_active(sock, lpo->cdev)) {
+                                       lpo->cdev->result = isc_result;
+                                       socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
+                                               "cancelled_connect");
+                                       send_connectdone_event(sock, &lpo->cdev);
                                }
-                               if (lpo != NULL)
-                                       HeapFree(hHeapHandle, 0, lpo);
-                               continue;
+                               break;
                        }
+                       maybe_free_socket(&sock, __LINE__);
+
+                       if (lpo != NULL)
+                               HeapFree(hHeapHandle, 0, lpo);
+                       continue;
                }
 
                messagehdr = &lpo->messagehdr;
 
                switch (request) {
                case SOCKET_RECV:
-                       internal_recv(sock, dev, messagehdr, nbytes, errstatus);
+                       internal_recv(sock, nbytes);
                        break;
                case SOCKET_SEND:
-                       internal_send(sock, dev, messagehdr, nbytes, errstatus);
+                       internal_send(sock, lpo->dev, messagehdr, nbytes, errstatus, lpo);
+                       break;
+               case SOCKET_ACCEPT:
+                       internal_accept(sock, lpo, errstatus);
+                       break;
+               case SOCKET_CONNECT:
+                       internal_connect(sock, lpo, errstatus);
                        break;
                }
 
@@ -2810,150 +2435,12 @@ SocketIoThread(LPVOID ThreadContext) {
        return ((isc_threadresult_t)0);
 }
 
-/*
- * This is the thread that will loop forever, waiting for an event to
- * happen.
- *
- * When the wait returns something to do, find the signaled event
- * and issue the request for the given socket
- */
-static isc_threadresult_t WINAPI
-event_wait(void *uap) {
-       events_thread_t *evthread = uap;
-       isc_socketmgr_t *manager = evthread->manager;
-       int cc;
-       int event_errno;
-       char strbuf[ISC_STRERRORSIZE];
-       isc_socket_t *wsock;
-       int iEvent;
-       int max_event;
-       sock_event_list *evlist;
-       WSANETWORKEVENTS NetworkEvents;
-       int err;
-
-       REQUIRE(evthread != NULL);
-       REQUIRE(VALID_MANAGER(manager));
-
-       /* We need to know the Id of the thread */
-       evthread->thread_id = GetCurrentThreadId();
-
-       evlist = &(evthread->sockev_list);
-
-       /* See if there's anything waiting to add to the event list */
-       if (manager->event_written > 0)
-               process_eventlist(evlist, manager);
-
-       while (!manager->bShutdown) {
-               do {
-
-                       max_event = evlist->max_event;
-                       event_errno = 0;
-
-                       WSAResetEvent(evlist->aEventList[0]);
-                       cc = WSAWaitForMultipleEvents(max_event,
-                                       evlist->aEventList, FALSE, WSA_INFINITE,
-                                       FALSE);
-                       if (cc == WSA_WAIT_FAILED) {
-                               event_errno = WSAGetLastError();
-                               if (!SOFT_ERROR(event_errno)) {
-                                       isc__strerror(event_errno, strbuf,
-                                             sizeof(strbuf));
-                                       FATAL_ERROR(__FILE__, __LINE__,
-                                          "WSAWaitForMultipleEvents() %s: %s",
-                                           isc_msgcat_get(isc_msgcat,
-                                                   ISC_MSGSET_GENERAL,
-                                                   ISC_MSG_FAILED,
-                                                   "failed"),
-                                           strbuf);
-                               }
-                       }
-
-               } while (cc < 0 && !manager->bShutdown
-                        && manager->event_written == 0);
-
-               if (manager->bShutdown)
-                       break;
-
-               iEvent = cc - WSA_WAIT_EVENT_0;
-
-               /*
-                * Add or delete events as requested
-                */
-               if (manager->event_written > 0)
-                       process_eventlist(evlist, manager);
-               /*
-                * Stopped to add and delete events on the list
-                */
-               if (iEvent == 0)
-                       continue;
-
-               wsock = evlist->aSockList[iEvent];
-               if (wsock == NULL)
-                       continue;
-
-               if (WSAEnumNetworkEvents(wsock->fd, wsock->hEvent,
-                       &NetworkEvents) == SOCKET_ERROR) {
-                       err = WSAGetLastError();
-                       isc__strerror(err, strbuf, sizeof(strbuf));
-                       UNEXPECTED_ERROR(__FILE__, __LINE__,
-                                        "event_wait: WSAEnumNetworkEvents() %s",
-                                        strbuf);
-                       /* XXXMPA */
-               }
-
-               if(NetworkEvents.lNetworkEvents == 0 ) {
-                       continue;
-               }
-
-               /*
-                * Check for FD_CLOSE events first. This takes precedence over
-                * other possible events as it needs to be handled instead of
-                * any other event if it happens on the socket.
-                * The error code found, if any, is fed into the internal_*()
-                * routines.
-                */
-               if(NetworkEvents.lNetworkEvents & FD_CLOSE) {
-                       event_errno = NetworkEvents.iErrorCode[FD_CLOSE_BIT];
-               } else if (NetworkEvents.lNetworkEvents & FD_ACCEPT) {
-                       event_errno = NetworkEvents.iErrorCode[FD_ACCEPT_BIT];
-               } else if (NetworkEvents.lNetworkEvents & FD_CONNECT) {
-                       event_errno = NetworkEvents.iErrorCode[FD_CONNECT_BIT];
-               } else {
-                       UNEXPECTED_ERROR(__FILE__, __LINE__,
-                                        "event_wait: WSAEnumNetworkEvents() "
-                                        "unexpected event bit set: %0x",
-                                        NetworkEvents.lNetworkEvents);
-               }
-
-               if (wsock->references > 0 && wsock->pending_close == 0) {
-                       LOCK(&wsock->lock);
-                       if (wsock->listener == 1 &&
-                           wsock->pending_accept == 0) {
-                               wsock->pending_accept = 1;
-                               internal_accept(wsock, event_errno);
-                               UNLOCK(&wsock->lock);
-                       } else {
-                               internal_connect(wsock, event_errno);
-                               UNLOCK(&wsock->lock);
-                               eventlist_event_delete(wsock, evlist, manager);
-                       }
-               }
-       }
-
-       manager_log(manager, TRACE,
-                   isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
-                                  ISC_MSG_EXITING, "event_wait exiting"));
-
-       return ((isc_threadresult_t)0);
-}
-
 /*
  * Create a new socket manager.
  */
 isc_result_t
 isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
        isc_socketmgr_t *manager;
-       events_thread_t *evthread = NULL;
        isc_result_t result;
 
        REQUIRE(managerp != NULL && *managerp == NULL);
@@ -2990,33 +2477,10 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
 
        iocompletionport_init(manager); /* Create the Completion Ports */
 
-       /*
-        * Event Wait Thread Initialization
-        */
-       ISC_LIST_INIT(manager->ev_threads);
-
-       /*
-        * Start up the initial event wait thread.
-        */
-       result = event_thread_create(&evthread, manager);
-       if (result != ISC_R_SUCCESS) {
-               isc_condition_destroy(&manager->shutdown_ok);
-               DESTROYLOCK(&manager->lock);
-               isc_mem_put(mctx, manager, sizeof(*manager));
-               return (result);
-       }
-
-       manager->prime_alert = evthread->sockev_list.aEventList[0];
-       manager->event_written = 0;
        manager->bShutdown = ISC_FALSE;
-       manager->totalHandles = 0;
        manager->totalSockets = 0;
-       manager->totalHandleRequests = 0;
        manager->iocp_total = 0;
 
-       /* Initialize the event update list */
-       ISC_LIST_INIT(manager->event_updates);
-
        *managerp = manager;
 
        return (ISC_R_SUCCESS);
@@ -3027,10 +2491,10 @@ isc_socketmgr_destroy(isc_socketmgr_t **managerp) {
        isc_socketmgr_t *manager;
        int i;
        isc_mem_t *mctx;
-       events_thread_t *evthread;
 
 #if XXXMLG_DEBUG
-       fclose(logfile);
+       if (logfile)
+               fclose(logfile);
 #endif
 
        /*
@@ -3060,35 +2524,15 @@ isc_socketmgr_destroy(isc_socketmgr_t **managerp) {
         * Here, we need to had some wait code for the completion port
         * thread.
         */
-       signal_iocompletionport_exit(manager);
-       manager->bShutdown = ISC_TRUE;
-
-       /*
-        * Wait for threads to exit.
-        */
-
-       /*
-        * Shut down the event wait threads
-        */
-       evthread = ISC_LIST_HEAD(manager->ev_threads);
-       while (evthread != NULL) {
-               WSASetEvent(evthread->sockev_list.aEventList[0]);
-               if (isc_thread_join(evthread->thread_handle, NULL) != ISC_R_SUCCESS)
-                       UNEXPECTED_ERROR(__FILE__, __LINE__,
-                                "isc_thread_join() for event_wait %s",
-                                isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
-                                               ISC_MSG_FAILED, "failed"));
-               ISC_LIST_DEQUEUE(manager->ev_threads, evthread, link);
-               isc_mem_put(manager->mctx, evthread, sizeof(*evthread));
-               evthread = ISC_LIST_HEAD(manager->ev_threads);
-       }
+       signal_iocompletionport_exit(manager);
+       manager->bShutdown = ISC_TRUE;
 
        /*
-        * Now the I/O Completion Port Worker Threads
+        * Wait for threads to exit.
         */
        for (i = 0; i < manager->maxIOCPThreads; i++) {
-               if (isc_thread_join((isc_thread_t) manager->hIOCPThreads[i], NULL)
-                   != ISC_R_SUCCESS)
+               if (isc_thread_join((isc_thread_t) manager->hIOCPThreads[i],
+                       NULL) != ISC_R_SUCCESS)
                        UNEXPECTED_ERROR(__FILE__, __LINE__,
                                 "isc_thread_join() for Completion Port %s",
                                 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
@@ -3112,11 +2556,36 @@ isc_socketmgr_destroy(isc_socketmgr_t **managerp) {
        *managerp = NULL;
 }
 
+static void
+queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev)
+{      /* Attach to task, flag dev as attached, append dev to sock->recv_list. */
+       isc_task_t *ntask = NULL;
+
+       isc_task_attach(task, &ntask);  /* ntask is only used for the log below */
+       dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
+
+       /*
+        * Enqueue the request.
+        */
+       INSIST(!ISC_LINK_LINKED(dev, ev_link)); /* dev must not already be linked */
+       ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);
+
+       socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
+                  "queue_receive_event: event %p -> task %p",
+                  dev, ntask);
+}
+
+/*
+ * Check the pending receive queue, and if we have data pending, give it to this
+ * caller.  If we have none, queue an I/O request.  If this caller is not the first
+ * on the list, then we will just queue this event and return.
+ *
+ * Caller must have the socket locked.
+ */
 static isc_result_t
 socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
            unsigned int flags)
 {
-       int io_state;
        int cc = 0;
        isc_task_t *ntask = NULL;
        isc_result_t result = ISC_R_SUCCESS;
@@ -3124,45 +2593,26 @@ socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
 
        dev->ev_sender = task;
 
-       LOCK(&sock->lock);
-       io_state = startio_recv(sock, dev, &cc, &recv_errno);
-
-       switch (io_state) {
-       case DOIO_PENDING:      /* I/O Started. Nothing to be done */
-       case DOIO_SOFT:
-               /*
-                * We couldn't read all or part of the request right now, so
-                * queue it.
-                *
-                * Attach to socket and to task
-                */
-               isc_task_attach(task, &ntask);
-               dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
-
-               /*
-                * Enqueue the request.
-                */
-               ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);
-
-               socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
-                          "socket_recv: event %p -> task %p",
-                          dev, ntask);
+       if (sock->fd == INVALID_SOCKET)
+               return (ISC_R_EOF);
 
-               if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
-                       result = ISC_R_INPROGRESS;
-               break;
+       /*
+        * Queue our event on the list of things to do.  Call our function to
+        * attempt to fill buffers as much as possible, and return done events.
+        * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE
+        * here and tell our caller that we could not satisfy it immediately.
+        */
+       queue_receive_event(sock, task, dev);
+       if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
+               result = ISC_R_INPROGRESS;
 
-       case DOIO_EOF:
-               dev->result = ISC_R_EOF;
-               /* fallthrough */
+       completeio_recv(sock);
 
-       case DOIO_HARD:
-       case DOIO_SUCCESS:
-               if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0)
-                       send_recvdone_event(sock, &dev);
-               break;
-       }
-       UNLOCK(&sock->lock);
+       /*
+        * If there are more receivers waiting for data, queue another receive
+        * here.
+        */
+       queue_receive_request(sock);
 
        return (result);
 }
@@ -3176,8 +2626,20 @@ isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
        isc_socketmgr_t *manager;
        unsigned int iocount;
        isc_buffer_t *buffer;
+       isc_result_t ret;
 
        REQUIRE(VALID_SOCKET(sock));
+       LOCK(&sock->lock);
+       CONSISTENT(sock);
+
+       /*
+        * Make sure that the socket is not closed.  XXXMLG change error here?
+        */
+       if (sock->fd == INVALID_SOCKET) {
+               UNLOCK(&sock->lock);
+               return (ISC_R_CONNREFUSED);
+       }
+
        REQUIRE(buflist != NULL);
        REQUIRE(!ISC_LIST_EMPTY(*buflist));
        REQUIRE(task != NULL);
@@ -3193,6 +2655,7 @@ isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
 
        dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
        if (dev == NULL) {
+               UNLOCK(&sock->lock);
                return (ISC_R_NOMEMORY);
        }
 
@@ -3218,7 +2681,10 @@ isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
                buffer = ISC_LIST_HEAD(*buflist);
        }
 
-       return (socket_recv(sock, dev, task, 0));
+       ret = socket_recv(sock, dev, task, 0);
+
+       UNLOCK(&sock->lock);
+       return (ret);
 }
 
 isc_result_t
@@ -3227,8 +2693,19 @@ isc_socket_recv(isc_socket_t *sock, isc_region_t *region, unsigned int minimum,
 {
        isc_socketevent_t *dev;
        isc_socketmgr_t *manager;
+       isc_result_t ret;
 
        REQUIRE(VALID_SOCKET(sock));
+       LOCK(&sock->lock);
+       CONSISTENT(sock);
+
+       /*
+        * make sure that the socket's not closed
+        */
+       if (sock->fd == INVALID_SOCKET) {
+               UNLOCK(&sock->lock);
+               return (ISC_R_CONNREFUSED);
+       }
        REQUIRE(action != NULL);
 
        manager = sock->manager;
@@ -3237,10 +2714,14 @@ isc_socket_recv(isc_socket_t *sock, isc_region_t *region, unsigned int minimum,
        INSIST(sock->bound);
 
        dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
-       if (dev == NULL)
+       if (dev == NULL) {
+               UNLOCK(&sock->lock);
                return (ISC_R_NOMEMORY);
+       }
 
-       return (isc_socket_recv2(sock, region, minimum, task, dev, 0));
+       ret = isc_socket_recv2(sock, region, minimum, task, dev, 0);
+       UNLOCK(&sock->lock);
+       return (ret);   
 }
 
 isc_result_t
@@ -3248,8 +2729,22 @@ isc_socket_recv2(isc_socket_t *sock, isc_region_t *region,
                 unsigned int minimum, isc_task_t *task,
                 isc_socketevent_t *event, unsigned int flags)
 {
-       event->ev_sender = sock;
+       isc_result_t ret;
+
+       REQUIRE(VALID_SOCKET(sock));
+       LOCK(&sock->lock);
+       CONSISTENT(sock);
+
        event->result = ISC_R_UNEXPECTED;
+       event->ev_sender = sock;
+       /*
+        * make sure that the socket's not closed
+        */
+       if (sock->fd == INVALID_SOCKET) {
+               UNLOCK(&sock->lock);
+               return (ISC_R_CONNREFUSED);
+       }
+
        ISC_LIST_INIT(event->bufferlist);
        event->region = *region;
        event->n = 0;
@@ -3268,9 +2763,14 @@ isc_socket_recv2(isc_socket_t *sock, isc_region_t *region,
                        event->minimum = minimum;
        }
 
-       return (socket_recv(sock, event, task, flags));
+       ret = socket_recv(sock, event, task, flags);
+       UNLOCK(&sock->lock);
+       return (ret);
 }
 
+/*
+ * Caller must have the socket locked.
+ */
 static isc_result_t
 socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
            isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
@@ -3279,7 +2779,6 @@ socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
        int io_state;
        int send_errno = 0;
        int cc = 0;
-       isc_boolean_t have_lock = ISC_FALSE;
        isc_task_t *ntask = NULL;
        isc_result_t result = ISC_R_SUCCESS;
 
@@ -3301,10 +2800,7 @@ socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
                dev->pktinfo.ipi6_ifindex = 0;
        }
 
-       LOCK(&sock->lock);
-       have_lock = ISC_TRUE;
        io_state = startio_send(sock, dev, &cc, &send_errno);
-
        switch (io_state) {
        case DOIO_PENDING:      /* I/O started. Nothing more to do */
        case DOIO_SOFT:
@@ -3315,14 +2811,11 @@ socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
                if ((flags & ISC_SOCKFLAG_NORETRY) == 0) {
                        isc_task_attach(task, &ntask);
                        dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
-                       if (!have_lock) {
-                               LOCK(&sock->lock);
-                               have_lock = ISC_TRUE;
-                       }
 
                        /*
                         * Enqueue the request.
                         */
+                       INSIST(!ISC_LINK_LINKED(dev, ev_link));
                        ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);
 
                        socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
@@ -3338,9 +2831,6 @@ socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
                break;
        }
 
-       if (have_lock)
-               UNLOCK(&sock->lock);
-
        return (result);
 }
 
@@ -3362,8 +2852,20 @@ isc_socket_sendto(isc_socket_t *sock, isc_region_t *region,
 {
        isc_socketevent_t *dev;
        isc_socketmgr_t *manager;
+       isc_result_t ret;
 
        REQUIRE(VALID_SOCKET(sock));
+
+       LOCK(&sock->lock);
+       CONSISTENT(sock);
+
+       /*
+        * make sure that the socket's not closed
+        */
+       if (sock->fd == INVALID_SOCKET) {
+               UNLOCK(&sock->lock);
+               return (ISC_R_CONNREFUSED);
+       }
        REQUIRE(region != NULL);
        REQUIRE(task != NULL);
        REQUIRE(action != NULL);
@@ -3375,11 +2877,14 @@ isc_socket_sendto(isc_socket_t *sock, isc_region_t *region,
 
        dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
        if (dev == NULL) {
+               UNLOCK(&sock->lock);
                return (ISC_R_NOMEMORY);
        }
        dev->region = *region;
 
-       return (socket_send(sock, dev, task, address, pktinfo, 0));
+       ret = socket_send(sock, dev, task, address, pktinfo, 0);
+       UNLOCK(&sock->lock);
+       return (ret);
 }
 
 isc_result_t
@@ -3399,8 +2904,20 @@ isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
        isc_socketmgr_t *manager;
        unsigned int iocount;
        isc_buffer_t *buffer;
+       isc_result_t ret;
 
        REQUIRE(VALID_SOCKET(sock));
+
+       LOCK(&sock->lock);
+       CONSISTENT(sock);
+
+       /*
+        * make sure that the socket's not closed
+        */
+       if (sock->fd == INVALID_SOCKET) {
+               UNLOCK(&sock->lock);
+               return (ISC_R_CONNREFUSED);
+       }
        REQUIRE(buflist != NULL);
        REQUIRE(!ISC_LIST_EMPTY(*buflist));
        REQUIRE(task != NULL);
@@ -3414,6 +2931,7 @@ isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
 
        dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
        if (dev == NULL) {
+               UNLOCK(&sock->lock);
                return (ISC_R_NOMEMORY);
        }
 
@@ -3427,7 +2945,9 @@ isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
                buffer = ISC_LIST_HEAD(*buflist);
        }
 
-       return (socket_send(sock, dev, task, address, pktinfo, 0));
+       ret = socket_send(sock, dev, task, address, pktinfo, 0);
+       UNLOCK(&sock->lock);
+       return (ret);
 }
 
 isc_result_t
@@ -3436,18 +2956,33 @@ isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region,
                   isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
                   isc_socketevent_t *event, unsigned int flags)
 {
+       isc_result_t ret;
+
+       REQUIRE(VALID_SOCKET(sock));
+       LOCK(&sock->lock);
+       CONSISTENT(sock);
+
        REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0);
        if ((flags & ISC_SOCKFLAG_NORETRY) != 0)
                REQUIRE(sock->type == isc_sockettype_udp);
        event->ev_sender = sock;
        event->result = ISC_R_UNEXPECTED;
+       /*
+        * make sure that the socket's not closed
+        */
+       if (sock->fd == INVALID_SOCKET) {
+               UNLOCK(&sock->lock);
+               return (ISC_R_CONNREFUSED);
+       }
        ISC_LIST_INIT(event->bufferlist);
        event->region = *region;
        event->n = 0;
        event->offset = 0;
        event->attributes = 0;
 
-       return (socket_send(sock, event, task, address, pktinfo, flags));
+       ret = socket_send(sock, event, task, address, pktinfo, flags);
+       UNLOCK(&sock->lock);
+       return (ret);
 }
 
 isc_result_t
@@ -3457,7 +2992,17 @@ isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr,
        char strbuf[ISC_STRERRORSIZE];
        int on = 1;
 
+       REQUIRE(VALID_SOCKET(sock));
        LOCK(&sock->lock);
+       CONSISTENT(sock);
+
+       /*
+        * make sure that the socket's not closed
+        */
+       if (sock->fd == INVALID_SOCKET) {
+               UNLOCK(&sock->lock);
+               return (ISC_R_CONNREFUSED);
+       }
 
        INSIST(!sock->bound);
 
@@ -3528,11 +3073,19 @@ isc_socket_filter(isc_socket_t *sock, const char *filter) {
 isc_result_t
 isc_socket_listen(isc_socket_t *sock, unsigned int backlog) {
        char strbuf[ISC_STRERRORSIZE];
-       isc_result_t retstat;
 
        REQUIRE(VALID_SOCKET(sock));
 
        LOCK(&sock->lock);
+       CONSISTENT(sock);
+
+       /*
+        * make sure that the socket's not closed
+        */
+       if (sock->fd == INVALID_SOCKET) {
+               UNLOCK(&sock->lock);
+               return (ISC_R_CONNREFUSED);
+       }
 
        REQUIRE(!sock->listener);
        REQUIRE(sock->bound);
@@ -3550,20 +3103,10 @@ isc_socket_listen(isc_socket_t *sock, unsigned int backlog) {
                return (ISC_R_UNEXPECTED);
        }
 
+       socket_log(__LINE__, sock, NULL, TRACE,
+                  isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "listening");
        sock->listener = 1;
-
-       /* Add the socket to the list of events to accept */
-       retstat = socket_event_add(sock, FD_CLOSE);
-       if (retstat != ISC_R_SUCCESS) {
-               UNLOCK(&sock->lock);
-               if (retstat != ISC_R_NOSPACE) {
-                       isc__strerror(WSAGetLastError(), strbuf,
-                                       sizeof(strbuf));
-                       UNEXPECTED_ERROR(__FILE__, __LINE__,
-                               "isc_socket_listen: socket_event_add: %s", strbuf);
-               }
-               return (retstat);
-       }
+       _set_state(sock, SOCK_LISTEN);
 
        UNLOCK(&sock->lock);
        return (ISC_R_SUCCESS);
@@ -3576,17 +3119,28 @@ isc_result_t
 isc_socket_accept(isc_socket_t *sock,
                  isc_task_t *task, isc_taskaction_t action, const void *arg)
 {
-       isc_socket_newconnev_t *dev;
+       isc_socket_newconnev_t *adev;
        isc_socketmgr_t *manager;
        isc_task_t *ntask = NULL;
        isc_socket_t *nsock;
        isc_result_t result;
+       IoCompletionInfo *lpo;
 
        REQUIRE(VALID_SOCKET(sock));
+
        manager = sock->manager;
        REQUIRE(VALID_MANAGER(manager));
 
        LOCK(&sock->lock);
+       CONSISTENT(sock);
+
+       /*
+        * make sure that the socket's not closed
+        */
+       if (sock->fd == INVALID_SOCKET) {
+               UNLOCK(&sock->lock);
+               return (ISC_R_CONNREFUSED);
+       }
 
        REQUIRE(sock->listener);
 
@@ -3595,55 +3149,78 @@ isc_socket_accept(isc_socket_t *sock,
         * this event to.  Just before the actual event is delivered the
         * actual ev_sender will be touched up to be the socket.
         */
-       dev = (isc_socket_newconnev_t *)
+       adev = (isc_socket_newconnev_t *)
                isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN,
-                                  action, arg, sizeof(*dev));
-       if (dev == NULL) {
+                                  action, arg, sizeof(*adev));
+       if (adev == NULL) {
                UNLOCK(&sock->lock);
                return (ISC_R_NOMEMORY);
        }
-       ISC_LINK_INIT(dev, ev_link);
+       ISC_LINK_INIT(adev, ev_link);
 
        result = allocate_socket(manager, sock->type, &nsock);
        if (result != ISC_R_SUCCESS) {
-               isc_event_free((isc_event_t **)&dev);
+               isc_event_free((isc_event_t **)&adev);
                UNLOCK(&sock->lock);
                return (result);
        }
 
+       /*
+        * AcceptEx() requires we pass in a socket.
+        */
+       nsock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP);
+       if (nsock->fd == INVALID_SOCKET) {
+               free_socket(&nsock, __LINE__);
+               isc_event_free((isc_event_t **)&adev);
+               UNLOCK(&sock->lock);
+               return (ISC_R_FAILURE); // XXXMLG need real error message
+       }
+
        /*
         * Attach to socket and to task.
         */
        isc_task_attach(task, &ntask);
        nsock->references++;
 
-       dev->ev_sender = ntask;
-       dev->newsocket = nsock;
+       adev->ev_sender = ntask;
+       adev->newsocket = nsock;
+       _set_state(nsock, SOCK_ACCEPT);
 
        /*
-        * Wait for connects.
+        * Queue io completion for an accept().
         */
-       if (ISC_LIST_EMPTY(sock->accept_list) &&
-           WSAEventSelect(sock->fd, sock->hEvent, FD_ACCEPT | FD_CLOSE) != 0) {
-               char strbuf[ISC_STRERRORSIZE];
-               int stat;
-               const char *msg;
-               stat = WSAGetLastError();
-               isc__strerror(stat, strbuf, sizeof(strbuf));
-               msg = isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
-                                    ISC_MSG_FAILED, "failed");
-               UNEXPECTED_ERROR(__FILE__, __LINE__, "WSAEventSelect: %s: %s",
-                                msg, strbuf);
-               isc_task_detach(&ntask);
-               isc_socket_detach(&nsock);
-               isc_event_free((isc_event_t **)&dev);
-               UNLOCK(&sock->lock);
-               return (ISC_R_UNEXPECTED);
-       }
+       lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
+                                           HEAP_ZERO_MEMORY,
+                                           sizeof(IoCompletionInfo));
+       RUNTIME_CHECK(lpo != NULL);
+       lpo->acceptbuffer = (void *)HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY,
+               (sizeof(SOCKADDR) + 16) * 2);
+       RUNTIME_CHECK(lpo->acceptbuffer != NULL);
+
+       lpo->adev = adev;
+       lpo->request_type = SOCKET_ACCEPT;
+
+       ISCAcceptEx(sock->fd, 
+                   nsock->fd,                          /* Accepted Socket */
+                   lpo->acceptbuffer,                  /* Buffer for initial Recv */
+                   0,                                  /* Length of Buffer */
+                   sizeof(SOCKADDR) + 16,              /* Local address length + 16 */
+                   sizeof(SOCKADDR) + 16,              /* Remote address length + 16 */
+                   (LPDWORD)&lpo->received_bytes,      /* Bytes Recved */
+                   (LPOVERLAPPED)lpo                   /* Overlapped structure */
+                   );
+       iocompletionport_update(nsock);
+
+       socket_log(__LINE__, sock, NULL, TRACE,
+                  isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND,
+                  "accepting for nsock %p fd %d", nsock, nsock->fd);
+
        /*
         * Enqueue the event
         */
-       ISC_LIST_ENQUEUE(sock->accept_list, dev, ev_link);
+       ISC_LIST_ENQUEUE(sock->accept_list, adev, ev_link);
+       sock->pending_accept++;
+       sock->pending_iocp++;
 
        UNLOCK(&sock->lock);
        return (ISC_R_SUCCESS);
@@ -3653,13 +3230,10 @@ isc_result_t
 isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr,
                   isc_task_t *task, isc_taskaction_t action, const void *arg)
 {
-       isc_socket_connev_t *dev;
+       isc_socket_connev_t *cdev;
        isc_task_t *ntask = NULL;
        isc_socketmgr_t *manager;
-       int cc;
-       int retstat;
-       int errval;
-       char strbuf[ISC_STRERRORSIZE];
+       IoCompletionInfo *lpo;
 
        REQUIRE(VALID_SOCKET(sock));
        REQUIRE(addr != NULL);
@@ -3674,104 +3248,59 @@ isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr,
                return (ISC_R_MULTICAST);
 
        LOCK(&sock->lock);
-
-       REQUIRE(!sock->connecting);
-
-       dev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock,
-                                                       ISC_SOCKEVENT_CONNECT,
-                                                       action, arg,
-                                                       sizeof(*dev));
-       if (dev == NULL) {
-               UNLOCK(&sock->lock);
-               return (ISC_R_NOMEMORY);
-       }
-       ISC_LINK_INIT(dev, ev_link);
+       CONSISTENT(sock);
 
        /*
-        * Try to do the connect right away, as there can be only one
-        * outstanding, and it might happen to complete.
+        * make sure that the socket's not closed
         */
-       sock->address = *addr;
-       cc = connect(sock->fd, &addr->type.sa, addr->length);
-       if (cc < 0) {
-               errval = WSAGetLastError();
-               if (SOFT_ERROR(errval) || errval == WSAEINPROGRESS)
-                       goto queue;
-
-               switch (errval) {
-#define ERROR_MATCH(a, b) case a: dev->result = b; goto err_exit;
-                       ERROR_MATCH(WSAEACCES, ISC_R_NOPERM);
-                       ERROR_MATCH(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
-                       ERROR_MATCH(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
-                       ERROR_MATCH(WSAECONNREFUSED, ISC_R_CONNREFUSED);
-                       ERROR_MATCH(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH);
-                       ERROR_MATCH(WSAEHOSTDOWN, ISC_R_HOSTUNREACH);
-                       ERROR_MATCH(WSAENETUNREACH, ISC_R_NETUNREACH);
-                       ERROR_MATCH(WSAENOBUFS, ISC_R_NORESOURCES);
-                       ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH);
-                       ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED);
-#undef ERROR_MATCH
-               }
-
-               sock->connected = 0;
-
-               isc__strerror(errval, strbuf, sizeof(strbuf));
-               UNEXPECTED_ERROR(__FILE__, __LINE__, "%d/%s", errval, strbuf);
-
+       if (sock->fd == INVALID_SOCKET) {
                UNLOCK(&sock->lock);
-               isc_event_free((isc_event_t **)&dev);
-               return (ISC_R_UNEXPECTED);
+               return (ISC_R_CONNREFUSED);
+       }
 
-       err_exit:
-               sock->connected = 0;
-               isc_task_send(task, (isc_event_t **)&dev);
+       REQUIRE(!sock->pending_connect);
 
+       cdev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock,
+                                                       ISC_SOCKEVENT_CONNECT,
+                                                       action, arg,
+                                                       sizeof(*cdev));
+       if (cdev == NULL) {
                UNLOCK(&sock->lock);
-               return (ISC_R_SUCCESS);
+               return (ISC_R_NOMEMORY);
        }
+       ISC_LINK_INIT(cdev, ev_link);
 
        /*
-        * If connect completed, fire off the done event.
+        * Queue io completion for a connect().
         */
-       if (cc == 0) {
-               sock->connected = 1;
-               sock->bound = 1;
-               dev->result = ISC_R_SUCCESS;
-               isc_task_send(task, (isc_event_t **)&dev);
-
-               UNLOCK(&sock->lock);
-               return (ISC_R_SUCCESS);
-       }
+       lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
+                                           HEAP_ZERO_MEMORY,
+                                           sizeof(IoCompletionInfo));
+       lpo->cdev = cdev;
+       lpo->request_type = SOCKET_CONNECT;
 
- queue:
+       sock->address = *addr;
+       ISCConnectEx(sock->fd, &addr->type.sa, addr->length,
+               NULL, 0, NULL, (LPOVERLAPPED)lpo);
 
        /*
         * Attach to task.
         */
        isc_task_attach(task, &ntask);
+       cdev->ev_sender = ntask;
 
-       sock->connecting = 1;
-
-       dev->ev_sender = ntask;
+       sock->pending_connect = 1;
+       _set_state(sock, SOCK_CONNECT);
 
        /*
         * Enqueue the request.
         */
-       sock->connect_ev = dev;
-       /* Add the socket to the list of events to connect */
-       retstat = socket_event_add(sock, FD_CONNECT | FD_CLOSE);
-       if (retstat != ISC_R_SUCCESS) {
-               UNLOCK(&sock->lock);
-               if (retstat != ISC_R_NOSPACE) {
-                       isc__strerror(WSAGetLastError(), strbuf,
-                                       sizeof(strbuf));
-                       UNEXPECTED_ERROR(__FILE__, __LINE__,
-                               "isc_socket_connect: socket_event_add: %s", strbuf);
-               }
-               return (retstat);
-       }
+       sock->connect_ev = cdev;
+       sock->pending_iocp++;
 
+       CONSISTENT(sock);
        UNLOCK(&sock->lock);
+
        return (ISC_R_SUCCESS);
 }
 
@@ -3783,6 +3312,15 @@ isc_socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) {
        REQUIRE(addressp != NULL);
 
        LOCK(&sock->lock);
+       CONSISTENT(sock);
+
+       /*
+        * make sure that the socket's not closed
+        */
+       if (sock->fd == INVALID_SOCKET) {
+               UNLOCK(&sock->lock);
+               return (ISC_R_CONNREFUSED);
+       }
 
        if (sock->connected) {
                *addressp = sock->address;
@@ -3806,6 +3344,15 @@ isc_socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) {
        REQUIRE(addressp != NULL);
 
        LOCK(&sock->lock);
+       CONSISTENT(sock);
+
+       /*
+        * make sure that the socket's not closed
+        */
+       if (sock->fd == INVALID_SOCKET) {
+               UNLOCK(&sock->lock);
+               return (ISC_R_CONNREFUSED);
+       }
 
        if (!sock->bound) {
                result = ISC_R_NOTBOUND;
@@ -3847,18 +3394,16 @@ isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
                return;
 
        LOCK(&sock->lock);
-       socket_close(sock);
-       if (sock->iocp == 1)
-               sock->pending_free = 1;
-       UNLOCK(&sock->lock);
+       CONSISTENT(sock);
+
+       /*
+        * make sure that the socket's not closed
+        */
+       if (sock->fd == INVALID_SOCKET) {
+               UNLOCK(&sock->lock);
+               return;
+       }
 
-/*
- * Temporarily disable this.  Windows cannot cancel a single
- * I/O in its current popular varieties, but leave this code
- * here for when/if we belive we can rely on doing so.
- * XXXMLG
- */
-#if 0
        /*
         * All of these do the same thing, more or less.
         * Each will:
@@ -3869,14 +3414,13 @@ isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
         *        its done event with status of "ISC_R_CANCELED".
         *      o Reset any state needed.
         */
-       if (((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV)
-           && !ISC_LIST_EMPTY(sock->recv_list)) {
+
+       if ((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV) {
                isc_socketevent_t      *dev;
                isc_socketevent_t      *next;
                isc_task_t             *current_task;
 
                dev = ISC_LIST_HEAD(sock->recv_list);
-
                while (dev != NULL) {
                        current_task = dev->ev_sender;
                        next = ISC_LIST_NEXT(dev, ev_link);
@@ -3887,9 +3431,9 @@ isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
                        dev = next;
                }
        }
+       how &= ~ISC_SOCKCANCEL_RECV;
 
-       if (((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND)
-           && !ISC_LIST_EMPTY(sock->send_list)) {
+       if ((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND) {
                isc_socketevent_t      *dev;
                isc_socketevent_t      *next;
                isc_task_t             *current_task;
@@ -3906,6 +3450,7 @@ isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
                        dev = next;
                }
        }
+       how &= ~ISC_SOCKCANCEL_SEND;
 
        if (((how & ISC_SOCKCANCEL_ACCEPT) == ISC_SOCKCANCEL_ACCEPT)
            && !ISC_LIST_EMPTY(sock->accept_list)) {
@@ -3914,40 +3459,25 @@ isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
                isc_task_t             *current_task;
 
                dev = ISC_LIST_HEAD(sock->accept_list);
-
                while (dev != NULL) {
                        current_task = dev->ev_sender;
                        next = ISC_LIST_NEXT(dev, ev_link);
 
                        if ((task == NULL) || (task == current_task)) {
 
-                               ISC_LIST_UNLINK(sock->accept_list, dev,
-                                               ev_link);
-
                                dev->newsocket->references--;
-                               free_socket(&dev->newsocket);
+                               closesocket(dev->newsocket->fd);
+                               dev->newsocket->fd = INVALID_SOCKET;
+                               free_socket(&dev->newsocket, __LINE__);
 
                                dev->result = ISC_R_CANCELED;
-                               dev->ev_sender = sock;
-                               isc_task_sendanddetach(&current_task,
-                                                      (isc_event_t **)&dev);
+                               send_acceptdone_event(sock, &dev);
                        }
 
                        dev = next;
                }
-               if (sock->hEvent != NULL &&
-                   WSAEventSelect(sock->fd, sock->hEvent, FD_CLOSE) != 0) {
-                       char strbuf[ISC_STRERRORSIZE];
-                       int stat;
-                       const char *msg;
-                       stat = WSAGetLastError();
-                       isc__strerror(stat, strbuf, sizeof(strbuf));
-                       msg = isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
-                                            ISC_MSG_FAILED, "failed");
-                       UNEXPECTED_ERROR(__FILE__, __LINE__,
-                                        "WSAEventSelect: %s: %s", msg, strbuf);
-               }
        }
+       how &= ~ISC_SOCKCANCEL_ACCEPT;
 
        /*
         * Connecting is not a list.
@@ -3957,38 +3487,64 @@ isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
                isc_socket_connev_t    *dev;
                isc_task_t             *current_task;
 
-               INSIST(sock->connecting);
-               sock->connecting = 0;
+               INSIST(sock->pending_connect);
 
                dev = sock->connect_ev;
                current_task = dev->ev_sender;
 
                if ((task == NULL) || (task == current_task)) {
-                       sock->connect_ev = NULL;
+                       closesocket(sock->fd);
+                       sock->fd = INVALID_SOCKET;
+                       _set_state(sock, SOCK_CLOSED);
 
+                       sock->connect_ev = NULL;
                        dev->result = ISC_R_CANCELED;
-                       dev->ev_sender = sock;
-                       isc_task_sendanddetach(&current_task,
-                                              (isc_event_t **)&dev);
+                       send_connectdone_event(sock, &dev);
                }
        }
+       how &= ~ISC_SOCKCANCEL_CONNECT;
 
-       UNLOCK(&sock->lock);
-#endif
+       maybe_free_socket(&sock, __LINE__);
 }
 
 isc_sockettype_t
 isc_socket_gettype(isc_socket_t *sock) {
+       isc_sockettype_t type;
+
        REQUIRE(VALID_SOCKET(sock));
 
-       return (sock->type);
+       LOCK(&sock->lock);
+
+       /*
+        * make sure that the socket's not closed
+        */
+       if (sock->fd == INVALID_SOCKET) {
+               UNLOCK(&sock->lock);
+               return (ISC_R_CONNREFUSED);
+       }
+
+       type = sock->type;
+       UNLOCK(&sock->lock);
+       return (type);
 }
 
 isc_boolean_t
 isc_socket_isbound(isc_socket_t *sock) {
        isc_boolean_t val;
 
+       REQUIRE(VALID_SOCKET(sock));
+
        LOCK(&sock->lock);
+       CONSISTENT(sock);
+
+       /*
+        * make sure that the socket's not closed
+        */
+       if (sock->fd == INVALID_SOCKET) {
+               UNLOCK(&sock->lock);
+               return (ISC_FALSE);
+       }
+
        val = ((sock->bound) ? ISC_TRUE : ISC_FALSE);
        UNLOCK(&sock->lock);
 
index 7fa77028f67b685b4684fef1d9ad1a8d92f6f753..1100ba782768b2501083e6ef18b04893a360dc5d 100644 (file)
@@ -15,7 +15,7 @@
  * PERFORMANCE OF THIS SOFTWARE.
  */
 
-/* $Id: time.c,v 1.38 2004/03/16 05:52:22 marka Exp $ */
+/* $Id: time.c,v 1.38.850.1 2008/09/04 05:47:09 each Exp $ */
 
 #include <config.h>
 
@@ -65,7 +65,7 @@ isc_interval_set(isc_interval_t *i, unsigned int seconds,
        REQUIRE(nanoseconds < NS_PER_S);
 
        i->interval = (LONGLONG)seconds * INTERVALS_PER_S
-               + nanoseconds / NS_INTERVAL;
+               + (nanoseconds + NS_INTERVAL - 1) / NS_INTERVAL;
 }
 
 isc_boolean_t