From: Evan Hunt Date: Thu, 4 Sep 2008 05:47:09 +0000 (+0000) Subject: prepare final release of 9.4.2-P2-W1 X-Git-Tag: v9.4.2-P2-W1^0 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=40a96669b818722b21964d367e5bef5d8e823b56;p=thirdparty%2Fbind9.git prepare final release of 9.4.2-P2-W1 --- diff --git a/CHANGES b/CHANGES index 0956017aa48..33e85e9f9b7 100644 --- a/CHANGES +++ b/CHANGES @@ -1,11 +1,19 @@ --- 9.4.2-P2-W1 released --- -2420. [bug] Windows socket handling cleanup. Let the IO +2432. [bug] More Windows socket handling improvements. Stop + using I/O events and use IO Completion Ports + throughout. Rewrite the receive path logic to make + it easier to support multiple simultaneous + requestrs in the future. Add stricter consistency + checking as a compile-time option (define + ISC_SOCKET_CONSISTENCY_CHECKS; defaults to off). + +2420. [bug] Windows socket handling cleanup. Let the io completion event send out cancelled read/write - done events, which keeps us from writing to memory - we no longer own. Add debugging socket_log() - function. Rework TCP socket handling to avoid - leaking sockets. + done events, which keeps us from writing to memeory + we no longer have ownership of. Add debugging + socket_log() function. Rework TCP socket handling + to not leak sockets. --- 9.4.2-P2 released --- diff --git a/lib/isc/include/isc/socket.h b/lib/isc/include/isc/socket.h index 951a06316ed..26393a0b100 100644 --- a/lib/isc/include/isc/socket.h +++ b/lib/isc/include/isc/socket.h @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: socket.h,v 1.57.18.6.46.4 2008/07/23 23:16:43 marka Exp $ */ +/* $Id: socket.h,v 1.57.18.6.46.4.6.1 2008/09/04 05:47:09 each Exp $ */ #ifndef ISC_SOCKET_H #define ISC_SOCKET_H 1 @@ -165,6 +165,8 @@ typedef enum { /*@{*/ /*! * What I/O events to cancel in isc_socket_cancel() calls. + * ISC_SOCKCANCEL_ALL *must* contain all the possible bits, + * and only those bits. */ #define ISC_SOCKCANCEL_RECV 0x00000001 /*%< cancel recv */ #define ISC_SOCKCANCEL_SEND 0x00000002 /*%< cancel send */ diff --git a/lib/isc/win32/errno2result.c b/lib/isc/win32/errno2result.c index 1836270be3c..6f4279ddad1 100644 --- a/lib/isc/win32/errno2result.c +++ b/lib/isc/win32/errno2result.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: errno2result.c,v 1.9.18.3.62.1 2008/08/21 00:50:09 each Exp $ */ +/* $Id: errno2result.c,v 1.9.18.3.62.2 2008/09/04 05:47:09 each Exp $ */ #include @@ -64,29 +64,36 @@ isc__errno2resultx(int posixerrno, const char *file, int line) { case ERROR_CANCELLED: return (ISC_R_CANCELED); case ERROR_CONNECTION_REFUSED: + case WSAECONNREFUSED: return (ISC_R_CONNREFUSED); + case WSAENOTCONN: case ERROR_CONNECTION_INVALID: return (ISC_R_NOTCONNECTED); case ERROR_HOST_UNREACHABLE: + case WSAEHOSTUNREACH: return (ISC_R_HOSTUNREACH); case ERROR_NETWORK_UNREACHABLE: + case WSAENETUNREACH: return (ISC_R_NETUNREACH); case ERROR_NO_NETWORK: return (ISC_R_NETUNREACH); - case ERROR_OPERATION_ABORTED: - return (ISC_R_CONNECTIONRESET); case ERROR_PORT_UNREACHABLE: return (ISC_R_HOSTUNREACH); + case WSAECONNRESET: + case WSAENETRESET: + case WSAECONNABORTED: + case WSAEDISCON: + case ERROR_OPERATION_ABORTED: + case ERROR_CONNECTION_ABORTED: case ERROR_REQUEST_ABORTED: return (ISC_R_CONNECTIONRESET); case WSAEADDRNOTAVAIL: return (ISC_R_ADDRNOTAVAIL); - case WSAEHOSTUNREACH: - return (ISC_R_HOSTUNREACH); + case ERROR_NETNAME_DELETED: + case WSAENETDOWN: + return (ISC_R_NETUNREACH); case WSAEHOSTDOWN: return (ISC_R_HOSTUNREACH); - case WSAENETUNREACH: - return (ISC_R_NETUNREACH); case WSAENOBUFS: return (ISC_R_NORESOURCES); default: diff --git a/lib/isc/win32/include/isc/mutex.h b/lib/isc/win32/include/isc/mutex.h index bae0ed38e7b..86830b66c10 100644 --- a/lib/isc/win32/include/isc/mutex.h +++ b/lib/isc/win32/include/isc/mutex.h @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: mutex.h,v 1.17 2004/03/05 05:12:05 marka Exp $ */ +/* $Id: mutex.h,v 1.17.956.1 2008/09/04 05:47:09 each Exp $ */ #ifndef ISC_MUTEX_H #define ISC_MUTEX_H 1 @@ -27,10 +27,14 @@ typedef CRITICAL_SECTION isc_mutex_t; -/* This definition is here since WINBASE.H omits it for some reason */ - +/* + * This definition is here since somve versions of WINBASE.H + * omits it for some reason + */ +#if(_WIN32_WINNT < 0x0400) WINBASEAPI BOOL WINAPI TryEnterCriticalSection(LPCRITICAL_SECTION lpCriticalSection); +#endif /* _WIN32_WINNT < 0x0400 */ #define isc_mutex_init(mp) \ (InitializeCriticalSection((mp)), ISC_R_SUCCESS) @@ -46,6 +50,6 @@ TryEnterCriticalSection(LPCRITICAL_SECTION lpCriticalSection); /* * This is a placeholder for now since we are not keeping any mutex stats */ -#define isc_mutex_stats(fp) +#define isc_mutex_stats(fp) do {} while (0) #endif /* ISC_MUTEX_H */ diff --git a/lib/isc/win32/socket.c b/lib/isc/win32/socket.c index fe368ec6ff8..cb7022f8a59 100644 --- a/lib/isc/win32/socket.c +++ b/lib/isc/win32/socket.c @@ -15,37 +15,19 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: socket.c,v 1.30.18.20.12.5.6.1 2008/08/21 00:50:09 each Exp $ */ +/* $Id: socket.c,v 1.30.18.20.12.5.6.2 2008/09/04 05:47:09 each Exp $ */ -/* This code has been rewritten to take advantage of Windows Sockets - * I/O Completion Ports and Events. I/O Completion Ports is ONLY - * available on Windows NT, Windows 2000 and Windows XP series of - * the Windows Operating Systems. In CANNOT run on Windows 95, Windows 98 - * or the follow-ons to those Systems. +/* This code uses functions which are only available on Server 2003 and + * higher, and Windows XP and higher. * * This code is by nature multithreaded and takes advantage of various * features to pass on information through the completion port for - * when I/O is completed. All sends and receives are completed through - * the completion port. Due to an implementation bug in Windows 2000, - * Service Pack 2 must installed on the system for this code to run correctly. - * For details on this problem see Knowledge base article Q263823. - * The code checks for this. The number of Completion Port Worker threads - * used is the total number of CPU's + 1. This increases the likelihood that - * a Worker Thread is available for processing a completed request. + * when I/O is completed. All sends, receives, accepts, and connects are + * completed through the completion port. * - * All accepts and connects are accomplished through the WSAEventSelect() - * function and the event_wait loop. Events are added to and deleted from - * each event_wait thread via a common event_update stack owned by the socket - * manager. If the event_wait thread runs out of array space in the events - * array it will look for another event_wait thread to add the event. If it - * fails to find another one it will create a new thread to handle the - * outstanding event. - * - * A future enhancement is to use AcceptEx to take avantage of Overlapped - * I/O which allows for enhanced performance of TCP connections. - * This will also reduce the number of events that are waited on by the - * event_wait threads to just the connect sockets and reduce the number - * additional threads required. + * The number of Completion Port Worker threads used is the total number + * of CPU's + 1. This increases the likelihood that a Worker Thread is + * available for processing a completed request. * * XXXPDM 5 August, 2002 */ @@ -90,17 +72,38 @@ #include #include +#include + #include "errno2result.h" +/* + * How in the world can Microsoft exist with APIs like this? + * We can't actually call this directly, because it turns out + * no library exports this function. Instead, we need to + * issue a runtime call to get the address. + */ +LPFN_CONNECTEX ISCConnectEx; +LPFN_ACCEPTEX ISCAcceptEx; +LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs; + /* * 0 = no debugging, 1 = write to file "socket.log" in working directory. */ #define XXXMLG_DEBUG 0 - #if XXXMLG_DEBUG FILE *logfile = NULL; #endif +/* + * Run expensive internal consistancy checks. + */ +#ifdef ISC_SOCKET_CONSISTENCY_CHECKS +#define CONSISTENT(sock) consistent(sock) +#else +#define CONSISTENT(sock) do {} while (0) +#endif +static void consistent(isc_socket_t *sock); + /* * Define this macro to control the behavior of connection * resets on UDP sockets. See Microsoft KnowledgeBase Article Q263823 @@ -142,6 +145,7 @@ FILE *logfile = NULL; #define DOIO_HARD 2 /* i/o error, event sent */ #define DOIO_EOF 3 /* EOF, no event sent */ #define DOIO_PENDING 4 /* status when i/o is in process */ +#define DOIO_NEEDMORE 5 /* IO was processed, but we need more due to minimum */ #define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x) @@ -166,6 +170,19 @@ FILE *logfile = NULL; typedef isc_event_t intev_t; +/* + * Socket State + */ +enum { + SOCK_INITIALIZED, /* Socket Initialized */ + SOCK_OPEN, /* Socket opened but nothing yet to do */ + SOCK_DATA, /* Socket sending or receiving data */ + SOCK_LISTEN, /* TCP Socket listening for connects */ + SOCK_ACCEPT, /* TCP socket is waiting to accept */ + SOCK_CONNECT, /* TCP Socket connecting */ + SOCK_CLOSED, /* Socket has been closed */ +}; + #define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o') #define VALID_SOCKET(t) ISC_MAGIC_VALID(t, SOCKET_MAGIC) @@ -190,16 +207,13 @@ typedef isc_event_t intev_t; * Message header for recvmsg and sendmsg calls. * Used value-result for recvmsg, value only for sendmsg. */ - - struct msghdr { - void *msg_name; /* optional address */ - u_int msg_namelen; /* size of address */ + SOCKADDR to_addr; /* UDP send/recv address */ + int to_addr_len; /* length of the address */ WSABUF *msg_iov; /* scatter/gather array */ u_int msg_iovlen; /* # elements in msg_iov */ void *msg_control; /* ancillary data, see below */ u_int msg_controllen; /* ancillary data buffer len */ - int msg_flags; /* flags on received message */ int msg_totallen; /* total length of this message */ } msghdr; @@ -220,45 +234,63 @@ struct isc_socket { isc_socketmgr_t *manager; isc_mutex_t lock; isc_sockettype_t type; - OVERLAPPED overlapped; + /* Pointers to scatter/gather buffers */ WSABUF iov[ISC_SOCKET_MAXSCATTERGATHER]; - WSAEVENT hEvent; /* Event Handle */ - long wait_type; /* Events to wait on */ - WSAEVENT hAlert; /* Alert Event Handle */ - DWORD evthread_id; /* Event Thread Id for socket */ /* Locked by socket lock. */ ISC_LINK(isc_socket_t) link; - unsigned int references; - SOCKET fd; - int pf; + unsigned int references; /* EXTERNAL references */ + SOCKET fd; /* file handle */ + int pf; /* protocol family */ + + /* + * Each recv() call uses this buffer. It is a per-socket receive + * buffer that allows us to decouple the system recv() from the + * recv_list done events. This means the items on the recv_list + * can be removed without having to cancel pending system recv() + * calls. It also allows us to read-ahead in some cases. + */ + struct { + SOCKADDR from_addr; // UDP send/recv address + int from_addr_len; // length of the address + char *base; // the base of the buffer + char *consume_position; // where to start copying data from next + unsigned int len; // the actual size of this buffer + unsigned int remaining; // the number of bytes remaining + } recvbuf; ISC_LIST(isc_socketevent_t) send_list; ISC_LIST(isc_socketevent_t) recv_list; ISC_LIST(isc_socket_newconnev_t) accept_list; isc_socket_connev_t *connect_ev; - /* - * Internal events. Posted when a descriptor is readable or - * writable. These are statically allocated and never freed. - * They will be set to non-purgable before use. - */ - intev_t readable_ev; - intev_t writable_ev; - isc_sockaddr_t address; /* remote address */ - unsigned int pending_close : 1, - pending_accept : 1, - iocp : 1, /* I/O Completion Port */ - listener : 1, /* listener socket */ + unsigned int listener : 1, /* listener socket */ connected : 1, - connecting : 1, /* connect pending */ - bound : 1, /* bound to local addr */ - pending_free: 1; - unsigned int pending_recv; - unsigned int pending_send; + pending_connect : 1, /* connect pending */ + bound : 1; /* bound to local addr */ + + unsigned int pending_iocp; /* Should equal the counters below. Debug. */ + unsigned int pending_recv; /* Number of outstanding recv() calls. */ + unsigned int pending_send; /* Number of outstanding send() calls. */ + unsigned int pending_accept; /* Number of outstanding accept() calls. */ + unsigned int state; /* Socket state. Debugging and consistency checking. */ + int state_lineno; /* line which last touched state */ +}; + +#define _set_state(sock, _state) do { (sock)->state = (_state); (sock)->state_lineno = __LINE__; } while (0) + +/* + * Buffer structure + */ +typedef struct buflist buflist_t; + +struct buflist { + void *buf; + unsigned int buflen; + ISC_LINK(buflist_t) link; }; /* @@ -267,10 +299,15 @@ struct isc_socket { static HANDLE hHeapHandle = NULL; typedef struct IoCompletionInfo { - OVERLAPPED overlapped; - isc_socketevent_t *dev; - int request_type; - struct msghdr messagehdr; + OVERLAPPED overlapped; + isc_socketevent_t *dev; /* send()/recv() done event */ + isc_socket_connev_t *cdev; /* connect() done event */ + isc_socket_newconnev_t *adev; /* accept() done event */ + void *acceptbuffer; + DWORD received_bytes; + int request_type; + struct msghdr messagehdr; + ISC_LIST(buflist_t) bufferlist; /*%< list of buffers */ } IoCompletionInfo; /* @@ -280,52 +317,6 @@ typedef struct IoCompletionInfo { */ #define MAX_IOCPTHREADS 20 -/* - * event_change structure to handle adds and deletes from the list of - * events in the Wait - */ -typedef struct event_change event_change_t; - -struct event_change { - isc_socket_t *sock; - WSAEVENT hEvent; - DWORD evthread_id; - SOCKET fd; - unsigned int action; - ISC_LINK(event_change_t) link; -}; - -/* - * Note: We are using an array here since *WaitForMultiple* wants an array - * WARNING: This value may not be greater than 64 since the - * WSAWaitForMultipleEvents function is limited to 64 events. - */ - -#define MAX_EVENTS 64 - -/* - * List of events being waited on and their associated sockets - */ -typedef struct sock_event_list { - int max_event; - int total_events; - isc_socket_t *aSockList[MAX_EVENTS]; - WSAEVENT aEventList[MAX_EVENTS]; -} sock_event_list; - -/* - * Thread Event structure for managing the threads handling events - */ -typedef struct events_thread events_thread_t; - -struct events_thread { - isc_thread_t thread_handle; /* Thread's handle */ - DWORD thread_id; /* Thread's id */ - sock_event_list sockev_list; - isc_socketmgr_t *manager; - ISC_LINK(events_thread_t) link; -}; - #define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g') #define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC) @@ -335,21 +326,27 @@ struct isc_socketmgr { isc_mem_t *mctx; isc_mutex_t lock; /* Locked by manager lock. */ - ISC_LIST(event_change_t) event_updates; ISC_LIST(isc_socket_t) socklist; - int event_written; - WSAEVENT prime_alert; isc_boolean_t bShutdown; - ISC_LIST(events_thread_t) ev_threads; isc_condition_t shutdown_ok; HANDLE hIoCompletionPort; int maxIOCPThreads; HANDLE hIOCPThreads[MAX_IOCPTHREADS]; DWORD dwIOCPThreadIds[MAX_IOCPTHREADS]; - unsigned int totalHandles; - unsigned int totalSockets; - unsigned int totalHandleRequests; - unsigned int iocp_total; + + /* + * Debugging. + * Modified by InterlockedIncrement() and InterlockedDecrement() + */ + LONG totalSockets; + LONG iocp_total; +}; + +enum { + SOCKET_RECV, + SOCKET_SEND, + SOCKET_ACCEPT, + SOCKET_CONNECT }; /* @@ -358,21 +355,20 @@ struct isc_socketmgr { #define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER) #define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER) -static isc_threadresult_t WINAPI event_wait(void *uap); static isc_threadresult_t WINAPI SocketIoThread(LPVOID ThreadContext); -static void free_socket(isc_socket_t **); - -enum { - SOCKET_RECV, - SOCKET_SEND, -}; - -enum { - EVENT_ADD, - EVENT_DELETE -}; +static void maybe_free_socket(isc_socket_t **, int); +static void free_socket(isc_socket_t **, int); +static isc_boolean_t senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev); +static isc_boolean_t acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev); +static isc_boolean_t connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev); +static void send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev); +static void send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev); +static void send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev); +static void send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev); +static void send_recvdone_abort(isc_socket_t *sock, isc_result_t result); +static void queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev); +static void queue_receive_request(isc_socket_t *sock); -#if defined(ISC_SOCKET_DEBUG) /* * This is used to dump the contents of the sock structure * You should make sure that the sock is locked before @@ -383,26 +379,27 @@ void sock_dump(isc_socket_t *sock) { isc_socketevent_t *ldev; isc_socket_newconnev_t *ndev; + +#if 0 isc_sockaddr_t addr; char socktext[256]; - isc_socket_getpeername(sock, &addr); isc_sockaddr_format(&addr, socktext, sizeof(socktext)); printf("Remote Socket: %s\n", socktext); isc_socket_getsockname(sock, &addr); isc_sockaddr_format(&addr, socktext, sizeof(socktext)); printf("This Socket: %s\n", socktext); +#endif printf("\n\t\tSock Dump\n"); printf("\t\tfd: %u\n", sock->fd); printf("\t\treferences: %d\n", sock->references); printf("\t\tpending_accept: %d\n", sock->pending_accept); - printf("\t\tpending_close: %d\n", sock->pending_close); - printf("\t\tconnecting: %d\n", sock->connecting); + printf("\t\tconnecting: %d\n", sock->pending_connect); printf("\t\tconnected: %d\n", sock->connected); printf("\t\tbound: %d\n", sock->bound); - printf("\t\tiocp: %d\n", sock->iocp); + printf("\t\tpending_iocp: %d\n", sock->pending_iocp); printf("\t\tsocket type: %d\n", sock->type); printf("\n\t\tSock Recv List\n"); @@ -411,12 +408,14 @@ sock_dump(isc_socket_t *sock) { printf("\t\tdev: %p\n", ldev); ldev = ISC_LIST_NEXT(ldev, ev_link); } + printf("\n\t\tSock Send List\n"); ldev = ISC_LIST_HEAD(sock->send_list); while (ldev != NULL) { printf("\t\tdev: %p\n", ldev); ldev = ISC_LIST_NEXT(ldev, ev_link); } + printf("\n\t\tSock Accept List\n"); ndev = ISC_LIST_HEAD(sock->accept_list); while (ndev != NULL) { @@ -424,7 +423,6 @@ sock_dump(isc_socket_t *sock) { ndev = ISC_LIST_NEXT(ndev, ev_link); } } -#endif static void socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address, @@ -474,7 +472,7 @@ iocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) { manager->hIOCPThreads[i] = CreateThread(NULL, 0, SocketIoThread, manager, 0, &manager->dwIOCPThreadIds[i]); - if(manager->hIOCPThreads[i] == NULL) { + if (manager->hIOCPThreads[i] == NULL) { errval = GetLastError(); isc__strerror(errval, strbuf, sizeof(strbuf)); FATAL_ERROR(__FILE__, __LINE__, @@ -482,6 +480,7 @@ iocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) { ISC_MSG_FAILED, "Can't create IOCP thread: %s"), strbuf); + exit(1); } } } @@ -499,14 +498,25 @@ iocompletionport_init(isc_socketmgr_t *manager) { * Create a private heap to handle the socket overlapped structure * The miniumum number of structures is 10, there is no maximum */ - hHeapHandle = HeapCreate(0, 10*sizeof(IoCompletionInfo), 0); - manager->maxIOCPThreads = min(isc_os_ncpus() + 1, - MAX_IOCPTHREADS); + hHeapHandle = HeapCreate(0, 10 * sizeof(IoCompletionInfo), 0); + if (hHeapHandle == NULL) { + errval = GetLastError(); + isc__strerror(errval, strbuf, sizeof(strbuf)); + FATAL_ERROR(__FILE__, __LINE__, + isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, + ISC_MSG_FAILED, + "HeapCreate() failed during " + "initialization: %s"), + strbuf); + exit(1); + } + + manager->maxIOCPThreads = min(isc_os_ncpus() + 1, MAX_IOCPTHREADS); /* Now Create the Completion Port */ manager->hIoCompletionPort = CreateIoCompletionPort( - INVALID_HANDLE_VALUE, NULL, - 0, manager->maxIOCPThreads); + INVALID_HANDLE_VALUE, NULL, + 0, manager->maxIOCPThreads); if (manager->hIoCompletionPort == NULL) { errval = GetLastError(); isc__strerror(errval, strbuf, sizeof(strbuf)); @@ -532,571 +542,257 @@ iocompletionport_init(isc_socketmgr_t *manager) { void iocompletionport_update(isc_socket_t *sock) { HANDLE hiocp; + char strbuf[ISC_STRERRORSIZE]; + REQUIRE(VALID_SOCKET(sock)); - INSIST(sock->iocp == 0); - sock->iocp = 1; hiocp = CreateIoCompletionPort((HANDLE)sock->fd, - sock->manager->hIoCompletionPort, (DWORD)sock, - sock->manager->maxIOCPThreads); - InterlockedIncrement(&sock->manager->iocp_total); -} - -isc_result_t -socket_event_minit(sock_event_list *evlist) { - BOOL bReset; - int i; - int stat; - WSAEVENT hEvent; - char strbuf[ISC_STRERRORSIZE]; - - REQUIRE(evlist != NULL); - /* Initialize the Event List */ - evlist->max_event = 0; - evlist->total_events = 0; - for (i = 0; i < MAX_EVENTS; i++) { - evlist->aSockList[i] = NULL; - evlist->aEventList[i] = (WSAEVENT) 0; - } + sock->manager->hIoCompletionPort, (ULONG_PTR)sock, 0); - /* - * The event list needs its own event handle so that when we - * want to change the list the event loop can be notified. - */ - hEvent = WSACreateEvent(); - if (hEvent == WSA_INVALID_EVENT) { - stat = WSAGetLastError(); - isc__strerror(stat, strbuf, sizeof(strbuf)); + if (hiocp == NULL) { + DWORD errval = GetLastError(); + isc__strerror(errval, strbuf, sizeof(strbuf)); isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_TOOMANYHANDLES, - "%s: too many open WSA event handles: %s", - "WSACreateEvent", strbuf); - return (ISC_R_UNEXPECTED); + "iocompletionport_update: failed to open" + " io completion port: %s", + strbuf); + + /* XXXMLG temporary hack to make failures detected. + * This function should return errors to the caller, not + * exit here. + */ + FATAL_ERROR(__FILE__, __LINE__, + isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, + ISC_MSG_FAILED, + "CreateIoCompletionPort() failed " + "during initialization: %s"), + strbuf); + exit(1); } - evlist->aEventList[0] = hEvent; - (evlist->max_event)++; - bReset = WSAResetEvent(evlist->aEventList[0]); - return (ISC_R_SUCCESS); + InterlockedIncrement(&sock->manager->iocp_total); } + /* - * Event Thread Initialization + * Routine to cleanup and then close the socket. + * Only close the socket here if it is NOT associated + * with an event, otherwise the WSAWaitForMultipleEvents + * may fail due to the fact that the the Wait should not + * be running while closing an event or a socket. + * The socket is locked before calling this function */ -isc_result_t -event_thread_create(events_thread_t **evthreadp, isc_socketmgr_t *manager) { - events_thread_t *evthread; +void +socket_close(isc_socket_t *sock) { - REQUIRE(VALID_MANAGER(manager)); - REQUIRE(evthreadp != NULL && *evthreadp == NULL); + REQUIRE(sock != NULL); - evthread = isc_mem_get(manager->mctx, sizeof(*evthread)); - if (socket_event_minit(&evthread->sockev_list) != ISC_R_SUCCESS) { - isc_mem_put(manager->mctx, evthread, sizeof(*evthread)); - return (ISC_R_UNEXPECTED); + if (sock->fd != INVALID_SOCKET) { + closesocket(sock->fd); + sock->fd = INVALID_SOCKET; + _set_state(sock, SOCK_CLOSED); + InterlockedDecrement(&sock->manager->totalSockets); } - ISC_LINK_INIT(evthread, link); - evthread->manager = manager; +} - ISC_LIST_APPEND(manager->ev_threads, evthread, link); +static isc_once_t initialise_once = ISC_ONCE_INIT; +static isc_boolean_t initialised = ISC_FALSE; - /* - * Start up the event wait thread. - */ - if (isc_thread_create(event_wait, evthread, &evthread->thread_handle) != - ISC_R_SUCCESS) { - isc_mem_put(manager->mctx, evthread, sizeof(*evthread)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "isc_thread_create() %s", - isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, - ISC_MSG_FAILED, "failed")); - return (ISC_R_UNEXPECTED); - } - *evthreadp = evthread; - return (ISC_R_SUCCESS); -} -/* - * Locate a thread with space for additional events or create one if - * necessary. The manager is locked at this point so the information - * cannot be changed by another thread while we are searching. - */ -void -locate_available_thread(isc_socketmgr_t *manager) { - events_thread_t *evthread; - DWORD threadid = GetCurrentThreadId(); +static void +initialise(void) { + WORD wVersionRequested; + WSADATA wsaData; + int err; + SOCKET sock; + GUID GUIDConnectEx = WSAID_CONNECTEX; + GUID GUIDAcceptEx = WSAID_ACCEPTEX; + GUID GUIDGetAcceptExSockaddrs = WSAID_GETACCEPTEXSOCKADDRS; + DWORD dwBytes; - evthread = ISC_LIST_HEAD(manager->ev_threads); - while (evthread != NULL) { - /* - * We need to find a thread with space to add an event - * If we find it, alert it to process the event change - * list - */ - if(threadid != evthread->thread_id && - evthread->sockev_list.max_event < MAX_EVENTS) { - WSASetEvent(evthread->sockev_list.aEventList[0]); - return; - } - evthread = ISC_LIST_NEXT(evthread, link); + /* Need Winsock 2.2 or better */ + wVersionRequested = MAKEWORD(2, 2); + + err = WSAStartup(wVersionRequested, &wsaData); + if (err != 0) { + char strbuf[ISC_STRERRORSIZE]; + isc__strerror(err, strbuf, sizeof(strbuf)); + FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s", + isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, + ISC_MSG_FAILED, "failed"), + strbuf); + exit(1); } /* - * We need to create a new thread as other threads are full. - * If we succeed in creating the thread, alert it to - * process the event change list since it will have space. - * If we are unable to create one, the event will stay on the - * list and the next event_wait thread will try again to add - * the event. It will call here again if it has no space. + * The following APIs do not exist as functions in a library, but we must + * ask winsock for them. They are "extensions" -- but why they cannot be + * actual functions is beyond me. So, ask winsock for the pointers to the + * functions we need. */ - if (event_thread_create(&evthread, manager) == ISC_R_SUCCESS) { - WSASetEvent(evthread->sockev_list.aEventList[0]); - } + sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + INSIST(sock != INVALID_SOCKET); + err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, + &GUIDConnectEx, sizeof(GUIDConnectEx), + &ISCConnectEx, sizeof(ISCConnectEx), + &dwBytes, NULL, NULL); + INSIST(err == 0); -} + err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, + &GUIDAcceptEx, sizeof(GUIDAcceptEx), + &ISCAcceptEx, sizeof(ISCAcceptEx), + &dwBytes, NULL, NULL); + INSIST(err == 0); -isc_boolean_t -socket_eventlist_add(event_change_t *evchange, sock_event_list *evlist, - isc_socketmgr_t *manager) { - int max_event; - isc_socket_t *sock; - REQUIRE(evchange != NULL); + err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, + &GUIDGetAcceptExSockaddrs, sizeof(GUIDGetAcceptExSockaddrs), + &ISCGetAcceptExSockaddrs, sizeof(ISCGetAcceptExSockaddrs), + &dwBytes, NULL, NULL); + INSIST(err == 0); - sock = evchange->sock; - REQUIRE(sock != NULL); - REQUIRE(sock->hEvent != NULL); - REQUIRE(evlist != NULL); + closesocket(sock); - max_event = evlist->max_event; - if(max_event >= MAX_EVENTS) { - locate_available_thread(manager); - return (ISC_FALSE); - } - /* - * Lock the socket before updating - */ - LOCK(&sock->lock); - evlist->aSockList[max_event] = sock; - evlist->aEventList[max_event] = sock->hEvent; - evlist->max_event++; - evlist->total_events++; - sock->hAlert = evlist->aEventList[0]; - sock->evthread_id = GetCurrentThreadId(); - UNLOCK(&sock->lock); - return (ISC_TRUE); + initialised = ISC_TRUE; } /* - * Delete the event from the list + * Initialize socket services */ -isc_boolean_t -eventlist_event_delete(isc_socket_t *sock, sock_event_list *evlist, - isc_socketmgr_t *manager) +void +InitSockets(void) { + RUNTIME_CHECK(isc_once_do(&initialise_once, + initialise) == ISC_R_SUCCESS); + if (!initialised) + exit(1); +} + +int +internal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo, + struct msghdr *messagehdr, int flags, int *Error) { - int i; - WSAEVENT hEvent; - int iEvent = -1; - isc_boolean_t dofree = ISC_FALSE; + int Result; + DWORD BytesSent; + DWORD Flags = flags; + int total_sent; - REQUIRE(sock != NULL); - REQUIRE(evlist != NULL); - REQUIRE(manager != NULL); - REQUIRE(sock->hEvent != NULL); - hEvent = sock->hEvent; - - /* Find the Event */ - for (i = 1; i < evlist->max_event; i++) { - if (evlist->aEventList[i] == hEvent) { - iEvent = i; + *Error = 0; + Result = WSASendTo(sock->fd, messagehdr->msg_iov, + messagehdr->msg_iovlen, &BytesSent, + Flags, &messagehdr->to_addr, + messagehdr->to_addr_len, (LPWSAOVERLAPPED)lpo, + NULL); + + total_sent = (int)BytesSent; + + /* Check for errors.*/ + if (Result == SOCKET_ERROR) { + *Error = WSAGetLastError(); + + switch (*Error) { + case WSA_IO_INCOMPLETE: + case WSA_WAIT_IO_COMPLETION: + case WSA_IO_PENDING: + case NO_ERROR: /* Strange, but okay */ + sock->pending_iocp++; + sock->pending_send++; + break; + + default: + return (-1); break; } + } else { + sock->pending_iocp++; + sock->pending_send++; } + if (lpo != NULL) + return (0); + else + return (total_sent); +} + +static void +queue_receive_request(isc_socket_t *sock) { + DWORD Flags = 0; + DWORD NumBytes = 0; + int total_bytes = 0; + int Result; + int Error; + WSABUF iov[1]; + IoCompletionInfo *lpo; + isc_result_t isc_result; + /* - * Actual event start at 1 - * event at 0 is the thread wakeup + * If we already have a receive pending, do nothing. */ - if (iEvent < 1) - return (ISC_FALSE); + if (sock->pending_recv > 0) + return; - for(i = iEvent; i < (evlist->max_event - 1); i++) { - evlist->aEventList[i] = evlist->aEventList[i + 1]; - evlist->aSockList[i] = evlist->aSockList[i + 1]; - } + /* + * If no one is waiting, do nothing. + */ + if (ISC_LIST_EMPTY(sock->recv_list)) + return; - evlist->aEventList[evlist->max_event - 1] = 0; - evlist->aSockList[evlist->max_event - 1] = NULL; + INSIST(sock->recvbuf.remaining == 0); + INSIST(sock->fd != INVALID_SOCKET); - /* Cleanup */ - WSAEventSelect(sock->fd, hEvent, 0); - WSACloseEvent(hEvent); - InterlockedDecrement(&sock->manager->totalHandles); + iov[0].len = sock->recvbuf.len; + iov[0].buf = sock->recvbuf.base; - LOCK(&sock->lock); - sock->hEvent = NULL; - sock->hAlert = NULL; - sock->wait_type = 0; + lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, + HEAP_ZERO_MEMORY, + sizeof(IoCompletionInfo)); + RUNTIME_CHECK(lpo != NULL); + lpo->request_type = SOCKET_RECV; - if (sock->pending_close) { - sock->pending_close = 0; - closesocket(sock->fd); - sock->fd = INVALID_SOCKET; - InterlockedDecrement(&sock->manager->totalSockets); - } + sock->recvbuf.from_addr_len = sizeof(sock->recvbuf.from_addr); - UNLOCK(&sock->lock); - evlist->max_event--; - evlist->total_events--; + Error = 0; + Result = WSARecvFrom((SOCKET)sock->fd, iov, 1, + &NumBytes, &Flags, + &sock->recvbuf.from_addr, + &sock->recvbuf.from_addr_len, + (LPWSAOVERLAPPED)lpo, NULL); - return (ISC_TRUE); -} + /* Check for errors. */ + if (Result == SOCKET_ERROR) { + Error = WSAGetLastError(); -/* - * Note that the eventLock is locked before calling this function. - */ -isc_boolean_t -socket_eventlist_delete(event_change_t *evchange, sock_event_list *evlist, - isc_socketmgr_t *manager) -{ + switch (Error) { + case WSA_IO_PENDING: + sock->pending_iocp++; + sock->pending_recv++; + break; - REQUIRE(evchange != NULL); - /* Make sure this is the right thread from which to delete the event */ - if (evchange->evthread_id != GetCurrentThreadId()) - return (ISC_FALSE); + default: + isc_result = isc__errno2result(Result); + if (isc_result == ISC_R_UNEXPECTED) + UNEXPECTED_ERROR(__FILE__, __LINE__, + "WSARecvFrom: Windows error code: %d, isc result %d", + Error, isc_result); + send_recvdone_abort(sock, isc_result); + break; + } + } else { + /* + * The recv() finished immediately, but we will still get + * a completion event. Rather than duplicate code, let + * that thread handle sending the data along its way. + */ + sock->pending_iocp++; + sock->pending_recv++; + } + + socket_log(__LINE__, sock, NULL, IOEVENT, + isc_msgcat, ISC_MSGSET_SOCKET, + ISC_MSG_DOIORECV, + "queue_io_request: fd %d result %d error %d", + sock->fd, Result, Error); - return (eventlist_event_delete(evchange->sock, evlist, manager)); -} -/* - * Get the event changes off of the list and apply the - * requested changes. The manager lock is taken out at - * the start of this function to prevent other event_wait - * threads processing the same information at the same - * time. The queue may not be empty on exit since other - * threads may be involved in processing the queue. - * - * The deletes are done first in order that there be space - * available for the events being added in the same thread - * in case the event list is almost full. This reduces the - * probability of having to create another thread which would - * increase overhead costs. - */ -isc_result_t -process_eventlist(sock_event_list *evlist, isc_socketmgr_t *manager) { - event_change_t *evchange; - event_change_t *next; - isc_boolean_t del; - - REQUIRE(evlist != NULL); - - LOCK(&manager->lock); - - /* - * First the deletes. - */ - evchange = ISC_LIST_HEAD(manager->event_updates); - while (evchange != NULL) { - next = ISC_LIST_NEXT(evchange, link); - del = ISC_FALSE; - if (evchange->action == EVENT_DELETE) { - del = socket_eventlist_delete(evchange, evlist, - manager); - - /* - * Delete only if this thread's socket list was - * updated. - */ - if (del) { - ISC_LIST_DEQUEUE(manager->event_updates, - evchange, link); - HeapFree(hHeapHandle, 0, evchange); - manager->event_written--; - } - } - evchange = next; - } - - /* - * Now the adds. - */ - evchange = ISC_LIST_HEAD(manager->event_updates); - while (evchange != NULL) { - next = ISC_LIST_NEXT(evchange, link); - del = ISC_FALSE; - if (evchange->action == EVENT_ADD) { - del = socket_eventlist_add(evchange, evlist, manager); - - /* - * Delete only if this thread's socket list was - * updated. - */ - if (del) { - ISC_LIST_DEQUEUE(manager->event_updates, - evchange, link); - HeapFree(hHeapHandle, 0, evchange); - manager->event_written--; - } - } - evchange = next; - } - UNLOCK(&manager->lock); - return (ISC_R_SUCCESS); -} - -/* - * Add the event list changes to the queue and notify the - * event loop - */ -static void -notify_eventlist(isc_socket_t *sock, isc_socketmgr_t *manager, - unsigned int action) -{ - - event_change_t *evchange; - - REQUIRE(VALID_MANAGER(manager)); - REQUIRE(sock != NULL); - - evchange = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, - sizeof(event_change_t)); - evchange->sock = sock; - evchange->action = action; - evchange->hEvent = sock->hEvent; - evchange->fd = sock->fd; - evchange->evthread_id = sock->evthread_id; - - LOCK(&manager->lock); - ISC_LIST_APPEND(manager->event_updates, evchange, link); - sock->manager->event_written++; - UNLOCK(&manager->lock); - - /* Alert the Wait List */ - if (sock->hAlert != NULL) - WSASetEvent(sock->hAlert); - else - WSASetEvent(manager->prime_alert); -} - -/* - * Note that the socket is already locked before calling this function - */ -isc_result_t -socket_event_add(isc_socket_t *sock, long type) { - int stat; - WSAEVENT hEvent; - char strbuf[ISC_STRERRORSIZE]; - const char *msg; - - REQUIRE(sock != NULL); - - hEvent = WSACreateEvent(); - if (hEvent == WSA_INVALID_EVENT) { - stat = WSAGetLastError(); - isc__strerror(stat, strbuf, sizeof(strbuf)); - isc_log_iwrite(isc_lctx, - ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - isc_msgcat, ISC_MSGSET_SOCKET, - ISC_MSG_TOOMANYHANDLES, - "%s: too many open WSA event handles: %s", - "WSACreateEvent", strbuf); - return (ISC_R_UNEXPECTED); - } - if (WSAEventSelect(sock->fd, hEvent, type) != 0) { - stat = WSAGetLastError(); - isc__strerror(stat, strbuf, sizeof(strbuf)); - msg = isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, - ISC_MSG_FAILED, "failed"); - UNEXPECTED_ERROR(__FILE__, __LINE__, "WSAEventSelect: %s: %s", - msg, strbuf); - WSACloseEvent(hEvent); - return (ISC_R_UNEXPECTED); - } - sock->hEvent = hEvent; - InterlockedIncrement(&sock->manager->totalHandles); - InterlockedIncrement(&sock->manager->totalHandleRequests); - - sock->wait_type = type; - notify_eventlist(sock, sock->manager, EVENT_ADD); - return (ISC_R_SUCCESS); -} - -/* - * Note that the socket is locked before calling this function - * Note also that we cannot close the socket here or event handle being - * used since the event is being waited upon and any change to either - * will signal the change. The notify_eventlist will take care of - * these details. - */ -void -socket_event_delete(isc_socket_t *sock) { - - REQUIRE(sock != NULL); - REQUIRE(sock->hEvent != NULL); - - sock->wait_type = 0; - sock->pending_close = 1; - notify_eventlist(sock, sock->manager, EVENT_DELETE); - sock->evthread_id = 0; -} - -/* - * Routine to cleanup and then close the socket. - * Only close the socket here if it is NOT associated - * with an event, otherwise the WSAWaitForMultipleEvents - * may fail due to the fact that the the Wait should not - * be running while closing an event or a socket. - * The socket is locked before calling this function - */ -void -socket_close(isc_socket_t *sock) { - - REQUIRE(sock != NULL); - - if (sock->fd != INVALID_SOCKET) { - sock->pending_close = 0; - if (sock->hEvent != NULL) { - socket_event_delete(sock); - } else { - closesocket(sock->fd); - sock->fd = INVALID_SOCKET; - InterlockedDecrement(&sock->manager->totalSockets); - } - } -} - -static isc_once_t initialise_once = ISC_ONCE_INIT; -static isc_boolean_t initialised = ISC_FALSE; - -static void -initialise(void) { - WORD wVersionRequested; - WSADATA wsaData; - int err; - - /* Need Winsock 2.0 or better */ - wVersionRequested = MAKEWORD(2, 0); - - err = WSAStartup(wVersionRequested, &wsaData); - if (err != 0) { - char strbuf[ISC_STRERRORSIZE]; - isc__strerror(err, strbuf, sizeof(strbuf)); - FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s", - isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, - ISC_MSG_FAILED, "failed"), - strbuf); - } else - initialised = ISC_TRUE; -} - -/* - * Initialize socket services - */ -void -InitSockets(void) { - RUNTIME_CHECK(isc_once_do(&initialise_once, - initialise) == ISC_R_SUCCESS); - if (!initialised) - exit(1); -} - -int -internal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo, - struct msghdr *messagehdr, int flags, int *Error) -{ - int Result; - DWORD BytesSent; - DWORD Flags = flags; - int total_sent; - - *Error = 0; - Result = WSASendTo(sock->fd, messagehdr->msg_iov, - messagehdr->msg_iovlen, &BytesSent, - Flags, messagehdr->msg_name, - messagehdr->msg_namelen, (LPOVERLAPPED) lpo, - NULL); - - total_sent = (int) BytesSent; - - /* Check for errors.*/ - if (Result == SOCKET_ERROR) { - - *Error = WSAGetLastError(); - - switch (*Error) { - case WSA_IO_INCOMPLETE : - case WSA_WAIT_IO_COMPLETION : - case WSA_IO_PENDING : - case NO_ERROR : /* Strange, but okay */ - sock->pending_send++; - break; - - default : - return (-1); - break; - } - } else - sock->pending_send++; - if (lpo != NULL) - return (0); - else - return (total_sent); -} - -int -internal_recvmsg(isc_socket_t *sock, IoCompletionInfo *lpo, - struct msghdr *messagehdr, int flags, int *Error) -{ - DWORD Flags = 0; - DWORD NumBytes = 0; - int total_bytes = 0; - int Result; - - *Error = 0; - Result = WSARecvFrom((SOCKET) sock->fd, - messagehdr->msg_iov, - messagehdr->msg_iovlen, - &NumBytes, - &Flags, - messagehdr->msg_name, - (int *)&(messagehdr->msg_namelen), - (LPOVERLAPPED) lpo, - NULL); - - total_bytes = (int) NumBytes; - - socket_log(__LINE__, sock, NULL, IOEVENT, - isc_msgcat, ISC_MSGSET_SOCKET, - ISC_MSG_DOIORECV, - "internal_recvmsg: fd %d result %d error %d %d bytes", - sock->fd, Result, *Error, total_bytes); - - /* Check for errors. */ - if (Result == SOCKET_ERROR) { - - *Error = WSAGetLastError(); - - switch (*Error) { - case WSA_IO_INCOMPLETE: - case WSA_WAIT_IO_COMPLETION: - case WSA_IO_PENDING: - case NO_ERROR : /* Strange, but okay */ - sock->pending_recv++; - break; - - default : - return (-1); - break; - } - } else { - sock->pending_recv++; - } - - /* Return the flags received in header */ - messagehdr->msg_flags = Flags; - if (lpo != NULL) - return (-1); - else - return (total_bytes); + CONSISTENT(sock); } static void @@ -1150,20 +846,26 @@ socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address, msgcat, msgset, message, "socket %p line %d: %s", sock, lineno, msgbuf); #if XXXMLG_DEBUG - fprintf(logfile, "%s socket %p line %d: %s:\n", timebuf, sock, lineno, msgbuf); + if (logfile) + fprintf(logfile, "%s socket %p line %d: %s:\n", + timebuf, sock, lineno, msgbuf); #endif } else { isc_sockaddr_format(address, peerbuf, sizeof(peerbuf)); isc_log_iwrite(isc_lctx, category, module, level, msgcat, msgset, message, - "socket %p line %d: %s: %s", sock, lineno, peerbuf, msgbuf); + "socket %p line %d peer %s: %s", sock, lineno, + peerbuf, msgbuf); #if XXXMLG_DEBUG - fprintf(logfile, "%s socket %p line %d: %s: %s\n", timebuf, sock, lineno, peerbuf, msgbuf); + if (logfile) + fprintf(logfile, "%s socket %p line %d: %s: %s\n", + timebuf, sock, lineno, peerbuf, msgbuf); #endif } #if XXXMLG_DEBUG - fflush(logfile); + if (logfile) + fflush(logfile); #endif } @@ -1207,7 +909,7 @@ connection_reset_fix(SOCKET fd) { BOOL bNewBehavior = FALSE; DWORD status; - if(isc_win32os_majorversion() < 5) + if (isc_win32os_majorversion() < 5) return (ISC_R_SUCCESS); /* NT 4.0 has no problem */ /* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */ @@ -1236,24 +938,21 @@ connection_reset_fix(SOCKET fd) { */ static void build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev, - struct msghdr *msg, char *cmsg, WSABUF *iov) + struct msghdr *msg, char *cmsg, WSABUF *iov, + IoCompletionInfo *lpo) { unsigned int iovcount; isc_buffer_t *buffer; + buflist_t *cpbuffer; isc_region_t used; size_t write_count; size_t skip_count; memset(msg, 0, sizeof(*msg)); - if (sock->type == isc_sockettype_udp) { - msg->msg_name = (void *)&dev->address.type.sa; - msg->msg_namelen = dev->address.length; - } else { - msg->msg_name = NULL; - msg->msg_namelen = 0; - } - + memcpy(&msg->to_addr, &dev->address.type, dev->address.length); + msg->to_addr_len = dev->address.length; + buffer = ISC_LIST_HEAD(dev->bufferlist); write_count = 0; iovcount = 0; @@ -1263,7 +962,20 @@ build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev, */ if (buffer == NULL) { write_count = dev->region.length - dev->n; - iov[0].buf = (void *)(dev->region.base + dev->n); + cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t)); + RUNTIME_CHECK(cpbuffer != NULL); + cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, write_count); + RUNTIME_CHECK(cpbuffer->buf != NULL); + + socket_log(__LINE__, sock, NULL, TRACE, + isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, + "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t), + cpbuffer->buf, write_count); + + memcpy(cpbuffer->buf,(dev->region.base + dev->n), write_count); + cpbuffer->buflen = write_count; + ISC_LIST_ENQUEUE(lpo->bufferlist, cpbuffer, link); + iov[0].buf = cpbuffer->buf; iov[0].len = write_count; iovcount = 1; @@ -1289,10 +1001,22 @@ build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_buffer_usedregion(buffer, &used); if (used.length > 0) { - iov[iovcount].buf = (void *)(used.base - + skip_count); + int uselen = used.length - skip_count; + cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t)); + RUNTIME_CHECK(cpbuffer != NULL); + cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, uselen); + RUNTIME_CHECK(cpbuffer->buf != NULL); + + socket_log(__LINE__, sock, NULL, TRACE, + isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, + "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t), + cpbuffer->buf, write_count); + + memcpy(cpbuffer->buf,(used.base + skip_count), uselen); + cpbuffer->buflen = uselen; + iov[iovcount].buf = cpbuffer->buf; iov[iovcount].len = used.length - skip_count; - write_count += (used.length - skip_count); + write_count += uselen; skip_count = 0; iovcount++; } @@ -1307,99 +1031,14 @@ build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev, msg->msg_totallen = write_count; } -/* - * Construct an iov array and attach it to the msghdr passed in. This is - * the RECV constructor, which will use the available region of the buffer - * (if using a buffer list) or will use the internal region (if a single - * buffer I/O is requested). - * - * Nothing can be NULL, and the done event must list at least one buffer - * on the buffer linked list for this function to be meaningful. - */ -static void -build_msghdr_recv(isc_socket_t *sock, isc_socketevent_t *dev, - struct msghdr *msg, char *cmsg, WSABUF *iov) -{ - unsigned int iovcount; - isc_buffer_t *buffer; - isc_region_t available; - size_t read_count; - - memset(msg, 0, sizeof(struct msghdr)); - - if (sock->type == isc_sockettype_udp) { - memset(&dev->address, 0, sizeof(dev->address)); - msg->msg_name = (void *)&dev->address.type.sa; - msg->msg_namelen = sizeof(dev->address.type); - } else { /* TCP */ - msg->msg_name = NULL; - msg->msg_namelen = 0; - dev->address = sock->address; - } - - buffer = ISC_LIST_HEAD(dev->bufferlist); - read_count = 0; - - /* - * Single buffer I/O? Skip what we've done so far in this region. - */ - if (buffer == NULL) { - read_count = dev->region.length - dev->n; - iov[0].buf = (void *)(dev->region.base + dev->n); - iov[0].len = read_count; - iovcount = 1; - } else { - /* - * Multibuffer I/O. - * Skip empty buffers. - */ - while (buffer != NULL) { - REQUIRE(ISC_BUFFER_VALID(buffer)); - if (isc_buffer_availablelength(buffer) != 0) - break; - buffer = ISC_LIST_NEXT(buffer, link); - } - - iovcount = 0; - while (buffer != NULL) { - INSIST(iovcount < MAXSCATTERGATHER_RECV); - - isc_buffer_availableregion(buffer, &available); - - if (available.length > 0) { - iov[iovcount].buf = (void *)(available.base); - iov[iovcount].len = available.length; - read_count += available.length; - iovcount++; - } - buffer = ISC_LIST_NEXT(buffer, link); - } - } - - /* - * If needed, set up to receive that one extra byte. Note that - * we know there is at least one iov left, since we stole it - * at the top of this function. - */ - - msg->msg_iov = iov; - msg->msg_iovlen = iovcount; - msg->msg_totallen = read_count; -} - static void set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock, isc_socketevent_t *dev) { - if (sock->type == isc_sockettype_udp) { - if (address != NULL) - dev->address = *address; - else - dev->address = sock->address; - } else if (sock->type == isc_sockettype_tcp) { - INSIST(address == NULL); + if (address != NULL) + dev->address = *address; + else dev->address = sock->address; - } } static void @@ -1424,7 +1063,7 @@ allocate_socketevent(isc_socket_t *sock, isc_eventtype_t eventtype, if (ev == NULL) return (NULL); - ev->result = ISC_R_UNEXPECTED; + ev->result = ISC_R_IOERROR; // XXXMLG temporary change to detect failure to set ISC_LINK_INIT(ev, ev_link); ISC_LIST_INIT(ev->bufferlist); ev->region.base = NULL; @@ -1452,70 +1091,128 @@ dump_msg(struct msghdr *msg, isc_socket_t *sock) { } #endif -static int -completeio_recv(isc_socket_t *sock, isc_socketevent_t *dev, - struct msghdr *messagehdr, int cc, int recv_errno) -{ - size_t actual_count; - isc_buffer_t *buffer; - -#define SOFT_OR_HARD(_system, _isc) \ - if (recv_errno == _system) { \ - if (sock->connected) { \ - dev->result = _isc; \ - return (DOIO_HARD); \ - } \ - return (DOIO_SOFT); \ +/* + * map the error code + */ +int +map_socket_error(isc_socket_t *sock, int windows_errno, int *isc_errno, + char *errorstring, size_t bufsize) { + + int doreturn; + switch (windows_errno) { + case WSAECONNREFUSED: + *isc_errno = ISC_R_CONNREFUSED; + if (sock->connected) + doreturn = DOIO_HARD; + else + doreturn = DOIO_SOFT; + break; + case WSAENETUNREACH: + case ERROR_NETWORK_UNREACHABLE: + *isc_errno = ISC_R_NETUNREACH; + if (sock->connected) + doreturn = DOIO_HARD; + else + doreturn = DOIO_SOFT; + break; + case ERROR_PORT_UNREACHABLE: + case ERROR_HOST_UNREACHABLE: + case WSAEHOSTUNREACH: + *isc_errno = ISC_R_HOSTUNREACH; + if (sock->connected) + doreturn = DOIO_HARD; + else + doreturn = DOIO_SOFT; + break; + case WSAENETDOWN: + *isc_errno = ISC_R_NETDOWN; + if (sock->connected) + doreturn = DOIO_HARD; + else + doreturn = DOIO_SOFT; + break; + case WSAEHOSTDOWN: + *isc_errno = ISC_R_HOSTDOWN; + if (sock->connected) + doreturn = DOIO_HARD; + else + doreturn = DOIO_SOFT; + break; + case WSAEACCES: + *isc_errno = ISC_R_NOPERM; + if (sock->connected) + doreturn = DOIO_HARD; + else + doreturn = DOIO_SOFT; + break; + case WSAECONNRESET: + case WSAENETRESET: + case WSAECONNABORTED: + case WSAEDISCON: + *isc_errno = ISC_R_CONNECTIONRESET; + if (sock->connected) + doreturn = DOIO_HARD; + else + doreturn = DOIO_SOFT; + break; + case WSAENOTCONN: + *isc_errno = ISC_R_NOTCONNECTED; + if (sock->connected) + doreturn = DOIO_HARD; + else + doreturn = DOIO_SOFT; + break; + case ERROR_OPERATION_ABORTED: + case ERROR_CONNECTION_ABORTED: + case ERROR_REQUEST_ABORTED: + *isc_errno = ISC_R_CONNECTIONRESET; + doreturn = DOIO_HARD; + break; + case WSAENOBUFS: + *isc_errno = ISC_R_NORESOURCES; + doreturn = DOIO_HARD; + break; + case WSAEAFNOSUPPORT: + *isc_errno = ISC_R_FAMILYNOSUPPORT; + doreturn = DOIO_HARD; + break; + case WSAEADDRNOTAVAIL: + *isc_errno = ISC_R_ADDRNOTAVAIL; + doreturn = DOIO_HARD; + break; + case WSAEDESTADDRREQ: + *isc_errno = ISC_R_BADADDRESSFORM; + doreturn = DOIO_HARD; + break; + case ERROR_NETNAME_DELETED: + *isc_errno = ISC_R_NETDOWN; + doreturn = DOIO_HARD; + break; + default: + *isc_errno = ISC_R_IOERROR; + doreturn = DOIO_HARD; + break; } - -#define ALWAYS_HARD(_system, _isc) \ - if (recv_errno == _system) { \ - dev->result = _isc; \ - return (DOIO_HARD); \ + if (doreturn == DOIO_HARD) { + isc__strerror(windows_errno, errorstring, bufsize); } + return (doreturn); +} - if (recv_errno != 0) { - - if (SOFT_ERROR(recv_errno)) - return (DOIO_SOFT); +static void +fill_recv(isc_socket_t *sock, isc_socketevent_t *dev) { + isc_region_t r; + int copylen; + isc_buffer_t *buffer; - SOFT_OR_HARD(WSAECONNREFUSED, ISC_R_CONNREFUSED); - SOFT_OR_HARD(WSAENETUNREACH, ISC_R_NETUNREACH); - SOFT_OR_HARD(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH); - SOFT_OR_HARD(WSAECONNRESET, ISC_R_CONNECTIONRESET); - SOFT_OR_HARD(WSAENETRESET, ISC_R_CONNECTIONRESET); - SOFT_OR_HARD(WSAECONNABORTED, ISC_R_CONNECTIONRESET); - SOFT_OR_HARD(WSAEDISCON, ISC_R_CONNECTIONRESET); - SOFT_OR_HARD(WSAENETDOWN, ISC_R_NETDOWN); - ALWAYS_HARD(ERROR_OPERATION_ABORTED, ISC_R_CONNECTIONRESET); - ALWAYS_HARD(ERROR_REQUEST_ABORTED, ISC_R_CONNECTIONRESET); - ALWAYS_HARD(ERROR_NETNAME_DELETED, ISC_R_CONNECTIONRESET); - ALWAYS_HARD(ERROR_PORT_UNREACHABLE, ISC_R_HOSTUNREACH); - ALWAYS_HARD(ERROR_HOST_UNREACHABLE, ISC_R_HOSTUNREACH); - ALWAYS_HARD(ERROR_NETWORK_UNREACHABLE, ISC_R_NETUNREACH); - ALWAYS_HARD(ERROR_NETNAME_DELETED, ISC_R_NETUNREACH); -// ALWAYS_HARD(WSA_OPERATION_ABORTED, ISC_R_CONNECTIONRESET); - ALWAYS_HARD(WSAENOBUFS, ISC_R_NORESOURCES); -#undef SOFT_OR_HARD -#undef ALWAYS_HARD - - if (recv_errno == WSA_OPERATION_ABORTED) { - return (DOIO_EOF); - } - dev->result = isc__errno2result(recv_errno); - return (DOIO_HARD); - } - - /* - * On TCP, zero length reads indicate EOF, while on - * UDP, zero length reads are perfectly valid, although - * strange. - */ - if ((sock->type == isc_sockettype_tcp) && (cc == 0)) - return (DOIO_EOF); + INSIST(dev->n < dev->minimum); + INSIST(sock->recvbuf.remaining > 0); + INSIST(sock->pending_recv == 0); if (sock->type == isc_sockettype_udp) { - dev->address.length = messagehdr->msg_namelen; + dev->address.length = sock->recvbuf.from_addr_len; + memcpy(&dev->address.type, &sock->recvbuf.from_addr, + sock->recvbuf.from_addr_len); if (isc_sockaddr_getport(&dev->address) == 0) { if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { socket_log(__LINE__, sock, &dev->address, IOEVENT, @@ -1523,122 +1220,84 @@ completeio_recv(isc_socket_t *sock, isc_socketevent_t *dev, ISC_MSG_ZEROPORT, "dropping source port zero packet"); } - return (DOIO_SOFT); + sock->recvbuf.remaining = 0; + return; } + } else if (sock->type == isc_sockettype_tcp) { + dev->address = sock->address; } - socket_log(__LINE__, sock, &dev->address, IOEVENT, - isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_PKTRECV, - "packet received correctly"); - - /* - * Overflow bit detection. If we received MORE bytes than we should, - * this indicates an overflow situation. Set the flag in the - * dev entry and adjust how much we read by one. - */ -#ifdef ISC_NET_RECVOVERFLOW - if ((sock->type == isc_sockettype_udp) && ((size_t)cc > read_count)) { - dev->attributes |= ISC_SOCKEVENTATTR_TRUNC; - cc--; - } -#endif - /* - * update the buffers (if any) and the i/o count + * Run through the list of buffers we were given, and find the + * first one with space. Once it is found, loop through, filling + * the buffers as much as possible. */ - dev->n += cc; - actual_count = cc; buffer = ISC_LIST_HEAD(dev->bufferlist); - while (buffer != NULL && actual_count > 0) { - REQUIRE(ISC_BUFFER_VALID(buffer)); - if (isc_buffer_availablelength(buffer) <= actual_count) { - actual_count -= isc_buffer_availablelength(buffer); - isc_buffer_add(buffer, - isc_buffer_availablelength(buffer)); - } else { - isc_buffer_add(buffer, actual_count); - actual_count = 0; - break; - } - buffer = ISC_LIST_NEXT(buffer, link); - if (buffer == NULL) { - INSIST(actual_count == 0); + if (buffer != NULL) { // Multi-buffer receive + while (buffer != NULL && sock->recvbuf.remaining > 0) { + REQUIRE(ISC_BUFFER_VALID(buffer)); + if (isc_buffer_availablelength(buffer) > 0) { + isc_buffer_availableregion(buffer, &r); + copylen = min(r.length, sock->recvbuf.remaining); + memcpy(r.base, sock->recvbuf.consume_position, copylen); + sock->recvbuf.consume_position += copylen; + sock->recvbuf.remaining -= copylen; + isc_buffer_add(buffer, copylen); + dev->n += copylen; + } + buffer = ISC_LIST_NEXT(buffer, link); } + } else { // Single-buffer receive + copylen = min(dev->region.length - dev->n, sock->recvbuf.remaining); + memcpy(dev->region.base + dev->n, sock->recvbuf.consume_position, copylen); + sock->recvbuf.consume_position += copylen; + sock->recvbuf.remaining -= copylen; + dev->n += copylen; } /* - * If we read less than we expected, update counters, - * and let the upper layer handle it. - */ - if ((cc != messagehdr->msg_totallen) && (dev->n < dev->minimum)) - return (DOIO_SOFT); - - /* - * Full reads are posted, or partials if partials are ok. + * UDP receives are all-consuming. That is, if we have 4k worth of + * data in our receive buffer, and the caller only gave us + * 1k of space, we will toss the remaining 3k of data. TCP + * will keep the extra data around and use it for later requests. */ - dev->result = ISC_R_SUCCESS; - return (DOIO_SUCCESS); + if (sock->type == isc_sockettype_udp) + sock->recvbuf.remaining = 0; } -static int -startio_recv(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes, - int *recv_errno) +/* + * Copy out as much data from the internal buffer to done events. + * As each done event is filled, send it along its way. + */ +static void +completeio_recv(isc_socket_t *sock) { - char *cmsg = NULL; - char strbuf[ISC_STRERRORSIZE]; - IoCompletionInfo *lpo; - int status; - struct msghdr *msghdr; - - lpo = (IoCompletionInfo *) HeapAlloc(hHeapHandle, - HEAP_ZERO_MEMORY, - sizeof(IoCompletionInfo)); - lpo->request_type = SOCKET_RECV; - lpo->dev = dev; - msghdr = &lpo->messagehdr; - memset(msghdr, 0, sizeof(struct msghdr)); - - build_msghdr_recv(sock, dev, msghdr, cmsg, sock->iov); + isc_socketevent_t *dev; -#if defined(ISC_SOCKET_DEBUG) - dump_msg(msghdr, sock); -#endif + /* + * If we are in the process of filling our buffer, we cannot + * touch it yet, so don't. + */ + if (sock->pending_recv > 0) + return; - *nbytes = internal_recvmsg(sock, lpo, msghdr, 0, recv_errno); + while (sock->recvbuf.remaining > 0 && !ISC_LIST_EMPTY(sock->recv_list)) { + dev = ISC_LIST_HEAD(sock->recv_list); - if (*nbytes < 0) { /* - * I/O has been initiated - * return will be via the completion port + * See if we have sufficient data in our receive buffer + * to handle this. If we do, copy out the data. */ - if (PENDING_ERROR(*recv_errno)) { - status = DOIO_PENDING; - goto done; - } - if (SOFT_ERROR(*recv_errno)) { - status = DOIO_SOFT; - goto done; - } + fill_recv(sock, dev); /* - * If we got this far something is wrong + * Did we satisfy it? */ - if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { - isc__strerror(*recv_errno, strbuf, sizeof(strbuf)); - socket_log(__LINE__, sock, NULL, IOEVENT, - isc_msgcat, ISC_MSGSET_SOCKET, - ISC_MSG_DOIORECV, - "startio_recv: recvmsg(%d) %d bytes, " - "err %d/%s", - sock->fd, *nbytes, *recv_errno, strbuf); + if (dev->n >= dev->minimum) { + dev->result = ISC_R_SUCCESS; + send_recvdone_event(sock, &dev); } - status = DOIO_HARD; - goto done; } - dev->result = ISC_R_SUCCESS; - status = DOIO_SOFT; -done: - return (status); } /* @@ -1661,52 +1320,12 @@ completeio_send(isc_socket_t *sock, isc_socketevent_t *dev, char addrbuf[ISC_SOCKADDR_FORMATSIZE]; char strbuf[ISC_STRERRORSIZE]; - if(send_errno != 0) { - - + if (send_errno != 0) { if (SOFT_ERROR(send_errno)) return (DOIO_SOFT); -#define SOFT_OR_HARD(_system, _isc) \ - if (send_errno == _system) { \ - if (sock->connected) { \ - dev->result = _isc; \ - return (DOIO_HARD); \ - } \ - return (DOIO_SOFT); \ - } -#define ALWAYS_HARD(_system, _isc) \ - if (send_errno == _system) { \ - dev->result = _isc; \ - return (DOIO_HARD); \ - } - - SOFT_OR_HARD(WSAEACCES, ISC_R_NOPERM); - SOFT_OR_HARD(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); - SOFT_OR_HARD(WSAECONNREFUSED, ISC_R_CONNREFUSED); - SOFT_OR_HARD(WSAENOTCONN, ISC_R_CONNREFUSED); - SOFT_OR_HARD(WSAECONNRESET, ISC_R_CONNECTIONRESET); - SOFT_OR_HARD(WSAECONNABORTED, ISC_R_CONNECTIONRESET); - SOFT_OR_HARD(WSAENETRESET, ISC_R_CONNECTIONRESET); - SOFT_OR_HARD(WSAEDISCON, ISC_R_CONNECTIONRESET); - SOFT_OR_HARD(WSAENETDOWN, ISC_R_NETDOWN); - ALWAYS_HARD(ERROR_OPERATION_ABORTED, ISC_R_CONNECTIONRESET); - ALWAYS_HARD(ERROR_NETNAME_DELETED, ISC_R_CONNECTIONRESET); - ALWAYS_HARD(ERROR_PORT_UNREACHABLE, ISC_R_HOSTUNREACH); - ALWAYS_HARD(ERROR_HOST_UNREACHABLE, ISC_R_HOSTUNREACH); - ALWAYS_HARD(ERROR_NETWORK_UNREACHABLE, ISC_R_NETUNREACH); - ALWAYS_HARD(ERROR_REQUEST_ABORTED, ISC_R_CONNECTIONRESET); - ALWAYS_HARD(WSA_OPERATION_ABORTED, ISC_R_CONNECTIONRESET); - ALWAYS_HARD(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); - ALWAYS_HARD(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH); - ALWAYS_HARD(WSAEHOSTDOWN, ISC_R_HOSTUNREACH); - ALWAYS_HARD(WSAENETUNREACH, ISC_R_NETUNREACH); - ALWAYS_HARD(WSAENOBUFS, ISC_R_NORESOURCES); - ALWAYS_HARD(EPERM, ISC_R_HOSTUNREACH); - ALWAYS_HARD(EPIPE, ISC_R_NOTCONNECTED); - -#undef SOFT_OR_HARD -#undef ALWAYS_HARD + return (map_socket_error(sock, send_errno, &dev->result, + strbuf, sizeof(strbuf))); /* * The other error types depend on whether or not the @@ -1750,19 +1369,20 @@ startio_send(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes, int status; struct msghdr *msghdr; - lpo = (IoCompletionInfo *) HeapAlloc(hHeapHandle, - HEAP_ZERO_MEMORY, - sizeof(IoCompletionInfo)); + lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, + HEAP_ZERO_MEMORY, + sizeof(IoCompletionInfo)); + RUNTIME_CHECK(lpo != NULL); lpo->request_type = SOCKET_SEND; lpo->dev = dev; msghdr = &lpo->messagehdr; memset(msghdr, 0, sizeof(struct msghdr)); + ISC_LIST_INIT(lpo->bufferlist); - build_msghdr_send(sock, dev, msghdr, cmsg, sock->iov); + build_msghdr_send(sock, dev, msghdr, cmsg, sock->iov, lpo); *nbytes = internal_sendmsg(sock, lpo, msghdr, 0, send_errno); - if (*nbytes < 0) { /* * I/O has been initiated @@ -1795,40 +1415,10 @@ startio_send(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes, dev->result = ISC_R_SUCCESS; status = DOIO_SOFT; done: + _set_state(sock, SOCK_DATA); return (status); } -/* - * Kill. - * - * Caller must ensure that the socket is not locked and no external - * references exist. Note that the socket structure does not get - * freed here - */ -static void -destroy_socket(isc_socket_t **sockp) { - isc_socket_t *sock = *sockp; - - REQUIRE(sock != NULL); - - socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, - ISC_MSG_DESTROYING, "closing socket %d, %p", sock->fd, sock); - - LOCK(&sock->lock); - - INSIST(ISC_LIST_EMPTY(sock->accept_list)); - INSIST(ISC_LIST_EMPTY(sock->recv_list)); - INSIST(ISC_LIST_EMPTY(sock->send_list)); - INSIST(sock->connect_ev == NULL); - - socket_close(sock); - if (sock->pending_recv != 0 || sock->pending_send != 0 || - sock->pending_close != 0 || sock->iocp == 1) { - sock->pending_free = 1; - } - UNLOCK(&sock->lock); -} - static isc_result_t allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type, isc_socket_t **socketp) { @@ -1840,8 +1430,6 @@ allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type, if (sock == NULL) return (ISC_R_NOMEMORY); - result = ISC_R_UNEXPECTED; - sock->magic = 0; sock->references = 0; @@ -1859,19 +1447,23 @@ allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type, ISC_LIST_INIT(sock->accept_list); sock->connect_ev = NULL; sock->pending_accept = 0; - sock->pending_close = 0; sock->pending_recv = 0; sock->pending_send = 0; - sock->pending_free = 0; - sock->iocp = 0; + sock->pending_iocp = 0; sock->listener = 0; sock->connected = 0; - sock->connecting = 0; + sock->pending_connect = 0; sock->bound = 0; - sock->hEvent = NULL; - sock->hAlert = NULL; - sock->evthread_id = 0; - sock->wait_type = 0; + _set_state(sock, SOCK_INITIALIZED); + + sock->recvbuf.len = 65536; + sock->recvbuf.consume_position = sock->recvbuf.base; + sock->recvbuf.remaining = 0; + sock->recvbuf.base = isc_mem_get(manager->mctx, sock->recvbuf.len); // max buffer size + if (sock->recvbuf.base == NULL) { + sock->magic = 0; + goto error; + } /* * initialize the lock @@ -1879,18 +1471,13 @@ allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type, result = isc_mutex_init(&sock->lock); if (result != ISC_R_SUCCESS) { sock->magic = 0; + isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len); + sock->recvbuf.base = NULL; goto error; } - /* - * Initialize readable and writable events - */ - ISC_EVENT_INIT(&sock->readable_ev, sizeof(intev_t), - ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTR, - NULL, sock, sock, NULL, NULL); - ISC_EVENT_INIT(&sock->writable_ev, sizeof(intev_t), - ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTW, - NULL, sock, sock, NULL, NULL); + socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, + "allocated"); sock->magic = SOCKET_MAGIC; *socketp = sock; @@ -1904,36 +1491,122 @@ allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type, } /* - * This event requires that the various lists be empty, that the reference - * count be 1, and that the magic number is valid. The other socket bits, - * like the lock, must be initialized as well. The fd associated must be - * marked as closed, by setting it to INVALID_SOCKET on close, or this - * routine will also close the socket. + * Verify that the socket state is CONSISTENT. + */ +static void +consistent(isc_socket_t *sock) { + + isc_socketevent_t *dev; + isc_socket_newconnev_t *nev; + unsigned int count; + char *crash_reason; + isc_boolean_t crash = ISC_FALSE; + + REQUIRE(sock->pending_iocp == sock->pending_recv + sock->pending_send + + sock->pending_accept + sock->pending_connect); + + dev = ISC_LIST_HEAD(sock->send_list); + count = 0; + while (dev != NULL) { + count++; + dev = ISC_LIST_NEXT(dev, ev_link); + } + if (count > sock->pending_send) { + crash = ISC_TRUE; + crash_reason = "send_list > sock->pending_send"; + } + + nev = ISC_LIST_HEAD(sock->accept_list); + count = 0; + while (nev != NULL) { + count++; + nev = ISC_LIST_NEXT(nev, ev_link); + } + if (count > sock->pending_accept) { + crash = ISC_TRUE; + crash_reason = "send_list > sock->pending_send"; + } + + if (crash) { + socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, + ISC_MSG_DESTROYING, "SOCKET INCONSISTENT: %s", + crash_reason); + sock_dump(sock); + INSIST(crash == ISC_FALSE); + } +} + +/* + * Maybe free the socket. + * + * This function will veriy tht the socket is no longer in use in any way, + * either internally or externally. This is the only place where this + * check is to be made; if some bit of code believes that IT is done with + * the socket (e.g., some reference counter reaches zero), it should call + * this function. + * + * When calling this function, the socket must be locked, and the manager + * must be unlocked. + * + * When this function returns, *socketp will be NULL. No tricks to try + * to hold on to this pointer are allowed. */ static void -free_socket(isc_socket_t **socketp) { +maybe_free_socket(isc_socket_t **socketp, int lineno) { isc_socket_t *sock = *socketp; + *socketp = NULL; - INSIST(sock->references == 0); INSIST(VALID_SOCKET(sock)); - INSIST(!sock->connecting); - INSIST(!sock->pending_accept); - INSIST(ISC_LIST_EMPTY(sock->recv_list)); - INSIST(ISC_LIST_EMPTY(sock->send_list)); - INSIST(ISC_LIST_EMPTY(sock->accept_list)); - INSIST(!ISC_LINK_LINKED(sock, link)); - INSIST(sock->iocp == 0); + CONSISTENT(sock); + + if (sock->pending_iocp > 0 + || sock->pending_recv > 0 + || sock->pending_send > 0 + || sock->pending_accept > 0 + || sock->references > 0 + || sock->pending_connect == 1 + || !ISC_LIST_EMPTY(sock->recv_list) + || !ISC_LIST_EMPTY(sock->send_list) + || !ISC_LIST_EMPTY(sock->accept_list) + || sock->fd != INVALID_SOCKET) { + UNLOCK(&sock->lock); + return; + } + UNLOCK(&sock->lock); - sock->magic = 0; + free_socket(&sock, lineno); +} - DESTROYLOCK(&sock->lock); +void +free_socket(isc_socket_t **sockp, int lineno) { + isc_socketmgr_t *manager; + isc_socket_t *sock = *sockp; + *sockp = NULL; + manager = sock->manager; + + /* + * Seems we can free the socket after all. + */ + manager = sock->manager; socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, - ISC_MSG_DESTROYING, "freeing socket (fd %d) %p", sock->fd, sock); + ISC_MSG_DESTROYING, "freeing socket line %d fd %d lock %p semaphore %p", + lineno, sock->fd, &sock->lock, sock->lock.LockSemaphore); - isc_mem_put(sock->manager->mctx, sock, sizeof(*sock)); + sock->magic = 0; + DESTROYLOCK(&sock->lock); - *socketp = NULL; + if (sock->recvbuf.base != NULL) + isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len); + + LOCK(&manager->lock); + if (ISC_LINK_LINKED(sock, link)) + ISC_LIST_UNLINK(manager->socklist, sock, link); + isc_mem_put(manager->mctx, sock, sizeof(*sock)); + + if (ISC_LIST_EMPTY(manager->socklist)) + SIGNAL(&manager->shutdown_ok); + UNLOCK(&manager->lock); } /* @@ -1971,9 +1644,14 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, if (sock->fd != INVALID_SOCKET) { result = connection_reset_fix(sock->fd); if (result != ISC_R_SUCCESS) { + socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, + "closed %d %d %d con_reset_fix_failed", + sock->pending_recv, sock->pending_send, + sock->references); closesocket(sock->fd); + _set_state(sock, SOCK_CLOSED); sock->fd = INVALID_SOCKET; - free_socket(&sock); + free_socket(&sock, __LINE__); return (result); } } @@ -1985,7 +1663,7 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, if (sock->fd == INVALID_SOCKET) { socket_errno = WSAGetLastError(); - free_socket(&sock); + free_socket(&sock, __LINE__); switch (socket_errno) { case WSAEMFILE: @@ -2012,9 +1690,13 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, result = make_nonblock(sock->fd); if (result != ISC_R_SUCCESS) { + socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, + "closed %d %d %d make_nonblock_failed", + sock->pending_recv, sock->pending_send, + sock->references); closesocket(sock->fd); sock->fd = INVALID_SOCKET; - free_socket(&sock); + free_socket(&sock, __LINE__); return (result); } @@ -2080,18 +1762,17 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, } #endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */ + _set_state(sock, SOCK_OPEN); sock->references = 1; *socketp = sock; iocompletionport_update(sock); - LOCK(&manager->lock); - /* * Note we don't have to lock the socket like we normally would because * there are no external references to it yet. */ - + LOCK(&manager->lock); ISC_LIST_APPEND(manager->socklist, sock, link); InterlockedIncrement(&manager->totalSockets); UNLOCK(&manager->lock); @@ -2111,6 +1792,7 @@ isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp) { REQUIRE(socketp != NULL && *socketp == NULL); LOCK(&sock->lock); + CONSISTENT(sock); sock->references++; UNLOCK(&sock->lock); @@ -2131,37 +1813,23 @@ isc_socket_detach(isc_socket_t **socketp) { REQUIRE(VALID_SOCKET(sock)); LOCK(&sock->lock); + CONSISTENT(sock); REQUIRE(sock->references > 0); sock->references--; -#if XXXMLG_DEBUG - printf("Detaching socket %p %d (%d %d %d %d %d)\n", - sock, sock->fd, sock->pending_recv, sock->pending_send, sock->pending_close, - sock->pending_free, sock->references); -#endif - - if (sock->references == 0 && sock->pending_recv == 0 && sock->pending_send == 0) - kill_socket = ISC_TRUE; - - UNLOCK(&sock->lock); + socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, + "detach_socket %d %d %d", + sock->pending_recv, sock->pending_send, + sock->references); - if (kill_socket) { - isc_socket_t *s = sock; - destroy_socket(&sock); - sock = s; - if (sock->pending_free) { - isc_socketmgr_t *manager = sock->manager; - LOCK(&manager->lock); - ISC_LIST_UNLINK(manager->socklist, sock, link); - InterlockedDecrement(&manager->iocp_total); - sock->iocp = 0; - free_socket(&sock); - if (ISC_LIST_EMPTY(manager->socklist)) - SIGNAL(&manager->shutdown_ok); - UNLOCK(&manager->lock); - } + if (sock->references == 0 && sock->fd != INVALID_SOCKET) { + closesocket(sock->fd); + sock->fd = INVALID_SOCKET; + _set_state(sock, SOCK_CLOSED); } + maybe_free_socket(&sock, __LINE__); + *socketp = NULL; } @@ -2171,7 +1839,7 @@ isc_socket_detach(isc_socket_t **socketp) { * destined for. * * If the event to be sent is on a list, remove it before sending. If - * asked to, send and detach from the socket as well. + * asked to, send and detach from the task as well. * * Caller must have the socket locked if the event is attached to the socket. */ @@ -2180,24 +1848,22 @@ send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) { isc_task_t *task; task = (*dev)->ev_sender; - (*dev)->ev_sender = sock; - if (ISC_LINK_LINKED(*dev, ev_link)) { + if (ISC_LINK_LINKED(*dev, ev_link)) ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link); - } if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) == ISC_SOCKEVENTATTR_ATTACHED) isc_task_sendanddetach(&task, (isc_event_t **)dev); else isc_task_send(task, (isc_event_t **)dev); + + CONSISTENT(sock); } /* * See comments for send_recvdone_event() above. - * - * Caller must have the socket locked if the event is attached to the socket. */ static void send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) { @@ -2208,289 +1874,154 @@ send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) { task = (*dev)->ev_sender; (*dev)->ev_sender = sock; - if (ISC_LINK_LINKED(*dev, ev_link)) { - ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link); - } + if (ISC_LINK_LINKED(*dev, ev_link)) + ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link); + + if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) + == ISC_SOCKEVENTATTR_ATTACHED) + isc_task_sendanddetach(&task, (isc_event_t **)dev); + else + isc_task_send(task, (isc_event_t **)dev); + + CONSISTENT(sock); +} + +/* + * See comments for send_recvdone_event() above. + */ +static void +send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev) { + isc_task_t *task; + + INSIST(adev != NULL && *adev != NULL); + + task = (*adev)->ev_sender; + (*adev)->ev_sender = sock; + + if (ISC_LINK_LINKED(*adev, ev_link)) + ISC_LIST_DEQUEUE(sock->accept_list, *adev, ev_link); + + isc_task_sendanddetach(&task, (isc_event_t **)adev); + + CONSISTENT(sock); +} + +/* + * See comments for send_recvdone_event() above. + */ +static void +send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev) { + isc_task_t *task; + + INSIST(cdev != NULL && *cdev != NULL); + + task = (*cdev)->ev_sender; + (*cdev)->ev_sender = sock; + + sock->connect_ev = NULL; + + isc_task_sendanddetach(&task, (isc_event_t **)cdev); - if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) - == ISC_SOCKEVENTATTR_ATTACHED) - isc_task_sendanddetach(&task, (isc_event_t **)dev); - else - isc_task_send(task, (isc_event_t **)dev); + CONSISTENT(sock); } /* - * Call accept() on a socket, to get the new file descriptor. The listen - * socket is used as a prototype to create a new isc_socket_t. The new - * socket has one outstanding reference. The task receiving the event - * will be detached from just after the event is delivered. - * * On entry to this function, the event delivered is the internal * readable event, and the first item on the accept_list should be * the done event we want to send. If the list is empty, this is a no-op, - * so just unlock and return. + * so just close the new connection, unlock, and return. * * Note the the socket is locked before entering here */ static void -internal_accept(isc_socket_t *sock, int accept_errno) { - isc_socketmgr_t *manager; - isc_socket_newconnev_t *dev; - isc_task_t *task; - ISC_SOCKADDR_LEN_T addrlen; - SOCKET fd; +internal_accept(isc_socket_t *sock, IoCompletionInfo *lpo, int accept_errno) { + isc_socket_newconnev_t *adev; isc_result_t result = ISC_R_SUCCESS; - char strbuf[ISC_STRERRORSIZE]; + isc_socket_t *nsock; + struct sockaddr_in *localaddr; + int localaddr_len = sizeof(*localaddr); + struct sockaddr_in *remoteaddr; + int remoteaddr_len = sizeof(*remoteaddr); INSIST(VALID_SOCKET(sock)); + LOCK(&sock->lock); + CONSISTENT(sock); socket_log(__LINE__, sock, NULL, TRACE, isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, - "internal_accept called, locked socket"); - - manager = sock->manager; - INSIST(VALID_MANAGER(manager)); + "internal_accept called"); INSIST(sock->listener); - INSIST(sock->hEvent != NULL); - INSIST(sock->pending_accept == 1); - sock->pending_accept = 0; - /* - * Check any possible error status from the event notification here. - * Note that we don't take any action since it was only - * Windows that was notifying about a network event, not the - * application. - * PDMXXX: Should we care about any of the possible event errors - * signalled? The only possible valid errors are: - * WSAENETDOWN, WSAECONNRESET, & WSAECONNABORTED - */ - if (accept_errno != 0) { - switch (accept_errno) { - case WSAENETDOWN: - case WSAECONNRESET: - case WSAECONNABORTED: - break; /* Expected errors */ - default: - isc__strerror(accept_errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "internal_accept: from event wait: %s", - strbuf); - break; - } - return; - } + INSIST(sock->pending_iocp > 0); + sock->pending_iocp--; + INSIST(sock->pending_accept > 0); + sock->pending_accept--; - /* - * Get the first item off the accept list. - * If it is empty, unlock the socket and return. - */ - dev = ISC_LIST_HEAD(sock->accept_list); - if (dev == NULL) { - isc_sockaddr_t from; - /* - * This should only happen if WSAEventSelect() fails - * or when cancelling a specific event, when we can do that - * again. - */ - addrlen = sizeof(from.type); - fd = accept(sock->fd, &from.type.sa, &addrlen); - if (fd == INVALID_SOCKET) { - accept_errno = WSAGetLastError(); - if (accept_errno == WSAEMFILE) { - isc_log_iwrite(isc_lctx, - ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - isc_msgcat, ISC_MSGSET_SOCKET, - ISC_MSG_TOOMANYFDS, - "%s: too many open file descriptors", - "accept"); - goto soft_error; - } else if (SOFT_ERROR(accept_errno) || - accept_errno == WSAECONNRESET) { - goto soft_error; - } else { - isc__strerror(accept_errno, strbuf, - sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "internal_accept: accept() %s: %s", - isc_msgcat_get(isc_msgcat, - ISC_MSGSET_GENERAL, - ISC_MSG_FAILED, - "failed"), - strbuf); - fd = INVALID_SOCKET; - result = ISC_R_UNEXPECTED; - } - } else { - char addrbuf[ISC_SOCKADDR_FORMATSIZE]; - isc_sockaddr_format(&from, addrbuf, sizeof(addrbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "sock->accept_list empty: " - "dropping TCP request from %s", - addrbuf); - (void)closesocket(fd); - sock->fd = INVALID_SOCKET; - } - return; - } + adev = lpo->adev; /* - * Try to accept the new connection. If the accept fails with - * WSAEINTR, the event wait will be notified again since - * the event will be reset on return to caller. - */ - addrlen = sizeof(dev->newsocket->address.type); - memset(&dev->newsocket->address.type.sa, 0, addrlen); - fd = accept(sock->fd, &dev->newsocket->address.type.sa, - (void *)&addrlen); - if (fd == INVALID_SOCKET) { - accept_errno = WSAGetLastError(); - if (accept_errno == WSAEMFILE) { - isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - isc_msgcat, ISC_MSGSET_SOCKET, - ISC_MSG_TOOMANYFDS, - "%s: too many open file descriptors", - "accept"); - goto soft_error; - } else if (SOFT_ERROR(accept_errno) || - accept_errno == WSAECONNRESET) { - goto soft_error; - } else { - isc__strerror(accept_errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "internal_accept: accept() %s: %s", - isc_msgcat_get(isc_msgcat, - ISC_MSGSET_GENERAL, - ISC_MSG_FAILED, - "failed"), - strbuf); - fd = INVALID_SOCKET; - result = ISC_R_UNEXPECTED; - } - } else { - if (addrlen == 0) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "internal_accept(): " - "accept() failed to return " - "remote address"); - - (void)closesocket(fd); - dev->newsocket->fd = INVALID_SOCKET; - goto soft_error; - } else if (dev->newsocket->address.type.sa.sa_family != - sock->pf) - { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "internal_accept(): " - "accept() returned peer address " - "family %u (expected %u)", - dev->newsocket->address. - type.sa.sa_family, - sock->pf); - (void)closesocket(fd); - dev->newsocket->fd = INVALID_SOCKET; - goto soft_error; - } - } + * If the event is no longer in the list we can just return. + */ + if (!acceptdone_is_active(sock, adev)) + goto done; - if (fd != INVALID_SOCKET) { - dev->newsocket->address.length = addrlen; - dev->newsocket->pf = sock->pf; - } + nsock = adev->newsocket; /* * Pull off the done event. */ - ISC_LIST_UNLINK(sock->accept_list, dev, ev_link); + ISC_LIST_UNLINK(sock->accept_list, adev, ev_link); /* - * Stop listening for connects. + * Extract the addresses from the socket, copy them into the structure, + * and return the new socket. */ - if (ISC_LIST_EMPTY(sock->accept_list) && - WSAEventSelect(sock->fd, sock->hEvent, FD_CLOSE) != 0) { - int stat; - const char *msg; - stat = WSAGetLastError(); - isc__strerror(stat, strbuf, sizeof(strbuf)); - msg = isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, - ISC_MSG_FAILED, "failed"); - UNEXPECTED_ERROR(__FILE__, __LINE__, "WSAEventSelect: %s: %s", - msg, strbuf); - } + ISCGetAcceptExSockaddrs(lpo->acceptbuffer, 0, + sizeof(SOCKADDR) + 16, sizeof(SOCKADDR) + 16, + (LPSOCKADDR *)&localaddr, &localaddr_len, + (LPSOCKADDR *)&remoteaddr, &remoteaddr_len); + memcpy(&adev->address.type, remoteaddr, remoteaddr_len); + adev->address.length = remoteaddr_len; + nsock->address = adev->address; + nsock->pf = adev->address.type.sa.sa_family; + socket_log(__LINE__, nsock, &nsock->address, TRACE, + isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, + "internal_accept parent %p", sock); - if (fd != INVALID_SOCKET) { - isc_result_t tresult; - tresult = make_nonblock(fd); - if (tresult != ISC_R_SUCCESS) { - closesocket(fd); - sock->fd = INVALID_SOCKET; - fd = INVALID_SOCKET; - result = tresult; - } - } + result = make_nonblock(adev->newsocket->fd); + INSIST(result == ISC_R_SUCCESS); + + INSIST(setsockopt(nsock->fd, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT, + (char *)&sock->fd, sizeof(sock->fd)) == 0); /* - * INVALID_SOCKET means the new socket didn't happen. + * Hook it up into the manager. */ - if (fd != INVALID_SOCKET) { - LOCK(&manager->lock); - ISC_LIST_APPEND(manager->socklist, dev->newsocket, link); - - dev->newsocket->fd = fd; - dev->newsocket->bound = 1; - dev->newsocket->connected = 1; - InterlockedIncrement(&manager->totalSockets); + nsock->bound = 1; + nsock->connected = 1; + _set_state(nsock, SOCK_OPEN); - /* - * The accept socket inherits the listen socket's - * selected events. Remove this socket from all events - * as it is handled by IOCP. (Joe Quanaim, lucent.com) - */ - if (WSAEventSelect(dev->newsocket->fd, 0, 0) != 0) { - /* this is an unlikely but non-fatal error */ - int stat; - const char *msg; - stat = WSAGetLastError(); - isc__strerror(stat, strbuf, sizeof(strbuf)); - msg = isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, - ISC_MSG_FAILED, "failed"); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "WSAEventSelect: %s: %s", msg, strbuf); - } - - /* - * Save away the remote address - */ - dev->address = dev->newsocket->address; - - socket_log(__LINE__, sock, &dev->newsocket->address, CREATION, - isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN, - "accepted connection, new socket %p", - dev->newsocket); - - iocompletionport_update(dev->newsocket); + LOCK(&nsock->manager->lock); + ISC_LIST_APPEND(nsock->manager->socklist, nsock, link); + InterlockedIncrement(&nsock->manager->totalSockets); + UNLOCK(&nsock->manager->lock); - UNLOCK(&manager->lock); - } else { - dev->newsocket->references--; - free_socket(&dev->newsocket); - } + socket_log(__LINE__, sock, &nsock->address, CREATION, + isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN, + "accepted_connection new_socket %p fd %d", + nsock, nsock->fd); - /* - * Fill in the done event details and send it off. - */ - dev->result = result; - task = dev->ev_sender; - dev->ev_sender = sock; + adev->result = result; + send_acceptdone_event(sock, &adev); - isc_task_sendanddetach(&task, (isc_event_t **)&dev); - return; +done: + CONSISTENT(sock); + UNLOCK(&sock->lock); - soft_error: - return; + HeapFree(hHeapHandle, 0, lpo->acceptbuffer); + lpo->acceptbuffer = NULL; } /* @@ -2498,25 +2029,35 @@ internal_accept(isc_socket_t *sock, int accept_errno) { * Note that the socket is locked before entering. */ static void -internal_connect(isc_socket_t *sock, int connect_errno) { - isc_socket_connev_t *dev; - isc_task_t *task; +internal_connect(isc_socket_t *sock, IoCompletionInfo *lpo, int connect_errno) { + isc_socket_connev_t *cdev; char strbuf[ISC_STRERRORSIZE]; INSIST(VALID_SOCKET(sock)); + LOCK(&sock->lock); + + INSIST(sock->pending_iocp > 0); + sock->pending_iocp--; + INSIST(sock->pending_connect == 1); + sock->pending_connect = 0; + /* * Has this event been canceled? */ - dev = sock->connect_ev; - if (dev == NULL) { - INSIST(!sock->connecting); + cdev = lpo->cdev; + if (!connectdone_is_active(sock, cdev)) { + sock->pending_connect = 0; + if (sock->fd != INVALID_SOCKET) { + closesocket(sock->fd); + sock->fd = INVALID_SOCKET; + _set_state(sock, SOCK_CLOSED); + } + CONSISTENT(sock); + UNLOCK(&sock->lock); return; } - INSIST(sock->connecting); - sock->connecting = 0; - /* * Check possible Windows network event error status here. */ @@ -2526,9 +2067,10 @@ internal_connect(isc_socket_t *sock, int connect_errno) { * fd and pretend nothing strange happened. */ if (SOFT_ERROR(connect_errno) || - connect_errno == WSAEINPROGRESS) - { - sock->connecting = 1; + connect_errno == WSAEINPROGRESS) { + sock->pending_connect = 1; + CONSISTENT(sock); + UNLOCK(&sock->lock); return; } @@ -2536,7 +2078,7 @@ internal_connect(isc_socket_t *sock, int connect_errno) { * Translate other errors into ISC_R_* flavors. */ switch (connect_errno) { -#define ERROR_MATCH(a, b) case a: dev->result = b; break; +#define ERROR_MATCH(a, b) case a: cdev->result = b; break; ERROR_MATCH(WSAEACCES, ISC_R_NOPERM); ERROR_MATCH(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); ERROR_MATCH(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); @@ -2551,89 +2093,105 @@ internal_connect(isc_socket_t *sock, int connect_errno) { ERROR_MATCH(WSAETIMEDOUT, ISC_R_TIMEDOUT); #undef ERROR_MATCH default: - dev->result = ISC_R_UNEXPECTED; + cdev->result = ISC_R_UNEXPECTED; isc__strerror(connect_errno, strbuf, sizeof(strbuf)); UNEXPECTED_ERROR(__FILE__, __LINE__, "internal_connect: connect() %s", strbuf); } } else { - dev->result = ISC_R_SUCCESS; + INSIST(setsockopt(sock->fd, SOL_SOCKET, SO_UPDATE_CONNECT_CONTEXT, NULL, 0) == 0); + cdev->result = ISC_R_SUCCESS; sock->connected = 1; sock->bound = 1; + socket_log(__LINE__, sock, &sock->address, IOEVENT, + isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN, + "internal_connect: success"); } - sock->connect_ev = NULL; + send_connectdone_event(sock, &cdev); + + UNLOCK(&sock->lock); +} - task = dev->ev_sender; - dev->ev_sender = sock; - isc_task_sendanddetach(&task, (isc_event_t **)&dev); +/* + * Loop through the socket, returning ISC_R_EOF for each done event pending. + */ +static void +send_recvdone_abort(isc_socket_t *sock, isc_result_t result) { + isc_socketevent_t *dev; + + while (!ISC_LIST_EMPTY(sock->recv_list)) { + dev = ISC_LIST_HEAD(sock->recv_list); + dev->result = result; + send_recvdone_event(sock, &dev); + } } +/* + * Take the data we received in our private buffer, and if any recv() calls on + * our list are satisfied, send the corresponding done event. + * + * If we need more data (there are still items on the recv_list after we consume all + * our data) then arrange for another system recv() call to fill our buffers. + */ static void -internal_recv(isc_socket_t *sock, isc_socketevent_t *dev, - struct msghdr *messagehdr, int nbytes, int recv_errno) +internal_recv(isc_socket_t *sock, int nbytes) { - isc_socketevent_t *ldev; - int io_state; - int cc; - INSIST(VALID_SOCKET(sock)); LOCK(&sock->lock); + CONSISTENT(sock); + socket_log(__LINE__, sock, NULL, IOEVENT, isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV, - "internal_recv: task got socket event %p", dev); + "internal_recv: %d bytes received", nbytes); + /* + * If we got here, the I/O operation succeeded. However, we might still have removed this + * event from our notification list (or never placed it on it due to immediate completion.) + * Handle the reference counting here, and handle the cancellation event just after. + */ + INSIST(sock->pending_iocp > 0); + sock->pending_iocp--; INSIST(sock->pending_recv > 0); sock->pending_recv--; - /* If the event is no longer in the list we can just return */ - ldev = ISC_LIST_HEAD(sock->recv_list); - while (ldev != NULL && ldev != dev) { - ldev = ISC_LIST_NEXT(ldev, ev_link); + /* + * The only way we could have gotten here is that our I/O has successfully completed. + * Update our pointers, and move on. The only odd case here is that we might not + * have received enough data on a TCP stream to satisfy the minimum requirements. If + * this is the case, we will re-issue the recv() call for what we need. + * + * We do check for a recv() of 0 bytes on a TCP stream. This means the remote end + * has closed. + */ + if (nbytes == 0) { + send_recvdone_abort(sock, ISC_R_EOF); + maybe_free_socket(&sock, __LINE__); + return; } - if (ldev == NULL) - goto done; + sock->recvbuf.remaining = nbytes; + sock->recvbuf.consume_position = sock->recvbuf.base; + completeio_recv(sock); /* - * Try to do as much I/O as possible on this socket. There are no - * limits here, currently. + * If there are more receivers waiting for data, queue another receive + * here. */ - switch (completeio_recv(sock, dev, messagehdr, nbytes, recv_errno)) { - case DOIO_SOFT: - cc = 0; - recv_errno = 0; - io_state = startio_recv(sock, dev, &cc, &recv_errno); - goto done; - - case DOIO_EOF: - /* - * read of 0 means the remote end was closed. - * Run through the event queue and dispatch all - * the events with an EOF result code. - */ - do { - dev->result = ISC_R_EOF; - send_recvdone_event(sock, &dev); - dev = ISC_LIST_HEAD(sock->recv_list); - } while (dev != NULL); - goto done; + queue_receive_request(sock); - case DOIO_SUCCESS: - case DOIO_HARD: - send_recvdone_event(sock, &dev); - break; - } - done: - UNLOCK(&sock->lock); + /* + * Unlock and/or destroy if we are the last thing this socket has left to do. + */ + maybe_free_socket(&sock, __LINE__); } static void internal_send(isc_socket_t *sock, isc_socketevent_t *dev, - struct msghdr *messagehdr, int nbytes, int send_errno) + struct msghdr *messagehdr, int nbytes, int send_errno, IoCompletionInfo *lpo) { - isc_socketevent_t *ldev; + buflist_t *buffer; /* * Find out what socket this is and lock it. @@ -2641,23 +2199,36 @@ internal_send(isc_socket_t *sock, isc_socketevent_t *dev, INSIST(VALID_SOCKET(sock)); LOCK(&sock->lock); + CONSISTENT(sock); + socket_log(__LINE__, sock, NULL, IOEVENT, isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND, "internal_send: task got socket event %p", dev); + buffer = ISC_LIST_HEAD(lpo->bufferlist); + while (buffer != NULL) { + ISC_LIST_DEQUEUE(lpo->bufferlist, buffer, link); + + socket_log(__LINE__, sock, NULL, TRACE, + isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, + "free_buffer %p %p", buffer, buffer->buf); + + HeapFree(hHeapHandle, 0, buffer->buf); + HeapFree(hHeapHandle, 0, buffer); + buffer = ISC_LIST_HEAD(lpo->bufferlist); + } + + INSIST(sock->pending_iocp > 0); + sock->pending_iocp--; INSIST(sock->pending_send > 0); sock->pending_send--; /* If the event is no longer in the list we can just return */ - ldev = ISC_LIST_HEAD(sock->send_list); - while (ldev != NULL && ldev != dev) { - ldev = ISC_LIST_NEXT(ldev, ev_link); - } - if (ldev == NULL) + if (!senddone_is_active(sock, dev)) goto done; + /* - * Try to do as much I/O as possible on this socket. There are no - * limits here, currently. + * Set the error code and send things on its way. */ switch (completeio_send(sock, dev, messagehdr, nbytes, send_errno)) { case DOIO_SOFT: @@ -2669,7 +2240,42 @@ internal_send(isc_socket_t *sock, isc_socketevent_t *dev, } done: - UNLOCK(&sock->lock); + maybe_free_socket(&sock, __LINE__); +} + +/* + * These return if the done event passed in is on the list (or for connect, is + * the one we're waiting for. Using these ensures we will not double-send an + * event. + */ +static isc_boolean_t +senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev) +{ + isc_socketevent_t *ldev; + + ldev = ISC_LIST_HEAD(sock->send_list); + while (ldev != NULL && ldev != dev) + ldev = ISC_LIST_NEXT(ldev, ev_link); + + return (ldev == NULL ? ISC_FALSE : ISC_TRUE); +} + +static isc_boolean_t +acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev) +{ + isc_socket_newconnev_t *ldev; + + ldev = ISC_LIST_HEAD(sock->accept_list); + while (ldev != NULL && ldev != dev) + ldev = ISC_LIST_NEXT(ldev, ev_link); + + return (ldev == NULL ? ISC_FALSE : ISC_TRUE); +} + +static isc_boolean_t +connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev) +{ + return (sock->connect_ev == dev ? ISC_TRUE : ISC_FALSE); } /* @@ -2685,7 +2291,6 @@ SocketIoThread(LPVOID ThreadContext) { IoCompletionInfo *lpo = NULL; isc_socket_t *sock = NULL; int request; - isc_socketevent_t *dev = NULL; struct msghdr *messagehdr = NULL; int errval; char strbuf[ISC_STRERRORSIZE]; @@ -2693,13 +2298,13 @@ SocketIoThread(LPVOID ThreadContext) { REQUIRE(VALID_MANAGER(manager)); - /* Set the thread priority high enough so I/O will - * preempt normal recv packet processing, but not - * higher than the timer sync thread. + /* + * Set the thread priority high enough so I/O will + * preempt normal recv packet processing, but not + * higher than the timer sync thread. */ if (!SetThreadPriority(GetCurrentThread(), - THREAD_PRIORITY_ABOVE_NORMAL)) - { + THREAD_PRIORITY_ABOVE_NORMAL)) { errval = GetLastError(); isc__strerror(errval, strbuf, sizeof(strbuf)); FATAL_ERROR(__FILE__, __LINE__, @@ -2714,86 +2319,106 @@ SocketIoThread(LPVOID ThreadContext) { */ while (TRUE) { bSuccess = GetQueuedCompletionStatus(manager->hIoCompletionPort, - &nbytes, (LPDWORD) &sock, - (LPOVERLAPPED *)&lpo, + &nbytes, (LPDWORD)&sock, + (LPWSAOVERLAPPED *)&lpo, INFINITE); if (lpo == NULL) /* Received request to exit */ break; - dev = lpo->dev; - lpo->dev = NULL; + REQUIRE(VALID_SOCKET(sock)); + request = lpo->request_type; errstatus = 0; if (!bSuccess) { - isc_boolean_t dofree = ISC_FALSE; - REQUIRE(VALID_SOCKET(sock)); + isc_result_t isc_result; + /* - * Was this the socket closed under us? + * Did the I/O operation complete? */ - errstatus = GetLastError(); - if (errstatus == WSA_OPERATION_ABORTED) { - LOCK(&sock->lock); - switch (request) { - case SOCKET_RECV: - INSIST(sock->pending_recv > 0); - sock->pending_recv--; - dev->result = ISC_R_CANCELED; -#if XXXMLG_DEBUG - printf("Sending recvdone socket %p %d (%d %d %d %d %d)\n", - sock, sock->fd, sock->pending_recv, sock->pending_send, sock->pending_close, - sock->pending_free, sock->references); -#endif - send_recvdone_event(sock, &dev); + errstatus = WSAGetLastError(); + isc_result = isc__errno2resultx(errstatus, __FILE__, __LINE__); + + LOCK(&sock->lock); + CONSISTENT(sock); + switch (request) { + case SOCKET_RECV: + INSIST(sock->pending_iocp > 0); + sock->pending_iocp--; + INSIST(sock->pending_recv > 0); + sock->pending_recv--; + send_recvdone_abort(sock, isc_result); + if (isc_result == ISC_R_UNEXPECTED) { + UNEXPECTED_ERROR(__FILE__, __LINE__, + "SOCKET_RECV: Windows error code: %d, returning ISC error %d", + errstatus, isc_result); + } + break; - break; - case SOCKET_SEND: - INSIST(sock->pending_send > 0); - sock->pending_send--; - dev->result = ISC_R_CANCELED; -#if XXXMLG_DEBUG - printf("Sending senddone socket %p %d (%d %d %d %d %d)\n", - sock, sock->fd, sock->pending_recv, sock->pending_send, sock->pending_close, - sock->pending_free, sock->references); -#endif - send_senddone_event(sock, &dev); - break; + case SOCKET_SEND: + INSIST(sock->pending_iocp > 0); + sock->pending_iocp--; + INSIST(sock->pending_send > 0); + sock->pending_send--; + if (senddone_is_active(sock, lpo->dev)) { + lpo->dev->result = isc_result; + socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, + "cancelled_send"); + send_senddone_event(sock, &lpo->dev); } - if (sock->pending_recv == 0 && - sock->pending_send == 0 && - sock->pending_close == 0 && - sock->pending_free == 1 && - sock->references == 0) { - sock->pending_free = 0; - dofree = ISC_TRUE; + break; + + case SOCKET_ACCEPT: + INSIST(sock->pending_iocp > 0); + sock->pending_iocp--; + INSIST(sock->pending_accept > 0); + sock->pending_accept--; + if (acceptdone_is_active(sock, lpo->adev)) { + closesocket(lpo->adev->newsocket->fd); + lpo->adev->newsocket->fd = INVALID_SOCKET; + lpo->adev->newsocket->references--; + free_socket(&lpo->adev->newsocket, __LINE__); + lpo->adev->result = isc_result; + socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, + "cancelled_accept"); + send_acceptdone_event(sock, &lpo->adev); } + break; - UNLOCK(&sock->lock); - - if (dofree) { - InterlockedDecrement(&manager->iocp_total); - sock->iocp = 0; - LOCK(&manager->lock); - ISC_LIST_UNLINK(manager->socklist, sock, link); - free_socket(&sock); - if (ISC_LIST_EMPTY(manager->socklist)) - SIGNAL(&manager->shutdown_ok); - UNLOCK(&manager->lock); + case SOCKET_CONNECT: + INSIST(sock->pending_iocp > 0); + sock->pending_iocp--; + INSIST(sock->pending_connect == 1); + sock->pending_connect = 0; + if (connectdone_is_active(sock, lpo->cdev)) { + lpo->cdev->result = isc_result; + socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, + "cancelled_connect"); + send_connectdone_event(sock, &lpo->cdev); } - if (lpo != NULL) - HeapFree(hHeapHandle, 0, lpo); - continue; + break; } + maybe_free_socket(&sock, __LINE__); + + if (lpo != NULL) + HeapFree(hHeapHandle, 0, lpo); + continue; } messagehdr = &lpo->messagehdr; switch (request) { case SOCKET_RECV: - internal_recv(sock, dev, messagehdr, nbytes, errstatus); + internal_recv(sock, nbytes); break; case SOCKET_SEND: - internal_send(sock, dev, messagehdr, nbytes, errstatus); + internal_send(sock, lpo->dev, messagehdr, nbytes, errstatus, lpo); + break; + case SOCKET_ACCEPT: + internal_accept(sock, lpo, errstatus); + break; + case SOCKET_CONNECT: + internal_connect(sock, lpo, errstatus); break; } @@ -2810,150 +2435,12 @@ SocketIoThread(LPVOID ThreadContext) { return ((isc_threadresult_t)0); } -/* - * This is the thread that will loop forever, waiting for an event to - * happen. - * - * When the wait returns something to do, find the signaled event - * and issue the request for the given socket - */ -static isc_threadresult_t WINAPI -event_wait(void *uap) { - events_thread_t *evthread = uap; - isc_socketmgr_t *manager = evthread->manager; - int cc; - int event_errno; - char strbuf[ISC_STRERRORSIZE]; - isc_socket_t *wsock; - int iEvent; - int max_event; - sock_event_list *evlist; - WSANETWORKEVENTS NetworkEvents; - int err; - - REQUIRE(evthread != NULL); - REQUIRE(VALID_MANAGER(manager)); - - /* We need to know the Id of the thread */ - evthread->thread_id = GetCurrentThreadId(); - - evlist = &(evthread->sockev_list); - - /* See if there's anything waiting to add to the event list */ - if (manager->event_written > 0) - process_eventlist(evlist, manager); - - while (!manager->bShutdown) { - do { - - max_event = evlist->max_event; - event_errno = 0; - - WSAResetEvent(evlist->aEventList[0]); - cc = WSAWaitForMultipleEvents(max_event, - evlist->aEventList, FALSE, WSA_INFINITE, - FALSE); - if (cc == WSA_WAIT_FAILED) { - event_errno = WSAGetLastError(); - if (!SOFT_ERROR(event_errno)) { - isc__strerror(event_errno, strbuf, - sizeof(strbuf)); - FATAL_ERROR(__FILE__, __LINE__, - "WSAWaitForMultipleEvents() %s: %s", - isc_msgcat_get(isc_msgcat, - ISC_MSGSET_GENERAL, - ISC_MSG_FAILED, - "failed"), - strbuf); - } - } - - } while (cc < 0 && !manager->bShutdown - && manager->event_written == 0); - - if (manager->bShutdown) - break; - - iEvent = cc - WSA_WAIT_EVENT_0; - - /* - * Add or delete events as requested - */ - if (manager->event_written > 0) - process_eventlist(evlist, manager); - /* - * Stopped to add and delete events on the list - */ - if (iEvent == 0) - continue; - - wsock = evlist->aSockList[iEvent]; - if (wsock == NULL) - continue; - - if (WSAEnumNetworkEvents(wsock->fd, wsock->hEvent, - &NetworkEvents) == SOCKET_ERROR) { - err = WSAGetLastError(); - isc__strerror(err, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "event_wait: WSAEnumNetworkEvents() %s", - strbuf); - /* XXXMPA */ - } - - if(NetworkEvents.lNetworkEvents == 0 ) { - continue; - } - - /* - * Check for FD_CLOSE events first. This takes precedence over - * other possible events as it needs to be handled instead of - * any other event if it happens on the socket. - * The error code found, if any, is fed into the internal_*() - * routines. - */ - if(NetworkEvents.lNetworkEvents & FD_CLOSE) { - event_errno = NetworkEvents.iErrorCode[FD_CLOSE_BIT]; - } else if (NetworkEvents.lNetworkEvents & FD_ACCEPT) { - event_errno = NetworkEvents.iErrorCode[FD_ACCEPT_BIT]; - } else if (NetworkEvents.lNetworkEvents & FD_CONNECT) { - event_errno = NetworkEvents.iErrorCode[FD_CONNECT_BIT]; - } else { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "event_wait: WSAEnumNetworkEvents() " - "unexpected event bit set: %0x", - NetworkEvents.lNetworkEvents); - } - - if (wsock->references > 0 && wsock->pending_close == 0) { - LOCK(&wsock->lock); - if (wsock->listener == 1 && - wsock->pending_accept == 0) { - wsock->pending_accept = 1; - internal_accept(wsock, event_errno); - UNLOCK(&wsock->lock); - } else { - internal_connect(wsock, event_errno); - UNLOCK(&wsock->lock); - eventlist_event_delete(wsock, evlist, manager); - } - } - } - - manager_log(manager, TRACE, - isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, - ISC_MSG_EXITING, "event_wait exiting")); - - return ((isc_threadresult_t)0); -} - /* * Create a new socket manager. */ isc_result_t isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) { isc_socketmgr_t *manager; - events_thread_t *evthread = NULL; isc_result_t result; REQUIRE(managerp != NULL && *managerp == NULL); @@ -2990,33 +2477,10 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) { iocompletionport_init(manager); /* Create the Completion Ports */ - /* - * Event Wait Thread Initialization - */ - ISC_LIST_INIT(manager->ev_threads); - - /* - * Start up the initial event wait thread. - */ - result = event_thread_create(&evthread, manager); - if (result != ISC_R_SUCCESS) { - isc_condition_destroy(&manager->shutdown_ok); - DESTROYLOCK(&manager->lock); - isc_mem_put(mctx, manager, sizeof(*manager)); - return (result); - } - - manager->prime_alert = evthread->sockev_list.aEventList[0]; - manager->event_written = 0; manager->bShutdown = ISC_FALSE; - manager->totalHandles = 0; manager->totalSockets = 0; - manager->totalHandleRequests = 0; manager->iocp_total = 0; - /* Initialize the event update list */ - ISC_LIST_INIT(manager->event_updates); - *managerp = manager; return (ISC_R_SUCCESS); @@ -3027,10 +2491,10 @@ isc_socketmgr_destroy(isc_socketmgr_t **managerp) { isc_socketmgr_t *manager; int i; isc_mem_t *mctx; - events_thread_t *evthread; #if XXXMLG_DEBUG - fclose(logfile); + if (logfile) + fclose(logfile); #endif /* @@ -3060,35 +2524,15 @@ isc_socketmgr_destroy(isc_socketmgr_t **managerp) { * Here, we need to had some wait code for the completion port * thread. */ - signal_iocompletionport_exit(manager); - manager->bShutdown = ISC_TRUE; - - /* - * Wait for threads to exit. - */ - - /* - * Shut down the event wait threads - */ - evthread = ISC_LIST_HEAD(manager->ev_threads); - while (evthread != NULL) { - WSASetEvent(evthread->sockev_list.aEventList[0]); - if (isc_thread_join(evthread->thread_handle, NULL) != ISC_R_SUCCESS) - UNEXPECTED_ERROR(__FILE__, __LINE__, - "isc_thread_join() for event_wait %s", - isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, - ISC_MSG_FAILED, "failed")); - ISC_LIST_DEQUEUE(manager->ev_threads, evthread, link); - isc_mem_put(manager->mctx, evthread, sizeof(*evthread)); - evthread = ISC_LIST_HEAD(manager->ev_threads); - } + signal_iocompletionport_exit(manager); + manager->bShutdown = ISC_TRUE; /* - * Now the I/O Completion Port Worker Threads + * Wait for threads to exit. */ for (i = 0; i < manager->maxIOCPThreads; i++) { - if (isc_thread_join((isc_thread_t) manager->hIOCPThreads[i], NULL) - != ISC_R_SUCCESS) + if (isc_thread_join((isc_thread_t) manager->hIOCPThreads[i], + NULL) != ISC_R_SUCCESS) UNEXPECTED_ERROR(__FILE__, __LINE__, "isc_thread_join() for Completion Port %s", isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, @@ -3112,11 +2556,36 @@ isc_socketmgr_destroy(isc_socketmgr_t **managerp) { *managerp = NULL; } +static void +queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev) +{ + isc_task_t *ntask = NULL; + + isc_task_attach(task, &ntask); + dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED; + + /* + * Enqueue the request. + */ + INSIST(!ISC_LINK_LINKED(dev, ev_link)); + ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link); + + socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, + "queue_receive_event: event %p -> task %p", + dev, ntask); +} + +/* + * Check the pending receive queue, and if we have data pending, give it to this + * caller. If we have none, queue an I/O request. If this caller is not the first + * on the list, then we will just queue this event and return. + * + * Caller must have the socket locked. + */ static isc_result_t socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, unsigned int flags) { - int io_state; int cc = 0; isc_task_t *ntask = NULL; isc_result_t result = ISC_R_SUCCESS; @@ -3124,45 +2593,26 @@ socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, dev->ev_sender = task; - LOCK(&sock->lock); - io_state = startio_recv(sock, dev, &cc, &recv_errno); - - switch (io_state) { - case DOIO_PENDING: /* I/O Started. Nothing to be done */ - case DOIO_SOFT: - /* - * We couldn't read all or part of the request right now, so - * queue it. - * - * Attach to socket and to task - */ - isc_task_attach(task, &ntask); - dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED; - - /* - * Enqueue the request. - */ - ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link); - - socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, - "socket_recv: event %p -> task %p", - dev, ntask); + if (sock->fd == INVALID_SOCKET) + return (ISC_R_EOF); - if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0) - result = ISC_R_INPROGRESS; - break; + /* + * Queue our event on the list of things to do. Call our function to + * attempt to fill buffers as much as possible, and return done events. + * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE + * here and tell our caller that we could not satisfy it immediately. + */ + queue_receive_event(sock, task, dev); + if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0) + result = ISC_R_INPROGRESS; - case DOIO_EOF: - dev->result = ISC_R_EOF; - /* fallthrough */ + completeio_recv(sock); - case DOIO_HARD: - case DOIO_SUCCESS: - if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0) - send_recvdone_event(sock, &dev); - break; - } - UNLOCK(&sock->lock); + /* + * If there are more receivers waiting for data, queue another receive + * here. If the + */ + queue_receive_request(sock); return (result); } @@ -3176,8 +2626,20 @@ isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist, isc_socketmgr_t *manager; unsigned int iocount; isc_buffer_t *buffer; + isc_result_t ret; REQUIRE(VALID_SOCKET(sock)); + LOCK(&sock->lock); + CONSISTENT(sock); + + /* + * Make sure that the socket is not closed. XXXMLG change error here? + */ + if (sock->fd == INVALID_SOCKET) { + UNLOCK(&sock->lock); + return (ISC_R_CONNREFUSED); + } + REQUIRE(buflist != NULL); REQUIRE(!ISC_LIST_EMPTY(*buflist)); REQUIRE(task != NULL); @@ -3193,6 +2655,7 @@ isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist, dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg); if (dev == NULL) { + UNLOCK(&sock->lock); return (ISC_R_NOMEMORY); } @@ -3218,7 +2681,10 @@ isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist, buffer = ISC_LIST_HEAD(*buflist); } - return (socket_recv(sock, dev, task, 0)); + ret = socket_recv(sock, dev, task, 0); + + UNLOCK(&sock->lock); + return (ret); } isc_result_t @@ -3227,8 +2693,19 @@ isc_socket_recv(isc_socket_t *sock, isc_region_t *region, unsigned int minimum, { isc_socketevent_t *dev; isc_socketmgr_t *manager; + isc_result_t ret; REQUIRE(VALID_SOCKET(sock)); + LOCK(&sock->lock); + CONSISTENT(sock); + + /* + * make sure that the socket's not closed + */ + if (sock->fd == INVALID_SOCKET) { + UNLOCK(&sock->lock); + return (ISC_R_CONNREFUSED); + } REQUIRE(action != NULL); manager = sock->manager; @@ -3237,10 +2714,14 @@ isc_socket_recv(isc_socket_t *sock, isc_region_t *region, unsigned int minimum, INSIST(sock->bound); dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg); - if (dev == NULL) + if (dev == NULL) { + UNLOCK(&sock->lock); return (ISC_R_NOMEMORY); + } - return (isc_socket_recv2(sock, region, minimum, task, dev, 0)); + ret = isc_socket_recv2(sock, region, minimum, task, dev, 0); + UNLOCK(&sock->lock); + return (ret); } isc_result_t @@ -3248,8 +2729,22 @@ isc_socket_recv2(isc_socket_t *sock, isc_region_t *region, unsigned int minimum, isc_task_t *task, isc_socketevent_t *event, unsigned int flags) { - event->ev_sender = sock; + isc_result_t ret; + + REQUIRE(VALID_SOCKET(sock)); + LOCK(&sock->lock); + CONSISTENT(sock); + event->result = ISC_R_UNEXPECTED; + event->ev_sender = sock; + /* + * make sure that the socket's not closed + */ + if (sock->fd == INVALID_SOCKET) { + UNLOCK(&sock->lock); + return (ISC_R_CONNREFUSED); + } + ISC_LIST_INIT(event->bufferlist); event->region = *region; event->n = 0; @@ -3268,9 +2763,14 @@ isc_socket_recv2(isc_socket_t *sock, isc_region_t *region, event->minimum = minimum; } - return (socket_recv(sock, event, task, flags)); + ret = socket_recv(sock, event, task, flags); + UNLOCK(&sock->lock); + return (ret); } +/* + * Caller must have the socket locked. + */ static isc_result_t socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, @@ -3279,7 +2779,6 @@ socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, int io_state; int send_errno = 0; int cc = 0; - isc_boolean_t have_lock = ISC_FALSE; isc_task_t *ntask = NULL; isc_result_t result = ISC_R_SUCCESS; @@ -3301,10 +2800,7 @@ socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, dev->pktinfo.ipi6_ifindex = 0; } - LOCK(&sock->lock); - have_lock = ISC_TRUE; io_state = startio_send(sock, dev, &cc, &send_errno); - switch (io_state) { case DOIO_PENDING: /* I/O started. Nothing more to do */ case DOIO_SOFT: @@ -3315,14 +2811,11 @@ socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, if ((flags & ISC_SOCKFLAG_NORETRY) == 0) { isc_task_attach(task, &ntask); dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED; - if (!have_lock) { - LOCK(&sock->lock); - have_lock = ISC_TRUE; - } /* * Enqueue the request. */ + INSIST(!ISC_LINK_LINKED(dev, ev_link)); ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link); socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, @@ -3338,9 +2831,6 @@ socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, break; } - if (have_lock) - UNLOCK(&sock->lock); - return (result); } @@ -3362,8 +2852,20 @@ isc_socket_sendto(isc_socket_t *sock, isc_region_t *region, { isc_socketevent_t *dev; isc_socketmgr_t *manager; + isc_result_t ret; REQUIRE(VALID_SOCKET(sock)); + + LOCK(&sock->lock); + CONSISTENT(sock); + + /* + * make sure that the socket's not closed + */ + if (sock->fd == INVALID_SOCKET) { + UNLOCK(&sock->lock); + return (ISC_R_CONNREFUSED); + } REQUIRE(region != NULL); REQUIRE(task != NULL); REQUIRE(action != NULL); @@ -3375,11 +2877,14 @@ isc_socket_sendto(isc_socket_t *sock, isc_region_t *region, dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg); if (dev == NULL) { + UNLOCK(&sock->lock); return (ISC_R_NOMEMORY); } dev->region = *region; - return (socket_send(sock, dev, task, address, pktinfo, 0)); + ret = socket_send(sock, dev, task, address, pktinfo, 0); + UNLOCK(&sock->lock); + return (ret); } isc_result_t @@ -3399,8 +2904,20 @@ isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist, isc_socketmgr_t *manager; unsigned int iocount; isc_buffer_t *buffer; + isc_result_t ret; REQUIRE(VALID_SOCKET(sock)); + + LOCK(&sock->lock); + CONSISTENT(sock); + + /* + * make sure that the socket's not closed + */ + if (sock->fd == INVALID_SOCKET) { + UNLOCK(&sock->lock); + return (ISC_R_CONNREFUSED); + } REQUIRE(buflist != NULL); REQUIRE(!ISC_LIST_EMPTY(*buflist)); REQUIRE(task != NULL); @@ -3414,6 +2931,7 @@ isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist, dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg); if (dev == NULL) { + UNLOCK(&sock->lock); return (ISC_R_NOMEMORY); } @@ -3427,7 +2945,9 @@ isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist, buffer = ISC_LIST_HEAD(*buflist); } - return (socket_send(sock, dev, task, address, pktinfo, 0)); + ret = socket_send(sock, dev, task, address, pktinfo, 0); + UNLOCK(&sock->lock); + return (ret); } isc_result_t @@ -3436,18 +2956,33 @@ isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region, isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, isc_socketevent_t *event, unsigned int flags) { + isc_result_t ret; + + REQUIRE(VALID_SOCKET(sock)); + LOCK(&sock->lock); + CONSISTENT(sock); + REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0); if ((flags & ISC_SOCKFLAG_NORETRY) != 0) REQUIRE(sock->type == isc_sockettype_udp); event->ev_sender = sock; event->result = ISC_R_UNEXPECTED; + /* + * make sure that the socket's not closed + */ + if (sock->fd == INVALID_SOCKET) { + UNLOCK(&sock->lock); + return (ISC_R_CONNREFUSED); + } ISC_LIST_INIT(event->bufferlist); event->region = *region; event->n = 0; event->offset = 0; event->attributes = 0; - return (socket_send(sock, event, task, address, pktinfo, flags)); + ret = socket_send(sock, event, task, address, pktinfo, flags); + UNLOCK(&sock->lock); + return (ret); } isc_result_t @@ -3457,7 +2992,17 @@ isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr, char strbuf[ISC_STRERRORSIZE]; int on = 1; + REQUIRE(VALID_SOCKET(sock)); LOCK(&sock->lock); + CONSISTENT(sock); + + /* + * make sure that the socket's not closed + */ + if (sock->fd == INVALID_SOCKET) { + UNLOCK(&sock->lock); + return (ISC_R_CONNREFUSED); + } INSIST(!sock->bound); @@ -3528,11 +3073,19 @@ isc_socket_filter(isc_socket_t *sock, const char *filter) { isc_result_t isc_socket_listen(isc_socket_t *sock, unsigned int backlog) { char strbuf[ISC_STRERRORSIZE]; - isc_result_t retstat; REQUIRE(VALID_SOCKET(sock)); LOCK(&sock->lock); + CONSISTENT(sock); + + /* + * make sure that the socket's not closed + */ + if (sock->fd == INVALID_SOCKET) { + UNLOCK(&sock->lock); + return (ISC_R_CONNREFUSED); + } REQUIRE(!sock->listener); REQUIRE(sock->bound); @@ -3550,20 +3103,10 @@ isc_socket_listen(isc_socket_t *sock, unsigned int backlog) { return (ISC_R_UNEXPECTED); } + socket_log(__LINE__, sock, NULL, TRACE, + isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "listening"); sock->listener = 1; - - /* Add the socket to the list of events to accept */ - retstat = socket_event_add(sock, FD_CLOSE); - if (retstat != ISC_R_SUCCESS) { - UNLOCK(&sock->lock); - if (retstat != ISC_R_NOSPACE) { - isc__strerror(WSAGetLastError(), strbuf, - sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "isc_socket_listen: socket_event_add: %s", strbuf); - } - return (retstat); - } + _set_state(sock, SOCK_LISTEN); UNLOCK(&sock->lock); return (ISC_R_SUCCESS); @@ -3576,17 +3119,28 @@ isc_result_t isc_socket_accept(isc_socket_t *sock, isc_task_t *task, isc_taskaction_t action, const void *arg) { - isc_socket_newconnev_t *dev; + isc_socket_newconnev_t *adev; isc_socketmgr_t *manager; isc_task_t *ntask = NULL; isc_socket_t *nsock; isc_result_t result; + IoCompletionInfo *lpo; REQUIRE(VALID_SOCKET(sock)); + manager = sock->manager; REQUIRE(VALID_MANAGER(manager)); LOCK(&sock->lock); + CONSISTENT(sock); + + /* + * make sure that the socket's not closed + */ + if (sock->fd == INVALID_SOCKET) { + UNLOCK(&sock->lock); + return (ISC_R_CONNREFUSED); + } REQUIRE(sock->listener); @@ -3595,55 +3149,78 @@ isc_socket_accept(isc_socket_t *sock, * this event to. Just before the actual event is delivered the * actual ev_sender will be touched up to be the socket. */ - dev = (isc_socket_newconnev_t *) + adev = (isc_socket_newconnev_t *) isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN, - action, arg, sizeof(*dev)); - if (dev == NULL) { + action, arg, sizeof(*adev)); + if (adev == NULL) { UNLOCK(&sock->lock); return (ISC_R_NOMEMORY); } - ISC_LINK_INIT(dev, ev_link); + ISC_LINK_INIT(adev, ev_link); result = allocate_socket(manager, sock->type, &nsock); if (result != ISC_R_SUCCESS) { - isc_event_free((isc_event_t **)&dev); + isc_event_free((isc_event_t **)&adev); UNLOCK(&sock->lock); return (result); } + /* + * AcceptEx() requires we pass in a socket. + */ + nsock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP); + if (nsock->fd == INVALID_SOCKET) { + free_socket(&nsock, __LINE__); + isc_event_free((isc_event_t **)&adev); + UNLOCK(&sock->lock); + return (ISC_R_FAILURE); // XXXMLG need real error message + } + /* * Attach to socket and to task. */ isc_task_attach(task, &ntask); nsock->references++; - dev->ev_sender = ntask; - dev->newsocket = nsock; + adev->ev_sender = ntask; + adev->newsocket = nsock; + _set_state(nsock, SOCK_ACCEPT); /* - * Wait for connects. + * Queue io completion for an accept(). */ - if (ISC_LIST_EMPTY(sock->accept_list) && - WSAEventSelect(sock->fd, sock->hEvent, FD_ACCEPT | FD_CLOSE) != 0) { - char strbuf[ISC_STRERRORSIZE]; - int stat; - const char *msg; - stat = WSAGetLastError(); - isc__strerror(stat, strbuf, sizeof(strbuf)); - msg = isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, - ISC_MSG_FAILED, "failed"); - UNEXPECTED_ERROR(__FILE__, __LINE__, "WSAEventSelect: %s: %s", - msg, strbuf); - isc_task_detach(&ntask); - isc_socket_detach(&nsock); - isc_event_free((isc_event_t **)&dev); - UNLOCK(&sock->lock); - return (ISC_R_UNEXPECTED); - } + lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, + HEAP_ZERO_MEMORY, + sizeof(IoCompletionInfo)); + RUNTIME_CHECK(lpo != NULL); + lpo->acceptbuffer = (void *)HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, + (sizeof(SOCKADDR) + 16) * 2); + RUNTIME_CHECK(lpo->acceptbuffer != NULL); + + lpo->adev = adev; + lpo->request_type = SOCKET_ACCEPT; + + ISCAcceptEx(sock->fd, + nsock->fd, /* Accepted Socket */ + lpo->acceptbuffer, /* Buffer for initial Recv */ + 0, /* Length of Buffer */ + sizeof(SOCKADDR) + 16, /* Local address length + 16 */ + sizeof(SOCKADDR) + 16, /* Remote address lengh + 16 */ + (LPDWORD)&lpo->received_bytes, /* Bytes Recved */ + (LPOVERLAPPED)lpo /* Overlapped structure */ + ); + iocompletionport_update(nsock); + + socket_log(__LINE__, sock, NULL, TRACE, + isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, + "accepting for nsock %p fd %d", nsock, nsock->fd); + /* * Enqueue the event */ - ISC_LIST_ENQUEUE(sock->accept_list, dev, ev_link); + ISC_LIST_ENQUEUE(sock->accept_list, adev, ev_link); + sock->pending_accept++; + sock->pending_iocp++; UNLOCK(&sock->lock); return (ISC_R_SUCCESS); @@ -3653,13 +3230,10 @@ isc_result_t isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr, isc_task_t *task, isc_taskaction_t action, const void *arg) { - isc_socket_connev_t *dev; + isc_socket_connev_t *cdev; isc_task_t *ntask = NULL; isc_socketmgr_t *manager; - int cc; - int retstat; - int errval; - char strbuf[ISC_STRERRORSIZE]; + IoCompletionInfo *lpo; REQUIRE(VALID_SOCKET(sock)); REQUIRE(addr != NULL); @@ -3674,104 +3248,59 @@ isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr, return (ISC_R_MULTICAST); LOCK(&sock->lock); - - REQUIRE(!sock->connecting); - - dev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock, - ISC_SOCKEVENT_CONNECT, - action, arg, - sizeof(*dev)); - if (dev == NULL) { - UNLOCK(&sock->lock); - return (ISC_R_NOMEMORY); - } - ISC_LINK_INIT(dev, ev_link); + CONSISTENT(sock); /* - * Try to do the connect right away, as there can be only one - * outstanding, and it might happen to complete. + * make sure that the socket's not closed */ - sock->address = *addr; - cc = connect(sock->fd, &addr->type.sa, addr->length); - if (cc < 0) { - errval = WSAGetLastError(); - if (SOFT_ERROR(errval) || errval == WSAEINPROGRESS) - goto queue; - - switch (errval) { -#define ERROR_MATCH(a, b) case a: dev->result = b; goto err_exit; - ERROR_MATCH(WSAEACCES, ISC_R_NOPERM); - ERROR_MATCH(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); - ERROR_MATCH(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); - ERROR_MATCH(WSAECONNREFUSED, ISC_R_CONNREFUSED); - ERROR_MATCH(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH); - ERROR_MATCH(WSAEHOSTDOWN, ISC_R_HOSTUNREACH); - ERROR_MATCH(WSAENETUNREACH, ISC_R_NETUNREACH); - ERROR_MATCH(WSAENOBUFS, ISC_R_NORESOURCES); - ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH); - ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED); -#undef ERROR_MATCH - } - - sock->connected = 0; - - isc__strerror(errval, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "%d/%s", errval, strbuf); - + if (sock->fd == INVALID_SOCKET) { UNLOCK(&sock->lock); - isc_event_free((isc_event_t **)&dev); - return (ISC_R_UNEXPECTED); + return (ISC_R_CONNREFUSED); + } - err_exit: - sock->connected = 0; - isc_task_send(task, (isc_event_t **)&dev); + REQUIRE(!sock->pending_connect); + cdev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock, + ISC_SOCKEVENT_CONNECT, + action, arg, + sizeof(*cdev)); + if (cdev == NULL) { UNLOCK(&sock->lock); - return (ISC_R_SUCCESS); + return (ISC_R_NOMEMORY); } + ISC_LINK_INIT(cdev, ev_link); /* - * If connect completed, fire off the done event. + * Queue io completion for an accept(). */ - if (cc == 0) { - sock->connected = 1; - sock->bound = 1; - dev->result = ISC_R_SUCCESS; - isc_task_send(task, (isc_event_t **)&dev); - - UNLOCK(&sock->lock); - return (ISC_R_SUCCESS); - } + lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, + HEAP_ZERO_MEMORY, + sizeof(IoCompletionInfo)); + lpo->cdev = cdev; + lpo->request_type = SOCKET_CONNECT; - queue: + sock->address = *addr; + ISCConnectEx(sock->fd, &addr->type.sa, addr->length, + NULL, 0, NULL, (LPOVERLAPPED)lpo); /* * Attach to task. */ isc_task_attach(task, &ntask); + cdev->ev_sender = ntask; - sock->connecting = 1; - - dev->ev_sender = ntask; + sock->pending_connect = 1; + _set_state(sock, SOCK_CONNECT); /* * Enqueue the request. */ - sock->connect_ev = dev; - /* Add the socket to the list of events to connect */ - retstat = socket_event_add(sock, FD_CONNECT | FD_CLOSE); - if (retstat != ISC_R_SUCCESS) { - UNLOCK(&sock->lock); - if (retstat != ISC_R_NOSPACE) { - isc__strerror(WSAGetLastError(), strbuf, - sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "isc_socket_connect: socket_event_add: %s", strbuf); - } - return (retstat); - } + sock->connect_ev = cdev; + sock->pending_iocp++; + CONSISTENT(sock); UNLOCK(&sock->lock); + return (ISC_R_SUCCESS); } @@ -3783,6 +3312,15 @@ isc_socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) { REQUIRE(addressp != NULL); LOCK(&sock->lock); + CONSISTENT(sock); + + /* + * make sure that the socket's not closed + */ + if (sock->fd == INVALID_SOCKET) { + UNLOCK(&sock->lock); + return (ISC_R_CONNREFUSED); + } if (sock->connected) { *addressp = sock->address; @@ -3806,6 +3344,15 @@ isc_socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) { REQUIRE(addressp != NULL); LOCK(&sock->lock); + CONSISTENT(sock); + + /* + * make sure that the socket's not closed + */ + if (sock->fd == INVALID_SOCKET) { + UNLOCK(&sock->lock); + return (ISC_R_CONNREFUSED); + } if (!sock->bound) { result = ISC_R_NOTBOUND; @@ -3847,18 +3394,16 @@ isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) { return; LOCK(&sock->lock); - socket_close(sock); - if (sock->iocp == 1) - sock->pending_free = 1; - UNLOCK(&sock->lock); + CONSISTENT(sock); + + /* + * make sure that the socket's not closed + */ + if (sock->fd == INVALID_SOCKET) { + UNLOCK(&sock->lock); + return; + } -/* - * Temporarily disable this. Windows cannot cancel a single - * I/O in its current popular varieties, but leave this code - * here for when/if we belive we can rely on doing so. - * XXXMLG - */ -#if 0 /* * All of these do the same thing, more or less. * Each will: @@ -3869,14 +3414,13 @@ isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) { * its done event with status of "ISC_R_CANCELED". * o Reset any state needed. */ - if (((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV) - && !ISC_LIST_EMPTY(sock->recv_list)) { + + if ((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV) { isc_socketevent_t *dev; isc_socketevent_t *next; isc_task_t *current_task; dev = ISC_LIST_HEAD(sock->recv_list); - while (dev != NULL) { current_task = dev->ev_sender; next = ISC_LIST_NEXT(dev, ev_link); @@ -3887,9 +3431,9 @@ isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) { dev = next; } } + how &= ~ISC_SOCKCANCEL_RECV; - if (((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND) - && !ISC_LIST_EMPTY(sock->send_list)) { + if ((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND) { isc_socketevent_t *dev; isc_socketevent_t *next; isc_task_t *current_task; @@ -3906,6 +3450,7 @@ isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) { dev = next; } } + how &= ~ISC_SOCKCANCEL_SEND; if (((how & ISC_SOCKCANCEL_ACCEPT) == ISC_SOCKCANCEL_ACCEPT) && !ISC_LIST_EMPTY(sock->accept_list)) { @@ -3914,40 +3459,25 @@ isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) { isc_task_t *current_task; dev = ISC_LIST_HEAD(sock->accept_list); - while (dev != NULL) { current_task = dev->ev_sender; next = ISC_LIST_NEXT(dev, ev_link); if ((task == NULL) || (task == current_task)) { - ISC_LIST_UNLINK(sock->accept_list, dev, - ev_link); - dev->newsocket->references--; - free_socket(&dev->newsocket); + closesocket(dev->newsocket->fd); + dev->newsocket->fd = INVALID_SOCKET; + free_socket(&dev->newsocket, __LINE__); dev->result = ISC_R_CANCELED; - dev->ev_sender = sock; - isc_task_sendanddetach(¤t_task, - (isc_event_t **)&dev); + send_acceptdone_event(sock, &dev); } dev = next; } - if (sock->hEvent != NULL && - WSAEventSelect(sock->fd, sock->hEvent, FD_CLOSE) != 0) { - char strbuf[ISC_STRERRORSIZE]; - int stat; - const char *msg; - stat = WSAGetLastError(); - isc__strerror(stat, strbuf, sizeof(strbuf)); - msg = isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, - ISC_MSG_FAILED, "failed"); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "WSAEventSelect: %s: %s", msg, strbuf); - } } + how &= ~ISC_SOCKCANCEL_ACCEPT; /* * Connecting is not a list. @@ -3957,38 +3487,64 @@ isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) { isc_socket_connev_t *dev; isc_task_t *current_task; - INSIST(sock->connecting); - sock->connecting = 0; + INSIST(sock->pending_connect); dev = sock->connect_ev; current_task = dev->ev_sender; if ((task == NULL) || (task == current_task)) { - sock->connect_ev = NULL; + closesocket(sock->fd); + sock->fd = INVALID_SOCKET; + _set_state(sock, SOCK_CLOSED); + sock->connect_ev = NULL; dev->result = ISC_R_CANCELED; - dev->ev_sender = sock; - isc_task_sendanddetach(¤t_task, - (isc_event_t **)&dev); + send_connectdone_event(sock, &dev); } } + how &= ~ISC_SOCKCANCEL_CONNECT; - UNLOCK(&sock->lock); -#endif + maybe_free_socket(&sock, __LINE__); } isc_sockettype_t isc_socket_gettype(isc_socket_t *sock) { + isc_sockettype_t type; + REQUIRE(VALID_SOCKET(sock)); - return (sock->type); + LOCK(&sock->lock); + + /* + * make sure that the socket's not closed + */ + if (sock->fd == INVALID_SOCKET) { + UNLOCK(&sock->lock); + return (ISC_R_CONNREFUSED); + } + + type = sock->type; + UNLOCK(&sock->lock); + return (type); } isc_boolean_t isc_socket_isbound(isc_socket_t *sock) { isc_boolean_t val; + REQUIRE(VALID_SOCKET(sock)); + LOCK(&sock->lock); + CONSISTENT(sock); + + /* + * make sure that the socket's not closed + */ + if (sock->fd == INVALID_SOCKET) { + UNLOCK(&sock->lock); + return (ISC_FALSE); + } + val = ((sock->bound) ? ISC_TRUE : ISC_FALSE); UNLOCK(&sock->lock); diff --git a/lib/isc/win32/time.c b/lib/isc/win32/time.c index 7fa77028f67..1100ba78276 100644 --- a/lib/isc/win32/time.c +++ b/lib/isc/win32/time.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: time.c,v 1.38 2004/03/16 05:52:22 marka Exp $ */ +/* $Id: time.c,v 1.38.850.1 2008/09/04 05:47:09 each Exp $ */ #include @@ -65,7 +65,7 @@ isc_interval_set(isc_interval_t *i, unsigned int seconds, REQUIRE(nanoseconds < NS_PER_S); i->interval = (LONGLONG)seconds * INTERVALS_PER_S - + nanoseconds / NS_INTERVAL; + + (nanoseconds + NS_INTERVAL - 1) / NS_INTERVAL; } isc_boolean_t