git.ipfire.org Git - thirdparty/unbound.git/commitdiff
DNSoverQUIC (#871)
author Wouter Wijngaards <wcawijngaards@users.noreply.github.com>
Wed, 9 Oct 2024 08:32:03 +0000 (10:32 +0200)
committer GitHub <noreply@github.com>
Wed, 9 Oct 2024 08:32:03 +0000 (10:32 +0200)
* - dnsoverquic, configure --with-libngtcp2 option.

* - dnsoverquic, create comm_point for doq and receive cmsg local address.

* - dnsoverquic, less obtrusive debug.

* - dnsoverquic, log and fix local port number. Neater subroutines and ifdefs.

* - dnsoverquic, add testcode/doqclient.

* - dnsoverquic, review fixes on doqclient.

* - dnsoverquic, fix unit test testbound link.

* - dnsoverquic, parse query in doqclient.

* - dnsoverquic, link with libngtcp2_crypto_openssl and code for doqclient.

* - dnsoverquic, random routine for doqclient and fix ngaddr allocation, and
  check ub_initstate return.

* - dnsoverquic, fix doqclient free of allocated ngaddr addresses.

* - dnsoverquic, enable debug output with -v for doqclient.

* - dnsoverquic, create and set TLS object and TLS context in doqclient.

* - dnsoverquic, work on quic tls context in doqclient.

* - dnsoverquic, set default dnsoverquic port to the standardized 853 port.

* - dnsoverquic, remove debug comment.

* - dnsoverquic, dns-over-quic quic-port: 853 config option.

* - dnsoverquic, log type of interface created at start of unbound.

* - dnsoverquic, log type of no tls https as https when interface is created.

* - dnsoverquic, setup client quic tls methods.

* - dnsoverquic, event work in doqclient.

* - dnsoverquic, explain in documentation that QUIC uses UDP.

* - dnsoverquic, make doqclient exit.

* - dnsoverquic, doqclient cleanup run routine.

* - dnsoverquic, doqclient code nicer.

* - dnsoverquic, doqclient read and timer.

* - dnsoverquic, doqclient write work.

* - dnsoverquic, review fixes.

* - dnsoverquic, detect openssl quic support at configure time.

* - dnsoverquic, do not allow QUIC on port 53 to stop confusion of DoQ and DNS.

* - dnsoverquic, in doqclient, when idle close is returned, drop the connection
  without calling ngtcp2_conn_write_connection_close.

* - dnsoverquic, in doqclient, log callbacks.

* - dnsoverquic, in doqclient add extend_max_local_streams_bidi callback.

* - dnsoverquic, in doqclient add client query lists.

* - dnsoverquic, in doqclient, code cleaner, log text nicer.

* - dnsoverquic, in doqclient, work on write_streams.

* - dnsoverquic, in doqclient, use signed int for stream_id, work on the
  ngtcp2_recv_stream_data callback.

* - dnsoverquic, in doqclient, print result and fixes for recv data.

* - dnsoverquic, in doqclient, add the event callbacks to fptr wlist.

* - dnsoverquic, in doqclient, when already expired, use zero timeout timer.

* - dnsoverquic, in doqclient, ignore unused return codes from
  ngtcp2_conn_writev_stream.

* - dnsoverquic, add doqclient event functions to the unbound-dnstap-socket
  test tool for linking.

* - dnsoverquic, in doqclient, fix multiple operands for the commandline.
  neater dns message output.

* - dnsoverquic, in doqclient, store packet when write blocks and try later.

* - dnsoverquic, in doqclient, limit number of packets and number of bytes sent.

* - dnsoverquic, in doqclient, better size estimate for outgoing packet.

* - dnsoverquic, in doqclient, fix that already written next packet is not
  counted for data length to send.

* - dnsoverquic, in doqclient, early data transmission and session resumption.

* - dnsoverquic, send version negotiation packet.

* - dnsoverquic, send retry and accept the connection.

* - dnsoverquic, storage structures.

* - dnsoverquic, doq connection setup.

* - dnsoverquic, neater code layout for new conn. Fix verbosity of log print.

* - dnsoverquic, doq conn callback functions.

* - dnsoverquic, doq_fill_rand routine in header file.

* - dnsoverquic, keep track of connection ids.

* - dnsoverquic, get_new_connection_id callback.

* - dnsoverquic, create doq_conid tree.

* - dnsoverquic, settings for server connection.

* - dnsoverquic, tls context.

* - dnsoverquic, sendmsg error handling.

* - dnsoverquic, neat code.

* - dnsoverquic, track doq connection last error.

* - dnsoverquic, neater packet address parameters.

* - dnsoverquic, fix uninitialized bytes in msg control in doq sendmsg, and
  fix tree cleanup of conid tree.

* - dnsoverquic, better usage text for doqclient.

* - dnsoverquic, neat code.

* - dnsoverquic, connection receive packet handling.

* - dnsoverquic, debug output.

* - dnsoverquic, debug switched meaning of scid and dcid gives
  ERR_TRANSPORT_PARAM.

* - dnsoverquic, remove debug output.

* - dnsoverquic, connection delete routine and error from connection read in
  more detail with less clutter.

* - dnsoverquic, write to stream, and receive stream data, log packet.

* - dnsoverquic, alpn set up.

* - dnsoverquic, connection close.

* - dnsoverquic, doq_table and locks.

* - dnsoverquic, fix tests.

* - dnsoverquic, better locking.

* - dnsoverquic, doq_stream.

* - dnsoverquic, remove compile warning.

* - dnsoverquic, doq_stream receive data.

* - dnsoverquic, fixes for locks and keep length bytes allocated.

* - dnsoverquic, lock connection on initial insertion.

* - dnsoverquic, reply information, and reply buffer.

* - dnsoverquic, reply info from cache, local-zone and recursion lookups.

* - dnsoverquic, spelling in comment about buffer storage.

* - dnsoverquic, stream write list and doqclient fixes to exit and printout.

* - dnsoverquic, doqclient -q option for short printout.

* - dnsoverquic, unit test with local data reply.

* - dnsoverquic, write connection and write event is set.

* - dnsoverquic, neater logging for write event connection stream writes.

* - dnsoverquic, log remote connection when the streams are written for it.

* - dnsoverquic, better threaded use, threads can write to doq connections at
  the same time.

* - dnsoverquic, unit test for the calculation of connection size with a query.

* - dnsoverquic, use less memory per connection.

* - dnsoverquic, remove unit test output.

* - dnsoverquic, add MSG_DONTWAIT so that there is no mistakenly blocking
  socket operations.

* - dnsoverquic, doqclient logs address on connection failures.

* - dnsoverquic, compat code for clock get time routine.

* - dnsoverquic, use skip_test for doq unit test.

* - dnsoverquic, fixes for proxyprotocol, use remote_addr and set proxyprotocol
  disabled on the doq connection.

* - dnsoverquic, doqclient sets log identity to its name, instead of "unbound".

* - dnsoverquic, handle blocked udp packet writes.

* - dnsoverquic, fix function documentation for verbose_print_addr from
  services/listen_dnsport.c.

* - dnsoverquic, fix doq_conn lock protection. The checklock allows setting
  the output file name, and doqclient uses that. Print place of lock_protect.

* - dnsoverquic, neater buffer clear when write of blocked packet fails, make
  sure that memory area does not overlap for blocked packet addresses when
  write of blocked packet fails, and size blocked packet buffer to the pkt buf.

* - dnsoverquic, move lock check after the test to test script in doq test.

* - dnsoverquic, the doq test uses valgrind when enabled.

* - dnsoverquic, git ignore the doqclient test.

* - dnsoverquic, limit the buffer for packets to max packet size with some more.

* - dnsoverquic, spelling fix.

* - dnsoverquic, timer work, structure and adds and deletes.

* - dnsoverquic, timer_tree uses table.lock.

* - dnsoverquic, fix timer tree remove and spelling in header file comment.

* - dnsoverquic, fix testbound for timer compare function linkage.

* - dnsoverquic, timer set add debug output.

* - dnsoverquic, doq_conn_check_timer function.

* - dnsoverquic, doq_done_setup_timer_and_write function.

* - dnsoverquic, fix that doq conn is not deleted whilst editing write and timer.

* - dnsoverquic, Fix #861 make ERROR netevent.h:1073:32: error: field 'blocked_pkt_pi' has incomplete type

* - dnsoverquic, timer element has timeout setup when socket callback complete.

* - dnsoverquic, fix unit test compile.

* - dnsoverquic, timer callback routine, handle timeout and close and delete the
  connection if necessary.

* - dnsoverquic, timer pickup stops at current time.

* - dnsoverquic, timer comparable with the event base time.

* - dnsoverquic, erase marked time when timer disabled.

* - dnsoverquic, fix timer to set correctly and lock popped write connection
  early, before it is modified.

* - dnsoverquic, fix to unlock connection lock when it is unlinked and deleted.

* - dnsoverquic, fix to unlock connection lock when it is deleted because it is
  a duplicate connection.

* - dnsoverquic, fix that doq timer is not disabled when not set.

* - dnsoverquic, quic-size: 8m maximum number of bytes for QUIC buffers.

* - dnsoverquic, flex and bison.

* - dnsoverquic, quic-size turn away new connections when full.

* - dnsoverquic, doqclient outputs stream reset information.

* - dnsoverquic, detect stream close and reset.

* - dnsoverquic, free stream buffers when data is acked and stream is closed.

* - dnsoverquic, delete stream when closed. Unlink it. Allow stream_id 4 as first.

* - dnsoverquic, stats output for mem.quic and num.query.quic.

* - dnsoverquic, review fix.

* - dnsoverquic, fix when compiled without ngtcp2.

* - dnsoverquic, fix to detect ngtcp2_crypto_quictls for openssl crypto, after
  change in libngtcp2.

* - dnsoverquic, fix for newer ngtcp2 versions. detect ngtcp2_ccerr_default,
  ngtcp2/ngtcp2_crypto_quictls.h, struct ngtcp2_pkt_hd.tokenlen,
  struct ngtcp2_settings.tokenlen and struct ngtcp2_version_cid.

* - dnsoverquic, fix for newer ngtcp2 version, detect number of arguments for
  ngtcp2_conn_shutdown_stream.

* - dnsoverquic, fix for newer ngtcp2.

* - dnsoverquic, use the functions from util/timeval_func.h.

* - dnsoverquic, fix in doqclient only write transport parameters once.

* - dnsoverquic, debug log output removed.

* - dnsoverquic, fix in doqclient to work with renamed NGTCP2_CC_ALGO_BBR_V2
  from ngtcp2.

* - dnsoverquic, fix to check in doq_server_socket_create that tls-service-key
  and tls-service-pem have a value.

* - dnsoverquic, fix to error when doq_server_socket_create fails.

* - dnsoverquic, improve linebreaks in configparser additions.

* - dnsoverquic, fix port from interface pickup after main branch change.

* Fix getting user data from SSL, fix calloc warning.

* Fix fwrite return value check in doqclient

* - timeval_subtract from timeval_func.h
- lock_protect also for HAVE_NGTCP2_CCERR_DEFAULT
- fix doq logging for inet_ntop failures

* - memset for consistency
- no value returned from msghdr_get_ecn when S_SPLINT_S is defined

* - dnsoverquic, rerun autoconf.

---------

Co-authored-by: Yorgos Thessalonikefs <yorgos@nlnetlabs.nl>
44 files changed:
.gitignore
Makefile.in
config.h.in
configure
configure.ac
daemon/daemon.c
daemon/daemon.h
daemon/remote.c
daemon/stats.c
daemon/worker.c
dnstap/unbound-dnstap-socket.c
doc/example.conf.in
doc/unbound-control.8.in
doc/unbound.conf.5.in
libunbound/libworker.c
libunbound/unbound.h
services/listen_dnsport.c
services/listen_dnsport.h
smallapp/unbound-control.c
smallapp/worker_cb.c
testcode/checklocks.c
testcode/checklocks.h
testcode/doqclient.c [new file with mode: 0644]
testcode/fake_event.c
testcode/testbound.c
testcode/unitdoq.c [new file with mode: 0644]
testcode/unitmain.c
testcode/unitmain.h
testdata/doq_downstream.tdir/doq_downstream.conf [new file with mode: 0644]
testdata/doq_downstream.tdir/doq_downstream.dsc [new file with mode: 0644]
testdata/doq_downstream.tdir/doq_downstream.post [new file with mode: 0644]
testdata/doq_downstream.tdir/doq_downstream.pre [new file with mode: 0644]
testdata/doq_downstream.tdir/doq_downstream.test [new file with mode: 0644]
testdata/doq_downstream.tdir/doq_downstream.testns [new file with mode: 0644]
testdata/doq_downstream.tdir/unbound_server.key [new file with mode: 0644]
testdata/doq_downstream.tdir/unbound_server.pem [new file with mode: 0644]
util/config_file.c
util/config_file.h
util/configlexer.lex
util/configparser.y
util/fptr_wlist.c
util/locks.h
util/netevent.c
util/netevent.h

index 2d67173eb29969e1ac4ff4a63c1c6c38ec4252c2..b8f38989b379c156234a2028fc49729f3eedd8ae 100644 (file)
@@ -36,6 +36,7 @@
 /asynclook
 /delayer
 /dohclient
+/doqclient
 /lock-verify
 /memstats
 /perf
index 672435e01e9f02aa65a7ceb7f10633cffee780eb..c262250ca2c58edb3f9c50cd15a96c2e3f92f48d 100644 (file)
@@ -179,11 +179,11 @@ testcode/unitlruhash.c testcode/unitmain.c testcode/unitmsgparse.c \
 testcode/unitneg.c testcode/unitregional.c testcode/unitslabhash.c \
 testcode/unitverify.c testcode/readhex.c testcode/testpkts.c testcode/unitldns.c \
 testcode/unitecs.c testcode/unitauth.c testcode/unitzonemd.c \
-testcode/unittcpreuse.c
+testcode/unittcpreuse.c testcode/unitdoq.c
 UNITTEST_OBJ=unitanchor.lo unitdname.lo unitlruhash.lo unitmain.lo \
 unitmsgparse.lo unitneg.lo unitregional.lo unitslabhash.lo unitverify.lo \
 readhex.lo testpkts.lo unitldns.lo unitecs.lo unitauth.lo unitzonemd.lo \
-unittcpreuse.lo
+unittcpreuse.lo unitdoq.lo
 UNITTEST_OBJ_LINK=$(UNITTEST_OBJ) worker_cb.lo $(COMMON_OBJ) $(SLDNS_OBJ) \
 $(COMPAT_OBJ)
 DAEMON_SRC=daemon/acl_list.c daemon/cachedump.c daemon/daemon.c \
@@ -242,6 +242,10 @@ DOHCLIENT_SRC=testcode/dohclient.c
 DOHCLIENT_OBJ=dohclient.lo
 DOHCLIENT_OBJ_LINK=$(DOHCLIENT_OBJ) worker_cb.lo $(COMMON_OBJ) $(COMPAT_OBJ) \
 $(SLDNS_OBJ)
+DOQCLIENT_SRC=testcode/doqclient.c
+DOQCLIENT_OBJ=doqclient.lo
+DOQCLIENT_OBJ_LINK=$(DOQCLIENT_OBJ) $(COMMON_OBJ) $(COMPAT_OBJ) \
+$(SLDNS_OBJ)
 PERF_SRC=testcode/perf.c
 PERF_OBJ=perf.lo
 PERF_OBJ_LINK=$(PERF_OBJ) worker_cb.lo $(COMMON_OBJ) $(COMPAT_OBJ) $(SLDNS_OBJ)
@@ -288,7 +292,7 @@ ALL_SRC=$(COMMON_SRC) $(UNITTEST_SRC) $(DAEMON_SRC) \
        $(CONTROL_SRC) $(UBANCHOR_SRC) $(PETAL_SRC) $(DNSTAP_SOCKET_SRC)\
        $(PYTHONMOD_SRC) $(PYUNBOUND_SRC) $(WIN_DAEMON_THE_SRC) \
        $(SVCINST_SRC) $(SVCUNINST_SRC) $(ANCHORUPD_SRC) $(SLDNS_SRC) \
-       $(DOHCLIENT_SRC) $(READZONE_SRC)
+       $(DOHCLIENT_SRC) $(DOQCLIENT_SRC) $(READZONE_SRC)
 
 ALL_OBJ=$(COMMON_OBJ) $(UNITTEST_OBJ) $(DAEMON_OBJ) \
        $(TESTBOUND_OBJ) $(LOCKVERIFY_OBJ) $(PKTVIEW_OBJ) \
@@ -297,7 +301,7 @@ ALL_OBJ=$(COMMON_OBJ) $(UNITTEST_OBJ) $(DAEMON_OBJ) \
        $(CONTROL_OBJ) $(UBANCHOR_OBJ) $(PETAL_OBJ) $(DNSTAP_SOCKET_OBJ)\
        $(COMPAT_OBJ) $(PYUNBOUND_OBJ) \
        $(SVCINST_OBJ) $(SVCUNINST_OBJ) $(ANCHORUPD_OBJ) $(SLDNS_OBJ) \
-       $(DOHCLIENT_OBJ) $(READZONE_OBJ)
+       $(DOHCLIENT_OBJ) $(DOQCLIENT_OBJ) $(READZONE_OBJ)
 
 COMPILE=$(LIBTOOL) --tag=CC --mode=compile $(CC) $(CPPFLAGS) $(CFLAGS) @PTHREAD_CFLAGS_ONLY@
 LINK=$(LIBTOOL) --tag=CC --mode=link $(CC) $(staticexe) $(RUNTIME_PATH) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
@@ -334,7 +338,7 @@ rsrc_unbound_checkconf.o:   $(srcdir)/winrc/rsrc_unbound_checkconf.rc config.h
 TEST_BIN=asynclook$(EXEEXT) delayer$(EXEEXT) \
        lock-verify$(EXEEXT) memstats$(EXEEXT) perf$(EXEEXT) \
        petal$(EXEEXT) pktview$(EXEEXT) streamtcp$(EXEEXT) \
-       $(DNSTAP_SOCKET_TESTBIN) dohclient$(EXEEXT) \
+       $(DNSTAP_SOCKET_TESTBIN) dohclient$(EXEEXT) doqclient$(EXEEXT) \
        testbound$(EXEEXT) unittest$(EXEEXT) readzone$(EXEEXT)
 tests: all $(TEST_BIN)
 
@@ -416,6 +420,9 @@ streamtcp$(EXEEXT): $(STREAMTCP_OBJ_LINK)
 dohclient$(EXEEXT):    $(DOHCLIENT_OBJ_LINK)
        $(LINK) -o $@ $(DOHCLIENT_OBJ_LINK) $(SSLLIB) $(LIBS)
 
+doqclient$(EXEEXT):    $(DOQCLIENT_OBJ_LINK)
+       $(LINK) -o $@ $(DOQCLIENT_OBJ_LINK) $(SSLLIB) $(LIBS)
+
 perf$(EXEEXT): $(PERF_OBJ_LINK)
        $(LINK) -o $@ $(PERF_OBJ_LINK) $(SSLLIB) $(LIBS)
 
@@ -703,6 +710,8 @@ depend:
 
 # build rules
 ipset.lo ipset.o: $(srcdir)/ipset/ipset.c
+doqclient.lo doqclient.o: $(srcdir)/testcode/doqclient.c
+unitdoq.lo unitdoq.o: $(srcdir)/testcode/unitdoq.c
 
 # Dependencies
 dns.lo dns.o: $(srcdir)/services/cache/dns.c config.h $(srcdir)/iterator/iter_delegpt.h $(srcdir)/util/log.h \
index 099206025a33af44b7ee46b3bc146574777c7399..b3a94fb34cb44ac41438de3805f971126c769577 100644 (file)
    and to 0 if you don't. */
 #undef HAVE_DECL_NGHTTP2_SESSION_SERVER_NEW
 
+/* Define to 1 if you have the declaration of `ngtcp2_conn_server_new', and to
+   0 if you don't. */
+#undef HAVE_DECL_NGTCP2_CONN_SERVER_NEW
+
+/* Define to 1 if you have the declaration of `ngtcp2_crypto_encrypt_cb', and
+   to 0 if you don't. */
+#undef HAVE_DECL_NGTCP2_CRYPTO_ENCRYPT_CB
+
 /* Define to 1 if you have the declaration of `NID_ED25519', and to 0 if you
    don't. */
 #undef HAVE_DECL_NID_ED25519
 /* Define to 1 if you have the <nghttp2/nghttp2.h> header file. */
 #undef HAVE_NGHTTP2_NGHTTP2_H
 
+/* Define this to use ngtcp2. */
+#undef HAVE_NGTCP2
+
+/* Define to 1 if you have the `ngtcp2_ccerr_default' function. */
+#undef HAVE_NGTCP2_CCERR_DEFAULT
+
+/* Define to 1 if you have the `ngtcp2_conn_encode_0rtt_transport_params'
+   function. */
+#undef HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS
+
+/* Define to 1 if you have the `ngtcp2_conn_get_max_local_streams_uni'
+   function. */
+#undef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI
+
+/* Define to 1 if you have the `ngtcp2_conn_get_num_scid' function. */
+#undef HAVE_NGTCP2_CONN_GET_NUM_SCID
+
+/* Define to 1 if you have the `ngtcp2_conn_in_closing_period' function. */
+#undef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
+
+/* Define to 1 if you have the `ngtcp2_conn_in_draining_period' function. */
+#undef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
+
+/* Define if ngtcp2_conn_shutdown_stream has 4 arguments. */
+#undef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4
+
+/* Define to 1 if you have the `ngtcp2_conn_tls_early_data_rejected' function.
+   */
+#undef HAVE_NGTCP2_CONN_TLS_EARLY_DATA_REJECTED
+
+/* Define to 1 if you have the `ngtcp2_crypto_encrypt_cb' function. */
+#undef HAVE_NGTCP2_CRYPTO_ENCRYPT_CB
+
+/* Define to 1 if you have the
+   `ngtcp2_crypto_quictls_configure_client_context' function. */
+#undef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT
+
+/* Define to 1 if you have the
+   `ngtcp2_crypto_quictls_configure_server_context' function. */
+#undef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
+
+/* Define to 1 if you have the
+   `ngtcp2_crypto_quictls_from_ossl_encryption_level' function. */
+#undef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL
+
+/* Define to 1 if the system has the type `ngtcp2_encryption_level'. */
+#undef HAVE_NGTCP2_ENCRYPTION_LEVEL
+
+/* Define to 1 if you have the <ngtcp2/ngtcp2_crypto_openssl.h> header file.
+   */
+#undef HAVE_NGTCP2_NGTCP2_CRYPTO_OPENSSL_H
+
+/* Define to 1 if you have the <ngtcp2/ngtcp2_crypto_quictls.h> header file.
+   */
+#undef HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H
+
+/* Define to 1 if you have the <ngtcp2/ngtcp2.h> header file. */
+#undef HAVE_NGTCP2_NGTCP2_H
+
 /* Use libnss for crypto */
 #undef HAVE_NSS
 
 /* Define to 1 if you have the `SSL_get1_peer_certificate' function. */
 #undef HAVE_SSL_GET1_PEER_CERTIFICATE
 
+/* Define to 1 if you have the `SSL_is_quic' function. */
+#undef HAVE_SSL_IS_QUIC
+
 /* Define to 1 if you have the `SSL_set1_host' function. */
 #undef HAVE_SSL_SET1_HOST
 
 /* Define to 1 if `ipi_spec_dst' is a member of `struct in_pktinfo'. */
 #undef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
 
+/* Define to 1 if `tokenlen' is a member of `struct ngtcp2_pkt_hd'. */
+#undef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
+
+/* Define to 1 if `max_tx_udp_payload_size' is a member of `struct
+   ngtcp2_settings'. */
+#undef HAVE_STRUCT_NGTCP2_SETTINGS_MAX_TX_UDP_PAYLOAD_SIZE
+
+/* Define to 1 if `tokenlen' is a member of `struct ngtcp2_settings'. */
+#undef HAVE_STRUCT_NGTCP2_SETTINGS_TOKENLEN
+
+/* Define to 1 if `original_dcid_present' is a member of `struct
+   ngtcp2_transport_params'. */
+#undef HAVE_STRUCT_NGTCP2_TRANSPORT_PARAMS_ORIGINAL_DCID_PRESENT
+
+/* Define to 1 if the system has the type `struct ngtcp2_version_cid'. */
+#undef HAVE_STRUCT_NGTCP2_VERSION_CID
+
 /* Define to 1 if `sun_len' is a member of `struct sockaddr_un'. */
 #undef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
 
@@ -1521,6 +1608,8 @@ char *unbound_stat_strdup_log(const char *s, const char* file, int line,
 #define UNBOUND_DNS_OVER_TLS_PORT 853
 /** default port for DNS over HTTPS traffic. */
 #define UNBOUND_DNS_OVER_HTTPS_PORT 443
+/** default port for DNS over QUIC traffic. */
+#define UNBOUND_DNS_OVER_QUIC_PORT 853
 /** default port for unbound control traffic, registered port with IANA,
     ub-dns-control  8953/tcp    unbound dns nameserver control */
 #define UNBOUND_CONTROL_PORT 8953
index 5b927544fa36174f40246d5a50c3884a554f3723..918a0632013d0b6b930996c9bb85f8fed676005d 100755 (executable)
--- a/configure
+++ b/configure
@@ -921,6 +921,7 @@ with_libevent
 with_libexpat
 with_libhiredis
 with_libnghttp2
+with_libngtcp2
 enable_static_exe
 enable_fully_static
 enable_lock_checks
@@ -1709,6 +1710,7 @@ Optional Packages:
   --with-libexpat=path    specify explicit path for libexpat.
   --with-libhiredis=path  specify explicit path for libhiredis.
   --with-libnghttp2=path  specify explicit path for libnghttp2.
+  --with-libngtcp2=path   specify explicit path for libngtcp2, for QUIC.
   --with-dnstap-socket-path=pathname
                           set default dnstap socket path
   --with-protobuf-c=path  Path where protobuf-c is installed, for dnstap
@@ -22205,6 +22207,353 @@ printf "%s\n" "#define HAVE_DECL_NGHTTP2_SESSION_SERVER_NEW $ac_have_decl" >>con
 
 fi
 
+# ngtcp2
+
+# Check whether --with-libngtcp2 was given.
+if test ${with_libngtcp2+y}
+then :
+  withval=$with_libngtcp2;
+else $as_nop
+   withval="no"
+fi
+
+found_libngtcp2="no"
+if test x_$withval = x_yes -o x_$withval != x_no; then
+   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libngtcp2" >&5
+printf %s "checking for libngtcp2... " >&6; }
+   if test x_$withval = x_ -o x_$withval = x_yes; then
+            withval="/usr/local /opt/local /usr/lib /usr/pkg /usr/sfw /usr"
+   fi
+   for dir in $withval ; do
+            if test -f "$dir/include/ngtcp2/ngtcp2.h"; then
+               found_libngtcp2="yes"
+                               if test "$dir" != "/usr"; then
+                    CPPFLAGS="$CPPFLAGS -I$dir/include"
+                   LDFLAGS="$LDFLAGS -L$dir/lib"
+               fi
+               { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: found in $dir" >&5
+printf "%s\n" "found in $dir" >&6; }
+
+printf "%s\n" "#define HAVE_NGTCP2 1" >>confdefs.h
+
+               LIBS="$LIBS -lngtcp2"
+                break;
+            fi
+    done
+    if test x_$found_libngtcp2 != x_yes; then
+       as_fn_error $? "Could not find libngtcp2, ngtcp2.h" "$LINENO" 5
+    fi
+    ac_fn_c_check_header_compile "$LINENO" "ngtcp2/ngtcp2.h" "ac_cv_header_ngtcp2_ngtcp2_h" "$ac_includes_default
+"
+if test "x$ac_cv_header_ngtcp2_ngtcp2_h" = xyes
+then :
+  printf "%s\n" "#define HAVE_NGTCP2_NGTCP2_H 1" >>confdefs.h
+
+fi
+ac_fn_c_check_header_compile "$LINENO" "ngtcp2/ngtcp2_crypto_openssl.h" "ac_cv_header_ngtcp2_ngtcp2_crypto_openssl_h" "$ac_includes_default
+"
+if test "x$ac_cv_header_ngtcp2_ngtcp2_crypto_openssl_h" = xyes
+then :
+  printf "%s\n" "#define HAVE_NGTCP2_NGTCP2_CRYPTO_OPENSSL_H 1" >>confdefs.h
+
+fi
+ac_fn_c_check_header_compile "$LINENO" "ngtcp2/ngtcp2_crypto_quictls.h" "ac_cv_header_ngtcp2_ngtcp2_crypto_quictls_h" "$ac_includes_default
+"
+if test "x$ac_cv_header_ngtcp2_ngtcp2_crypto_quictls_h" = xyes
+then :
+  printf "%s\n" "#define HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H 1" >>confdefs.h
+
+fi
+
+    ac_fn_check_decl "$LINENO" "ngtcp2_conn_server_new" "ac_cv_have_decl_ngtcp2_conn_server_new" "$ac_includes_default
+    #include <ngtcp2/ngtcp2.h>
+
+" "$ac_c_undeclared_builtin_options" "CFLAGS"
+if test "x$ac_cv_have_decl_ngtcp2_conn_server_new" = xyes
+then :
+  ac_have_decl=1
+else $as_nop
+  ac_have_decl=0
+fi
+printf "%s\n" "#define HAVE_DECL_NGTCP2_CONN_SERVER_NEW $ac_have_decl" >>confdefs.h
+
+    ac_fn_check_decl "$LINENO" "ngtcp2_crypto_encrypt_cb" "ac_cv_have_decl_ngtcp2_crypto_encrypt_cb" "$ac_includes_default
+    #include <ngtcp2/ngtcp2_crypto.h>
+
+" "$ac_c_undeclared_builtin_options" "CFLAGS"
+if test "x$ac_cv_have_decl_ngtcp2_crypto_encrypt_cb" = xyes
+then :
+  ac_have_decl=1
+else $as_nop
+  ac_have_decl=0
+fi
+printf "%s\n" "#define HAVE_DECL_NGTCP2_CRYPTO_ENCRYPT_CB $ac_have_decl" >>confdefs.h
+
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ngtcp2_crypto_encrypt_cb in -lngtcp2_crypto_openssl" >&5
+printf %s "checking for ngtcp2_crypto_encrypt_cb in -lngtcp2_crypto_openssl... " >&6; }
+if test ${ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lngtcp2_crypto_openssl  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+char ngtcp2_crypto_encrypt_cb ();
+int
+main (void)
+{
+return ngtcp2_crypto_encrypt_cb ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb=yes
+else $as_nop
+  ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb" >&5
+printf "%s\n" "$ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb" >&6; }
+if test "x$ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb" = xyes
+then :
+   LIBS="$LIBS -lngtcp2_crypto_openssl"
+fi
+
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ngtcp2_crypto_encrypt_cb in -lngtcp2_crypto_quictls" >&5
+printf %s "checking for ngtcp2_crypto_encrypt_cb in -lngtcp2_crypto_quictls... " >&6; }
+if test ${ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lngtcp2_crypto_quictls  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+char ngtcp2_crypto_encrypt_cb ();
+int
+main (void)
+{
+return ngtcp2_crypto_encrypt_cb ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb=yes
+else $as_nop
+  ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb" >&5
+printf "%s\n" "$ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb" >&6; }
+if test "x$ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb" = xyes
+then :
+   LIBS="$LIBS -lngtcp2_crypto_quictls"
+fi
+
+    ac_fn_c_check_func "$LINENO" "ngtcp2_crypto_encrypt_cb" "ac_cv_func_ngtcp2_crypto_encrypt_cb"
+if test "x$ac_cv_func_ngtcp2_crypto_encrypt_cb" = xyes
+then :
+  printf "%s\n" "#define HAVE_NGTCP2_CRYPTO_ENCRYPT_CB 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "ngtcp2_ccerr_default" "ac_cv_func_ngtcp2_ccerr_default"
+if test "x$ac_cv_func_ngtcp2_ccerr_default" = xyes
+then :
+  printf "%s\n" "#define HAVE_NGTCP2_CCERR_DEFAULT 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "ngtcp2_conn_in_closing_period" "ac_cv_func_ngtcp2_conn_in_closing_period"
+if test "x$ac_cv_func_ngtcp2_conn_in_closing_period" = xyes
+then :
+  printf "%s\n" "#define HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "ngtcp2_conn_in_draining_period" "ac_cv_func_ngtcp2_conn_in_draining_period"
+if test "x$ac_cv_func_ngtcp2_conn_in_draining_period" = xyes
+then :
+  printf "%s\n" "#define HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "ngtcp2_conn_get_max_local_streams_uni" "ac_cv_func_ngtcp2_conn_get_max_local_streams_uni"
+if test "x$ac_cv_func_ngtcp2_conn_get_max_local_streams_uni" = xyes
+then :
+  printf "%s\n" "#define HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "ngtcp2_crypto_quictls_from_ossl_encryption_level" "ac_cv_func_ngtcp2_crypto_quictls_from_ossl_encryption_level"
+if test "x$ac_cv_func_ngtcp2_crypto_quictls_from_ossl_encryption_level" = xyes
+then :
+  printf "%s\n" "#define HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "ngtcp2_crypto_quictls_configure_server_context" "ac_cv_func_ngtcp2_crypto_quictls_configure_server_context"
+if test "x$ac_cv_func_ngtcp2_crypto_quictls_configure_server_context" = xyes
+then :
+  printf "%s\n" "#define HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "ngtcp2_crypto_quictls_configure_client_context" "ac_cv_func_ngtcp2_crypto_quictls_configure_client_context"
+if test "x$ac_cv_func_ngtcp2_crypto_quictls_configure_client_context" = xyes
+then :
+  printf "%s\n" "#define HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "ngtcp2_conn_get_num_scid" "ac_cv_func_ngtcp2_conn_get_num_scid"
+if test "x$ac_cv_func_ngtcp2_conn_get_num_scid" = xyes
+then :
+  printf "%s\n" "#define HAVE_NGTCP2_CONN_GET_NUM_SCID 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "ngtcp2_conn_tls_early_data_rejected" "ac_cv_func_ngtcp2_conn_tls_early_data_rejected"
+if test "x$ac_cv_func_ngtcp2_conn_tls_early_data_rejected" = xyes
+then :
+  printf "%s\n" "#define HAVE_NGTCP2_CONN_TLS_EARLY_DATA_REJECTED 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "ngtcp2_conn_encode_0rtt_transport_params" "ac_cv_func_ngtcp2_conn_encode_0rtt_transport_params"
+if test "x$ac_cv_func_ngtcp2_conn_encode_0rtt_transport_params" = xyes
+then :
+  printf "%s\n" "#define HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS 1" >>confdefs.h
+
+fi
+
+
+  for ac_func in SSL_is_quic
+do :
+  ac_fn_c_check_func "$LINENO" "SSL_is_quic" "ac_cv_func_SSL_is_quic"
+if test "x$ac_cv_func_SSL_is_quic" = xyes
+then :
+  printf "%s\n" "#define HAVE_SSL_IS_QUIC 1" >>confdefs.h
+
+else $as_nop
+  as_fn_error $? "No QUIC support detected in OpenSSL. Need OpenSSL version with QUIC support to enable DNS over QUIC with libngtcp2." "$LINENO" 5
+fi
+
+done
+    ac_fn_c_check_type "$LINENO" "struct ngtcp2_version_cid" "ac_cv_type_struct_ngtcp2_version_cid" "$ac_includes_default
+    #include <ngtcp2/ngtcp2.h>
+
+"
+if test "x$ac_cv_type_struct_ngtcp2_version_cid" = xyes
+then :
+
+printf "%s\n" "#define HAVE_STRUCT_NGTCP2_VERSION_CID 1" >>confdefs.h
+
+
+fi
+ac_fn_c_check_type "$LINENO" "ngtcp2_encryption_level" "ac_cv_type_ngtcp2_encryption_level" "$ac_includes_default
+    #include <ngtcp2/ngtcp2.h>
+
+"
+if test "x$ac_cv_type_ngtcp2_encryption_level" = xyes
+then :
+
+printf "%s\n" "#define HAVE_NGTCP2_ENCRYPTION_LEVEL 1" >>confdefs.h
+
+
+fi
+
+    ac_fn_c_check_member "$LINENO" "struct ngtcp2_pkt_hd" "tokenlen" "ac_cv_member_struct_ngtcp2_pkt_hd_tokenlen" "$ac_includes_default
+    #include <ngtcp2/ngtcp2.h>
+
+"
+if test "x$ac_cv_member_struct_ngtcp2_pkt_hd_tokenlen" = xyes
+then :
+
+printf "%s\n" "#define HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN 1" >>confdefs.h
+
+
+fi
+ac_fn_c_check_member "$LINENO" "struct ngtcp2_settings" "tokenlen" "ac_cv_member_struct_ngtcp2_settings_tokenlen" "$ac_includes_default
+    #include <ngtcp2/ngtcp2.h>
+
+"
+if test "x$ac_cv_member_struct_ngtcp2_settings_tokenlen" = xyes
+then :
+
+printf "%s\n" "#define HAVE_STRUCT_NGTCP2_SETTINGS_TOKENLEN 1" >>confdefs.h
+
+
+fi
+ac_fn_c_check_member "$LINENO" "struct ngtcp2_settings" "max_tx_udp_payload_size" "ac_cv_member_struct_ngtcp2_settings_max_tx_udp_payload_size" "$ac_includes_default
+    #include <ngtcp2/ngtcp2.h>
+
+"
+if test "x$ac_cv_member_struct_ngtcp2_settings_max_tx_udp_payload_size" = xyes
+then :
+
+printf "%s\n" "#define HAVE_STRUCT_NGTCP2_SETTINGS_MAX_TX_UDP_PAYLOAD_SIZE 1" >>confdefs.h
+
+
+fi
+ac_fn_c_check_member "$LINENO" "struct ngtcp2_transport_params" "original_dcid_present" "ac_cv_member_struct_ngtcp2_transport_params_original_dcid_present" "$ac_includes_default
+    #include <ngtcp2/ngtcp2.h>
+
+"
+if test "x$ac_cv_member_struct_ngtcp2_transport_params_original_dcid_present" = xyes
+then :
+
+printf "%s\n" "#define HAVE_STRUCT_NGTCP2_TRANSPORT_PARAMS_ORIGINAL_DCID_PRESENT 1" >>confdefs.h
+
+
+fi
+
+
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ngtcp2_conn_shutdown_stream has 4 arguments" >&5
+printf %s "checking whether ngtcp2_conn_shutdown_stream has 4 arguments... " >&6; }
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$ac_includes_default
+#include <ngtcp2/ngtcp2.h>
+
+int
+main (void)
+{
+
+       (void)ngtcp2_conn_shutdown_stream(NULL, 0, 0, 0);
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+
+       { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+
+printf "%s\n" "#define HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4 1" >>confdefs.h
+
+
+else $as_nop
+
+       { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+
+fi
+
 # set static linking for uninstalled libraries if requested
 
 staticexe=""
@@ -23788,10 +24137,12 @@ if test x_$enable_lock_checks = x_yes; then
        UBSYMS="-export-symbols clubsyms.def"
        cp ${srcdir}/libunbound/ubsyms.def clubsyms.def
        echo lock_protect >> clubsyms.def
+       echo lock_protect_place >> clubsyms.def
        echo lock_unprotect >> clubsyms.def
        echo lock_get_mem >> clubsyms.def
        echo checklock_start >> clubsyms.def
        echo checklock_stop >> clubsyms.def
+       echo checklock_set_output_name >> clubsyms.def
        echo checklock_lock >> clubsyms.def
        echo checklock_unlock >> clubsyms.def
        echo checklock_init >> clubsyms.def
index eb093c84047fa1596943b4c5d1c8db291984cd4a..a2d9d724b3f7660d14be13c004aea870e1501135 100644 (file)
@@ -1579,6 +1579,64 @@ if test x_$withval = x_yes -o x_$withval != x_no; then
     ])
 fi
 
+# ngtcp2: optional QUIC library (for DNS over QUIC), enabled with --with-libngtcp2
+AC_ARG_WITH(libngtcp2, AS_HELP_STRING([--with-libngtcp2=path],[specify explicit path for libngtcp2, for QUIC.]),
+    [ ],[ withval="no" ])
+found_libngtcp2="no"
+if test x_$withval = x_yes -o x_$withval != x_no; then
+   AC_MSG_CHECKING(for libngtcp2)
+   if test x_$withval = x_ -o x_$withval = x_yes; then
+            withval="/usr/local /opt/local /usr/lib /usr/pkg /usr/sfw /usr"
+   fi
+   for dir in $withval ; do
+            if test -f "$dir/include/ngtcp2/ngtcp2.h"; then
+               found_libngtcp2="yes"
+               dnl assume /usr is in default path.
+               if test "$dir" != "/usr"; then
+                    CPPFLAGS="$CPPFLAGS -I$dir/include"
+                   LDFLAGS="$LDFLAGS -L$dir/lib"
+               fi
+               AC_MSG_RESULT(found in $dir)
+               AC_DEFINE([HAVE_NGTCP2], [1], [Define this to use ngtcp2.])
+               LIBS="$LIBS -lngtcp2"
+                break;
+            fi
+    done
+    if test x_$found_libngtcp2 != x_yes; then
+       AC_MSG_ERROR([Could not find libngtcp2, ngtcp2.h])
+    fi
+    AC_CHECK_HEADERS([ngtcp2/ngtcp2.h ngtcp2/ngtcp2_crypto_openssl.h ngtcp2/ngtcp2_crypto_quictls.h],,, [AC_INCLUDES_DEFAULT])
+    AC_CHECK_DECLS([ngtcp2_conn_server_new], [], [], [AC_INCLUDES_DEFAULT
+    #include <ngtcp2/ngtcp2.h>
+    ])
+    AC_CHECK_DECLS([ngtcp2_crypto_encrypt_cb], [], [], [AC_INCLUDES_DEFAULT
+    #include <ngtcp2/ngtcp2_crypto.h>
+    ])
+    AC_CHECK_LIB([ngtcp2_crypto_openssl], [ngtcp2_crypto_encrypt_cb], [ LIBS="$LIBS -lngtcp2_crypto_openssl" ])
+    AC_CHECK_LIB([ngtcp2_crypto_quictls], [ngtcp2_crypto_encrypt_cb], [ LIBS="$LIBS -lngtcp2_crypto_quictls" ])
+    AC_CHECK_FUNCS([ngtcp2_crypto_encrypt_cb ngtcp2_ccerr_default ngtcp2_conn_in_closing_period ngtcp2_conn_in_draining_period ngtcp2_conn_get_max_local_streams_uni ngtcp2_crypto_quictls_from_ossl_encryption_level ngtcp2_crypto_quictls_configure_server_context ngtcp2_crypto_quictls_configure_client_context ngtcp2_conn_get_num_scid ngtcp2_conn_tls_early_data_rejected ngtcp2_conn_encode_0rtt_transport_params])
+    AC_CHECK_FUNCS([SSL_is_quic], [], [AC_MSG_ERROR([No QUIC support detected in OpenSSL. Need OpenSSL version with QUIC support to enable DNS over QUIC with libngtcp2.])])
+    AC_CHECK_TYPES([struct ngtcp2_version_cid, ngtcp2_encryption_level],,,[AC_INCLUDES_DEFAULT
+    #include <ngtcp2/ngtcp2.h>
+    ])
+    AC_CHECK_MEMBERS([struct ngtcp2_pkt_hd.tokenlen, struct ngtcp2_settings.tokenlen, struct ngtcp2_settings.max_tx_udp_payload_size, struct ngtcp2_transport_params.original_dcid_present],,,[AC_INCLUDES_DEFAULT
+    #include <ngtcp2/ngtcp2.h>
+    ])
+
+    AC_MSG_CHECKING([whether ngtcp2_conn_shutdown_stream has 4 arguments])
+    AC_COMPILE_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT
+#include <ngtcp2/ngtcp2.h>
+    ],[
+       (void)ngtcp2_conn_shutdown_stream(NULL, 0, 0, 0);
+    ])],[
+       AC_MSG_RESULT(yes)
+       AC_DEFINE(HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4, 1, [Define if ngtcp2_conn_shutdown_stream has 4 arguments.])
+    ],[
+       AC_MSG_RESULT(no)
+    ])
+
+fi
+
 # set static linking for uninstalled libraries if requested
 AC_SUBST(staticexe)
 staticexe=""
@@ -1894,10 +1952,12 @@ if test x_$enable_lock_checks = x_yes; then
        UBSYMS="-export-symbols clubsyms.def"
        cp ${srcdir}/libunbound/ubsyms.def clubsyms.def
        echo lock_protect >> clubsyms.def
+       echo lock_protect_place >> clubsyms.def
        echo lock_unprotect >> clubsyms.def
        echo lock_get_mem >> clubsyms.def
        echo checklock_start >> clubsyms.def
        echo checklock_stop >> clubsyms.def
+       echo checklock_set_output_name >> clubsyms.def
        echo checklock_lock >> clubsyms.def
        echo checklock_unlock >> clubsyms.def
        echo checklock_init >> clubsyms.def
@@ -2356,6 +2416,8 @@ char *unbound_stat_strdup_log(const char *s, const char* file, int line,
 #define UNBOUND_DNS_OVER_TLS_PORT 853
 /** default port for DNS over HTTPS traffic. */
 #define UNBOUND_DNS_OVER_HTTPS_PORT 443
+/** default port for DNS over QUIC traffic (QUIC uses UDP; same number as the DNS over TLS port). */
+#define UNBOUND_DNS_OVER_QUIC_PORT 853
 /** default port for unbound control traffic, registered port with IANA,
     ub-dns-control  8953/tcp    unbound dns nameserver control */
 #define UNBOUND_CONTROL_PORT 8953
index 72b4a43be1ad8d0d91c4bb1af03f81e7f6647f67..1c8272b14e81c11f10547d66d237a0c6914fa415 100644 (file)
@@ -557,6 +557,12 @@ daemon_create_workers(struct daemon* daemon)
                fatal_exit("out of memory during daemon init");
        numport = daemon_get_shufport(daemon, shufport);
        verbose(VERB_ALGO, "total of %d outgoing ports available", numport);
+
+#ifdef HAVE_NGTCP2
+       daemon->doq_table = doq_table_create(daemon->cfg, daemon->rand);
+       if(!daemon->doq_table)
+               fatal_exit("could not create doq_table: out of memory");
+#endif
        
        daemon->num = (daemon->cfg->num_threads?daemon->cfg->num_threads:1);
        if(daemon->reuseport && (int)daemon->num < (int)daemon->num_ports) {
@@ -906,6 +912,10 @@ daemon_cleanup(struct daemon* daemon)
 #ifdef USE_DNSCRYPT
        dnsc_delete(daemon->dnscenv);
        daemon->dnscenv = NULL;
+#endif
+#ifdef HAVE_NGTCP2
+       doq_table_delete(daemon->doq_table);
+       daemon->doq_table = NULL;
 #endif
        daemon->cfg = NULL;
 }
index 5c3a114cc7e61559fb8bc0f262126dcf02a04908..fc1bde713cae1d979b1b982d017a29d32b6c86a5 100644 (file)
@@ -58,6 +58,7 @@ struct ub_randstate;
 struct daemon_remote;
 struct respip_set;
 struct shm_main_info;
+struct doq_table;
 struct cookie_secrets;
 
 #include "dnstap/dnstap_config.h"
@@ -147,6 +148,8 @@ struct daemon {
        /** the dnscrypt environment */
        struct dnsc_env* dnscenv;
 #endif
+       /** the doq connection table */
+       struct doq_table* doq_table;
        /** reuse existing cache on reload if other conditions allow it. */
        int reuse_cache;
        /** the EDNS cookie secrets from the cookie-secret-file */
index 5af03328ef97df2e8e93fd1385e397fe0d50105a..8877cd19402bc767822c69213ec6a736b6240421 100644 (file)
@@ -302,7 +302,7 @@ add_open(const char* ip, int nr, struct listen_port** list, int noproto_is_err,
                /* open fd */
                fd = create_tcp_accept_sock(res, 1, &noproto, 0,
                        cfg->ip_transparent, 0, 0, cfg->ip_freebind,
-                       cfg->use_systemd, cfg->ip_dscp);
+                       cfg->use_systemd, cfg->ip_dscp, "unbound-control");
                freeaddrinfo(res);
        }
 
@@ -866,6 +866,10 @@ print_mem(RES* ssl, struct worker* worker, struct daemon* daemon,
        if(!print_longnum(ssl, "mem.http.response_buffer"SQ,
                (size_t)s->svr.mem_http2_response_buffer))
                return 0;
+#ifdef HAVE_NGTCP2
+       if(!print_longnum(ssl, "mem.quic"SQ, (size_t)s->svr.mem_quic))
+               return 0;
+#endif /* HAVE_NGTCP2 */
        return 1;
 }
 
@@ -996,6 +1000,10 @@ print_ext(RES* ssl, struct ub_stats_info* s, int inhibit_zero)
                (unsigned long)s->svr.qipv6)) return 0;
        if(!ssl_printf(ssl, "num.query.https"SQ"%lu\n",
                (unsigned long)s->svr.qhttps)) return 0;
+#ifdef HAVE_NGTCP2
+       if(!ssl_printf(ssl, "num.query.quic"SQ"%lu\n",
+               (unsigned long)s->svr.qquic)) return 0;
+#endif /* HAVE_NGTCP2 */
        /* flags */
        if(!ssl_printf(ssl, "num.query.flags.QR"SQ"%lu\n",
                (unsigned long)s->svr.qbit_QR)) return 0;
index 827110698970e17e0bdbb265bad40b79abc98592..0e17300a150e536fd883f9a860b215bb6bdc651b 100644 (file)
@@ -346,6 +346,12 @@ server_stats_compile(struct worker* worker, struct ub_stats_info* s, int reset)
                (long long)http2_get_query_buffer_size();
        s->svr.mem_http2_response_buffer =
                (long long)http2_get_response_buffer_size();
+#ifdef HAVE_NGTCP2
+       s->svr.mem_quic = (long long)doq_table_quic_size_get(
+               worker->daemon->doq_table);
+#else
+       s->svr.mem_quic = 0;
+#endif /* HAVE_NGTCP2 */
 
        /* Set neg cache usage numbers */
        set_neg_cache_stats(worker, &s->svr, reset);
@@ -474,6 +480,7 @@ void server_stats_add(struct ub_stats_info* total, struct ub_stats_info* a)
                total->svr.qtls += a->svr.qtls;
                total->svr.qtls_resume += a->svr.qtls_resume;
                total->svr.qhttps += a->svr.qhttps;
+               total->svr.qquic += a->svr.qquic;
                total->svr.qipv6 += a->svr.qipv6;
                total->svr.qbit_QR += a->svr.qbit_QR;
                total->svr.qbit_AA += a->svr.qbit_AA;
@@ -533,7 +540,8 @@ void server_stats_insquery(struct ub_server_stats* stats, struct comm_point* c,
        else    stats->qclass_big++;
        stats->qopcode[ LDNS_OPCODE_WIRE(sldns_buffer_begin(c->buffer)) ]++;
        if(c->type != comm_udp) {
-               stats->qtcp++;
+               if(c->type != comm_doq)
+                       stats->qtcp++;
                if(c->ssl != NULL) {
                        stats->qtls++;
 #ifdef HAVE_SSL
@@ -542,6 +550,10 @@ void server_stats_insquery(struct ub_server_stats* stats, struct comm_point* c,
 #endif
                        if(c->type == comm_http)
                                stats->qhttps++;
+#ifdef HAVE_NGTCP2
+                       else if(c->type == comm_doq)
+                               stats->qquic++;
+#endif
                }
        }
        if(repinfo && addr_is_ip6(&repinfo->remote_addr, repinfo->remote_addrlen))
index fe105eb7bb3bfd79badf644ea96df44104528996..713de316373aedd018fae774d474cd6a32a28a74 100644 (file)
@@ -2174,7 +2174,9 @@ worker_init(struct worker* worker, struct config_file *cfg,
                cfg->harden_large_queries, cfg->http_max_streams,
                cfg->http_endpoint, cfg->http_notls_downstream,
                worker->daemon->tcl, worker->daemon->listen_sslctx,
-               dtenv, worker_handle_request, worker);
+               dtenv, worker->daemon->doq_table, worker->env.rnd,
+               cfg->ssl_service_key, cfg->ssl_service_pem, cfg,
+               worker_handle_request, worker);
        if(!worker->front) {
                log_err("could not create listening sockets");
                worker_delete(worker);
@@ -2508,3 +2510,19 @@ void dtio_mainfdcallback(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev),
        log_assert(0);
 }
 #endif
+
+#ifdef HAVE_NGTCP2 /* defined by configure when ngtcp2 is found */
+void doq_client_event_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev),
+       void* ATTR_UNUSED(arg))
+{
+       log_assert(0); /* stub: fd, ev and arg are unused; presumably never called in this program, hence the assert -- confirm */
+}
+#endif /* HAVE_NGTCP2 */
+
+#ifdef HAVE_NGTCP2 /* defined by configure when ngtcp2 is found */
+void doq_client_timer_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev),
+       void* ATTR_UNUSED(arg))
+{
+       log_assert(0); /* stub: fd, ev and arg are unused; presumably never called in this program, hence the assert -- confirm */
+}
+#endif /* HAVE_NGTCP2 */
index b6b993d540943727fa57f401fd39f51af36aae53..7f8be4965957c164993a92d4dee5e6ccfad69c9b 100644 (file)
@@ -1785,3 +1785,19 @@ void remote_get_opt_ssl(char* ATTR_UNUSED(str), void* ATTR_UNUSED(arg))
 {
         log_assert(0);
 }
+
+#ifdef HAVE_NGTCP2 /* defined by configure when ngtcp2 is found */
+void doq_client_event_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev),
+       void* ATTR_UNUSED(arg))
+{
+       log_assert(0); /* stub: fd, ev and arg are unused; presumably never called in this program, hence the assert -- confirm */
+}
+#endif /* HAVE_NGTCP2 */
+
+#ifdef HAVE_NGTCP2 /* defined by configure when ngtcp2 is found */
+void doq_client_timer_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev),
+       void* ATTR_UNUSED(arg))
+{
+       log_assert(0); /* stub: fd, ev and arg are unused; presumably never called in this program, hence the assert -- confirm */
+}
+#endif /* HAVE_NGTCP2 */
index 2d16ee75fe4f2350e58bbc63a626aae7ad7ff0f8..06e2b4ba8326685e7890b90496d426566f8fce8e 100644 (file)
@@ -920,6 +920,7 @@ server:
        # tls-service-pem: "path/to/publiccertfile.pem"
        # tls-port: 853
        # https-port: 443
+       # quic-port: 853
 
        # cipher setting for TLSv1.2
        # tls-ciphers: "DHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-SHA256:DHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384:ECDHE-RSA-AES128-SHA256"
@@ -984,6 +985,9 @@ server:
        # Disable TLS for DNS-over-HTTP downstream service.
        # http-notls-downstream: no
 
+       # Maximum number of bytes used for QUIC buffers.
+       # quic-size: 8m
+
        # The interfaces that use these listed port numbers will support and
        # expect PROXYv2. For UDP and TCP/TLS interfaces.
        # proxy-protocol-port: portno for each of the port numbers.
index 17073f9388ffee109d336d3c2c9716fc487912e0..6470d544ce7026e71db460109a97e00d83085348 100644 (file)
@@ -606,6 +606,10 @@ queries waiting for request stream completion.
 Memory in bytes used by the HTTP/2 response buffers. Containing DNS responses
 waiting to be written back to the clients.
 .TP
+.I mem.quic
+Memory in bytes used by QUIC. Containing connection information, stream
+information, queries read and responses written back to the clients.
+.TP
 .I histogram.<sec>.<usec>.to.<sec>.<usec>
 Shows a histogram, summed over all threads. Every element counts the
 recursive queries whose reply time fit between the lower and upper bound.
@@ -654,6 +658,10 @@ Number of queries that were made using HTTPS towards the Unbound server.
 These are also counted in num.query.tcp and num.query.tls, because HTTPS
 uses TLS and TCP.
 .TP
+.I num.query.quic
+Number of queries that were made using QUIC towards the Unbound server.
+These are also counted in num.query.tls, because TLS is used for these queries.
+.TP
 .I num.query.ipv6
 Number of queries that were made using IPv6 towards the Unbound server.
 .TP
index 2a5f6792a71dc883eb9ec8b6c84dffcf440ccb8a..da494087ccb7bdd7a0cd4d22ff82a83647e22038 100644 (file)
@@ -719,6 +719,18 @@ PROXYv2 is supported for UDP and TCP/TLS listening interfaces.
 There is no support for PROXYv2 on a DoH or DNSCrypt listening interface.
 Can list multiple, each on a new statement.
 .TP
+.B quic\-port: \fI<number>
+The port number on which to provide DNS-over-QUIC service, default 853. Only
+interfaces configured with that port number as @number get the QUIC service.
+The interface uses QUIC for the UDP traffic on that port number.
+.TP
+.B quic\-size: \fI<size in bytes>
+Maximum number of bytes for all QUIC buffers and data combined. Default is 8
+megabytes. A plain number is in bytes, append 'k', 'm' or 'g' for kilobytes,
+megabytes or gigabytes (1024*1024 bytes in a megabyte). New connections receive
+connection refused when the limit is exceeded. New streams are reset when the
+limit is exceeded.
+.TP
 .B use\-systemd: \fI<yes or no>
 Enable or disable systemd socket activation.
 Default is no.
index 94b644a49b86eb0e55df7cc259725ad8712b1c55..da7d4c22440da3505555fc84ddfdcf2ed69ee980 100644 (file)
@@ -1058,3 +1058,19 @@ void dtio_mainfdcallback(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev),
        log_assert(0);
 }
 #endif
+
+#ifdef HAVE_NGTCP2 /* defined by configure when ngtcp2 is found */
+void doq_client_event_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev),
+       void* ATTR_UNUSED(arg))
+{
+       log_assert(0); /* stub: fd, ev and arg are unused; presumably never called in this program, hence the assert -- confirm */
+}
+#endif /* HAVE_NGTCP2 */
+
+#ifdef HAVE_NGTCP2 /* defined by configure when ngtcp2 is found */
+void doq_client_timer_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev),
+       void* ATTR_UNUSED(arg))
+{
+       log_assert(0); /* stub: fd, ev and arg are unused; presumably never called in this program, hence the assert -- confirm */
+}
+#endif /* HAVE_NGTCP2 */
index bb8e8acf033cead0c537bbc0e2051da2a1f14a27..ef2c5c0679f6255156d71e011ded4c766fdf2dde 100644 (file)
@@ -845,6 +845,10 @@ struct ub_server_stats {
        long long qtls_resume;
        /** RPZ action stats */
        long long rpz_action[UB_STATS_RPZ_ACTION_NUM];
+       /** number of bytes in QUIC buffers */
+       long long mem_quic;
+       /** number of queries over (DNS over) QUIC */
+       long long qquic;
 };
 
 /**
index 6c0691f2a73cae0a85fc7811bea32167e8938d15..078a344d3664cc4c3c522b9efb9c2fed4716224b 100644 (file)
 #include "util/net_help.h"
 #include "sldns/sbuffer.h"
 #include "sldns/parseutil.h"
+#include "sldns/wire2str.h"
 #include "services/mesh.h"
 #include "util/fptr_wlist.h"
 #include "util/locks.h"
+#include "util/timeval_func.h"
 
 #ifdef HAVE_NETDB_H
 #include <netdb.h>
 #ifdef HAVE_NET_IF_H
 #include <net/if.h>
 #endif
+
+#ifdef HAVE_TIME_H
+#include <time.h>
+#endif
+#include <sys/time.h>
+
+#ifdef HAVE_NGTCP2
+#include <ngtcp2/ngtcp2.h>
+#include <ngtcp2/ngtcp2_crypto.h>
+#ifdef HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H
+#include <ngtcp2/ngtcp2_crypto_quictls.h>
+#else
+#include <ngtcp2/ngtcp2_crypto_openssl.h>
+#endif
+#endif
+
+#ifdef HAVE_OPENSSL_SSL_H
+#include <openssl/ssl.h>
+#endif
+
 #ifdef HAVE_LINUX_NET_TSTAMP_H
 #include <linux/net_tstamp.h>
 #endif
+
 /** number of queued TCP connections for listen() */
 #define TCP_BACKLOG 256
 
@@ -109,9 +132,11 @@ static int http2_response_buffer_lock_inited = 0;
 /**
  * Debug print of the getaddrinfo returned address.
  * @param addr: the address returned.
+ * @param additional: additional text that describes the type of socket,
+ *     or NULL for no text.
  */
 static void
-verbose_print_addr(struct addrinfo *addr)
+verbose_print_addr(struct addrinfo *addr, const char* additional)
 {
        if(verbosity >= VERB_ALGO) {
                char buf[100];
@@ -126,13 +151,14 @@ verbose_print_addr(struct addrinfo *addr)
                        (void)strlcpy(buf, "(null)", sizeof(buf));
                }
                buf[sizeof(buf)-1] = 0;
-               verbose(VERB_ALGO, "creating %s%s socket %s %d",
+               verbose(VERB_ALGO, "creating %s%s socket %s %d%s%s",
                        addr->ai_socktype==SOCK_DGRAM?"udp":
                        addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
                        addr->ai_family==AF_INET?"4":
                        addr->ai_family==AF_INET6?"6":
                        "_otherfam", buf,
-                       ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
+                       ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port),
+                       (additional?" ":""), (additional?additional:""));
        }
 }
 
@@ -673,7 +699,7 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr,
 int
 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
        int* reuseport, int transparent, int mss, int nodelay, int freebind,
-       int use_systemd, int dscp)
+       int use_systemd, int dscp, const char* additional)
 {
        int s = -1;
        char* err;
@@ -692,7 +718,7 @@ create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
 #if !defined(IP_FREEBIND)
        (void)freebind;
 #endif
-       verbose_print_addr(addr);
+       verbose_print_addr(addr, additional);
        *noproto = 0;
 #ifdef HAVE_SYSTEMD
        if (!use_systemd ||
@@ -1008,7 +1034,8 @@ static int
 make_sock(int stype, const char* ifname, const char* port,
        struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
        int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
-       int use_systemd, int dscp, struct unbound_socket* ub_sock)
+       int use_systemd, int dscp, struct unbound_socket* ub_sock,
+       const char* additional)
 {
        struct addrinfo *res = NULL;
        int r, s, inuse, noproto;
@@ -1032,7 +1059,7 @@ make_sock(int stype, const char* ifname, const char* port,
                return -1;
        }
        if(stype == SOCK_DGRAM) {
-               verbose_print_addr(res);
+               verbose_print_addr(res, additional);
                s = create_udp_sock(res->ai_family, res->ai_socktype,
                        (struct sockaddr*)res->ai_addr, res->ai_addrlen,
                        v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
@@ -1045,7 +1072,7 @@ make_sock(int stype, const char* ifname, const char* port,
        } else  {
                s = create_tcp_accept_sock(res, v6only, &noproto, reuseport,
                        transparent, tcp_mss, nodelay, freebind, use_systemd,
-                       dscp);
+                       dscp, additional);
                if(s == -1 && noproto && hints->ai_family == AF_INET6){
                        *noip6 = 1;
                }
@@ -1079,7 +1106,8 @@ static int
 make_sock_port(int stype, const char* ifname, const char* port,
        struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
        int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
-       int use_systemd, int dscp, struct unbound_socket* ub_sock)
+       int use_systemd, int dscp, struct unbound_socket* ub_sock,
+       const char* additional)
 {
        char* s = strchr(ifname, '@');
        if(s) {
@@ -1102,11 +1130,11 @@ make_sock_port(int stype, const char* ifname, const char* port,
                p[strlen(s+1)]=0;
                return make_sock(stype, newif, p, hints, v6only, noip6, rcv,
                        snd, reuseport, transparent, tcp_mss, nodelay, freebind,
-                       use_systemd, dscp, ub_sock);
+                       use_systemd, dscp, ub_sock, additional);
        }
        return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd,
                reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd,
-               dscp, ub_sock);
+               dscp, ub_sock, additional);
 }
 
 /**
@@ -1254,6 +1282,8 @@ if_is_ssl(const char* ifname, const char* port, int ssl_port,
  * @param use_systemd: if true, fetch sockets from systemd.
  * @param dnscrypt_port: dnscrypt service port number
  * @param dscp: DSCP to use.
+ * @param quic_port: dns over quic port number.
+ * @param http_notls_downstream: if no tls is used for https downstream.
  * @param sock_queue_timeout: the sock_queue_timeout from config. Seconds to
  *     wait to discard if UDP packets have waited for long in the socket
  *     buffer.
@@ -1267,7 +1297,7 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
        struct config_strlist* proxy_protocol_port,
        int* reuseport, int transparent, int tcp_mss, int freebind,
        int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp,
-       int sock_queue_timeout)
+       int quic_port, int http_notls_downstream, int sock_queue_timeout)
 {
        int s, noip6=0;
        int is_https = if_is_https(ifname, port, https_port);
@@ -1275,6 +1305,8 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
        int is_pp2 = if_is_pp2(ifname, port, proxy_protocol_port);
        int nodelay = is_https && http2_nodelay;
        struct unbound_socket* ub_sock;
+       int is_doq = if_is_quic(ifname, port, quic_port);
+       const char* add = NULL;
 
        if(!do_udp && !do_tcp)
                return 0;
@@ -1286,6 +1318,9 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
                } else if(is_https) {
                        fatal_exit("PROXYv2 and DoH combination not "
                                "supported!");
+               } else if(is_doq) {
+                       fatal_exit("PROXYv2 and DoQ combination not "
+                               "supported!");
                }
        }
 
@@ -1295,7 +1330,8 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
                        return 0;
                if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
                        &noip6, rcv, snd, reuseport, transparent,
-                       tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
+                       tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock,
+                       (is_dnscrypt?"udpancil_dnscrypt":"udpancil"))) == -1) {
                        free(ub_sock->addr);
                        free(ub_sock);
                        if(noip6) {
@@ -1323,13 +1359,36 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
                        return 0;
                }
        } else if(do_udp) {
+               enum listen_type udp_port_type;
                ub_sock = calloc(1, sizeof(struct unbound_socket));
                if(!ub_sock)
                        return 0;
+               if(is_dnscrypt) {
+                       udp_port_type = listen_type_udp_dnscrypt;
+                       add = "dnscrypt";
+               } else if(is_doq) {
+                       udp_port_type = listen_type_doq;
+                       add = "doq";
+                       if(((strchr(ifname, '@') &&
+                               atoi(strchr(ifname, '@')+1) == 53) ||
+                               (!strchr(ifname, '@') && atoi(port) == 53))) {
+                               log_err("DNS over QUIC is not allowed on "
+                                       "port 53. Port 53 is for DNS "
+                                       "datagrams. Error for "
+                                       "interface '%s'.", ifname);
+                               free(ub_sock->addr);
+                               free(ub_sock);
+                               return 0;
+                       }
+               } else {
+                       udp_port_type = listen_type_udp;
+                       add = NULL;
+               }
                /* regular udp socket */
                if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
                        &noip6, rcv, snd, reuseport, transparent,
-                       tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
+                       tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock,
+                       add)) == -1) {
                        free(ub_sock->addr);
                        free(ub_sock);
                        if(noip6) {
@@ -1338,14 +1397,25 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
                        }
                        return 0;
                }
-               if (sock_queue_timeout && !set_recvtimestamp(s)) {
-                       log_warn("socket timestamping is not available");
+               if(udp_port_type == listen_type_doq) {
+                       if(!set_recvpktinfo(s, hints->ai_family)) {
+                               sock_close(s);
+                               free(ub_sock->addr);
+                               free(ub_sock);
+                               return 0;
+                       }
                }
-               if(!port_insert(list, s, is_dnscrypt
-                       ?listen_type_udp_dnscrypt :
-                       (sock_queue_timeout ?
-                               listen_type_udpancil:listen_type_udp),
-                       is_pp2, ub_sock)) {
+               if(udp_port_type == listen_type_udp && sock_queue_timeout)
+                       udp_port_type = listen_type_udpancil;
+               if (sock_queue_timeout) {
+                       if(!set_recvtimestamp(s)) {
+                               log_warn("socket timestamping is not available");
+                       } else {
+                               if(udp_port_type == listen_type_udp)
+                                       udp_port_type = listen_type_udpancil;
+                       }
+               }
+               if(!port_insert(list, s, udp_port_type, is_pp2, ub_sock)) {
                        sock_close(s);
                        free(ub_sock->addr);
                        free(ub_sock);
@@ -1359,17 +1429,24 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
                ub_sock = calloc(1, sizeof(struct unbound_socket));
                if(!ub_sock)
                        return 0;
-               if(is_ssl)
+               if(is_ssl) {
                        port_type = listen_type_ssl;
-               else if(is_https)
+                       add = "tls";
+               } else if(is_https) {
                        port_type = listen_type_http;
-               else if(is_dnscrypt)
+                       add = "https";
+                       if(http_notls_downstream)
+                               add = "http";
+               } else if(is_dnscrypt) {
                        port_type = listen_type_tcp_dnscrypt;
-               else
+                       add = "dnscrypt";
+               } else {
                        port_type = listen_type_tcp;
+                       add = NULL;
+               }
                if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
                        &noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay,
-                       freebind, use_systemd, dscp, ub_sock)) == -1) {
+                       freebind, use_systemd, dscp, ub_sock, add)) == -1) {
                        free(ub_sock->addr);
                        free(ub_sock);
                        if(noip6) {
@@ -1446,8 +1523,10 @@ listen_create(struct comm_base* base, struct listen_port* ports,
        size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
        int harden_large_queries, uint32_t http_max_streams,
        char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
-       void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb,
-       void *cb_arg)
+       void* sslctx, struct dt_env* dtenv, struct doq_table* doq_table,
+       struct ub_randstate* rnd, const char* ssl_service_key,
+       const char* ssl_service_pem, struct config_file* cfg,
+       comm_point_callback_type* cb, void *cb_arg)
 {
        struct listen_dnsport* front = (struct listen_dnsport*)
                malloc(sizeof(struct listen_dnsport));
@@ -1471,6 +1550,16 @@ listen_create(struct comm_base* base, struct listen_port* ports,
                        cp = comm_point_create_udp(base, ports->fd,
                                front->udp_buff, ports->pp2_enabled, cb,
                                cb_arg, ports->socket);
+               } else if(ports->ftype == listen_type_doq) {
+#ifndef HAVE_NGTCP2
+                       log_warn("Unbound is not compiled with "
+                               "ngtcp2. This is required to use DNS "
+                               "over QUIC.");
+#endif
+                       cp = comm_point_create_doq(base, ports->fd,
+                               front->udp_buff, cb, cb_arg, ports->socket,
+                               doq_table, rnd, ssl_service_key,
+                               ssl_service_pem, cfg);
                } else if(ports->ftype == listen_type_tcp ||
                                ports->ftype == listen_type_tcp_dnscrypt) {
                        cp = comm_point_create_tcp(base, ports->fd,
@@ -1858,7 +1947,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
                                                reuseport, cfg->ip_transparent,
                                                cfg->tcp_mss, cfg->ip_freebind,
                                                cfg->http_nodelay, cfg->use_systemd,
-                                               cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
+                                               cfg->dnscrypt_port, cfg->ip_dscp,
+                                               cfg->quic_port, cfg->http_notls_downstream,
+                                               cfg->sock_queue_timeout)) {
                                                listening_ports_free(list);
                                                return NULL;
                                        }
@@ -1875,7 +1966,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
                                                reuseport, cfg->ip_transparent,
                                                cfg->tcp_mss, cfg->ip_freebind,
                                                cfg->http_nodelay, cfg->use_systemd,
-                                               cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
+                                               cfg->dnscrypt_port, cfg->ip_dscp,
+                                               cfg->quic_port, cfg->http_notls_downstream,
+                                               cfg->sock_queue_timeout)) {
                                                listening_ports_free(list);
                                                return NULL;
                                        }
@@ -1894,7 +1987,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
                                reuseport, cfg->ip_transparent,
                                cfg->tcp_mss, cfg->ip_freebind,
                                cfg->http_nodelay, cfg->use_systemd,
-                               cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
+                               cfg->dnscrypt_port, cfg->ip_dscp,
+                               cfg->quic_port, cfg->http_notls_downstream,
+                               cfg->sock_queue_timeout)) {
                                listening_ports_free(list);
                                return NULL;
                        }
@@ -1910,7 +2005,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
                                reuseport, cfg->ip_transparent,
                                cfg->tcp_mss, cfg->ip_freebind,
                                cfg->http_nodelay, cfg->use_systemd,
-                               cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
+                               cfg->dnscrypt_port, cfg->ip_dscp,
+                               cfg->quic_port, cfg->http_notls_downstream,
+                               cfg->sock_queue_timeout)) {
                                listening_ports_free(list);
                                return NULL;
                        }
@@ -1928,7 +2025,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
                                reuseport, cfg->ip_transparent,
                                cfg->tcp_mss, cfg->ip_freebind,
                                cfg->http_nodelay, cfg->use_systemd,
-                               cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
+                               cfg->dnscrypt_port, cfg->ip_dscp,
+                               cfg->quic_port, cfg->http_notls_downstream,
+                               cfg->sock_queue_timeout)) {
                                listening_ports_free(list);
                                return NULL;
                        }
@@ -1944,7 +2043,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
                                reuseport, cfg->ip_transparent,
                                cfg->tcp_mss, cfg->ip_freebind,
                                cfg->http_nodelay, cfg->use_systemd,
-                               cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
+                               cfg->dnscrypt_port, cfg->ip_dscp,
+                               cfg->quic_port, cfg->http_notls_downstream,
+                               cfg->sock_queue_timeout)) {
                                listening_ports_free(list);
                                return NULL;
                        }
@@ -3154,3 +3255,2365 @@ nghttp2_session_callbacks* http2_req_callbacks_create(void)
        return callbacks;
 }
 #endif /* HAVE_NGHTTP2 */
+
+#ifdef HAVE_NGTCP2
+/** Allocate a doq table: random static secret, the three (empty) lookup
+ * trees and the locks. Returns NULL if any allocation fails. */
+struct doq_table*
+doq_table_create(struct config_file* cfg, struct ub_randstate* rnd)
+{
+       struct doq_table* t = calloc(1, sizeof(*t));
+       if(t == NULL)
+               return NULL;
+       t->idle_timeout = ((uint64_t)cfg->tcp_idle_timeout)*
+               NGTCP2_MILLISECONDS;
+       t->sv_scidlen = 16;
+       t->static_secret_len = 16;
+       t->static_secret = malloc(t->static_secret_len);
+       if(t->static_secret == NULL)
+               goto fail;
+       doq_fill_rand(rnd, t->static_secret, t->static_secret_len);
+       /* calloc left the tree pointers NULL, so the shared failure path
+        * may free all of them regardless of which creation failed. */
+       t->conn_tree = rbtree_create(doq_conn_cmp);
+       if(t->conn_tree == NULL)
+               goto fail;
+       t->conid_tree = rbtree_create(doq_conid_cmp);
+       if(t->conid_tree == NULL)
+               goto fail;
+       t->timer_tree = rbtree_create(doq_timer_cmp);
+       if(t->timer_tree == NULL)
+               goto fail;
+       lock_rw_init(&t->lock);
+       lock_rw_init(&t->conid_lock);
+       lock_basic_init(&t->size_lock);
+       lock_protect(&t->lock, &t->static_secret,
+               sizeof(t->static_secret));
+       lock_protect(&t->lock, &t->static_secret_len,
+               sizeof(t->static_secret_len));
+       lock_protect(&t->lock, t->static_secret,
+               t->static_secret_len);
+       lock_protect(&t->lock, &t->sv_scidlen,
+               sizeof(t->sv_scidlen));
+       lock_protect(&t->lock, &t->idle_timeout,
+               sizeof(t->idle_timeout));
+       lock_protect(&t->lock, &t->conn_tree, sizeof(t->conn_tree));
+       lock_protect(&t->lock, t->conn_tree, sizeof(*t->conn_tree));
+       lock_protect(&t->conid_lock, t->conid_tree,
+               sizeof(*t->conid_tree));
+       lock_protect(&t->lock, t->timer_tree,
+               sizeof(*t->timer_tree));
+       lock_protect(&t->size_lock, &t->current_size,
+               sizeof(t->current_size));
+       return t;
+
+fail:
+       free(t->timer_tree);
+       free(t->conid_tree);
+       free(t->conn_tree);
+       free(t->static_secret);
+       free(t);
+       return NULL;
+}
+
+/** tree traversal callback: release one connection of the connection tree.
+ * arg is the doq table that owns the tree. */
+static void
+conn_tree_del(rbnode_type* node, void* arg)
+{
+       struct doq_table* tbl = (struct doq_table*)arg;
+       struct doq_conn* c;
+       if(node == NULL)
+               return;
+       c = (struct doq_conn*)node->key;
+       /* Unlink from the setlist before touching the timer tree: locating
+        * the rbnode that heads the setlist of equal timeouts requires a
+        * lookup in the still unmodified tree. */
+       if(c->timer.timer_in_list)
+               doq_timer_list_remove(c->table, &c->timer);
+       if(c->timer.timer_in_tree)
+               doq_timer_tree_remove(c->table, &c->timer);
+       doq_table_quic_size_subtract(tbl, sizeof(*c)+c->key.dcidlen);
+       doq_conn_delete(c, tbl);
+}
+
+/** tree traversal callback: release one element of the connection id tree */
+static void
+conid_tree_del(rbnode_type* node, void* ATTR_UNUSED(arg))
+{
+       if(node != NULL)
+               doq_conid_delete((struct doq_conid*)node->key);
+}
+
+/**
+ * Delete the doq table together with everything it owns: the static
+ * secret, every connection in the connection tree, any remaining
+ * connection ids, the three tree containers and the locks.
+ * A NULL table is accepted and ignored.
+ */
+void
+doq_table_delete(struct doq_table* table)
+{
+       if(!table)
+               return;
+       lock_rw_destroy(&table->lock);
+       free(table->static_secret);
+       if(table->conn_tree) {
+               /* conn_tree_del also unsets the timer of every connection
+                * and deletes the connection itself. */
+               traverse_postorder(table->conn_tree, conn_tree_del, table);
+               free(table->conn_tree);
+       }
+       /* The conid lock has to outlive the traversal above, because
+        * doq_conn_delete takes it while clearing connection ids. */
+       lock_rw_destroy(&table->conid_lock);
+       if(table->conid_tree) {
+               /* The tree should be empty, because the doq_conn_delete calls
+                * above should have also removed their conid elements. */
+               traverse_postorder(table->conid_tree, conid_tree_del, NULL);
+               free(table->conid_tree);
+       }
+       /* NOTE(review): presumably destroyed this late because the size
+        * accounting done during connection deletion uses it - confirm. */
+       lock_basic_destroy(&table->size_lock);
+       if(table->timer_tree) {
+               /* The tree should be empty, because the conn_tree_del calls
+                * above should also have removed them. Also the doq_timer
+                * is part of the doq_conn struct, so is already freed. */
+               free(table->timer_tree);
+       }
+       table->write_list_first = NULL;
+       table->write_list_last = NULL;
+       free(table);
+}
+
+/** Look up the timer tree element whose time equals tv exactly.
+ * Returns NULL when no element has that time. */
+struct doq_timer*
+doq_timer_find_time(struct doq_table* table, struct timeval* tv)
+{
+       struct doq_timer lookup;
+       struct rbnode_type* found;
+       /* only the time field of the search key is filled in */
+       memset(&lookup, 0, sizeof(lookup));
+       lookup.time.tv_sec = tv->tv_sec;
+       lookup.time.tv_usec = tv->tv_usec;
+       found = rbtree_search(table->timer_tree, &lookup);
+       if(!found)
+               return NULL;
+       return (struct doq_timer*)found->key;
+}
+
+/**
+ * Remove a timer that is an element of the timer tree.
+ * Does nothing if the timer is not flagged as being in the tree.
+ * If the timer carries a setlist (other timers with the same time), the
+ * first timer of that list is taken off the list and inserted in the tree
+ * in its place, and it inherits the remainder of the list and the
+ * worker_doq_socket pointer.
+ * @param table: the doq table that holds the timer tree.
+ * @param timer: the timer to remove; its worker_doq_socket is cleared.
+ */
+void
+doq_timer_tree_remove(struct doq_table* table, struct doq_timer* timer)
+{
+       if(!timer->timer_in_tree)
+               return;
+       rbtree_delete(table->timer_tree, timer);
+       timer->timer_in_tree = 0;
+       /* This item could have more timers in the same set. */
+       if(timer->setlist_first) {
+               struct doq_timer* rb_timer = timer->setlist_first;
+               /* del first element from setlist */
+               if(rb_timer->setlist_next)
+                       rb_timer->setlist_next->setlist_prev = NULL;
+               else
+                       timer->setlist_last = NULL;
+               timer->setlist_first = rb_timer->setlist_next;
+               rb_timer->setlist_prev = NULL;
+               rb_timer->setlist_next = NULL;
+               rb_timer->timer_in_list = 0;
+               /* insert it into the tree as new rb element */
+               /* The old node was deleted above, so the tree has no
+                * element with this time when the insert happens. */
+               memset(&rb_timer->node, 0, sizeof(rb_timer->node));
+               rb_timer->node.key = rb_timer;
+               rbtree_insert(table->timer_tree, &rb_timer->node);
+               rb_timer->timer_in_tree = 1;
+               /* the setlist, if any remainder, moves to the rb element */
+               rb_timer->setlist_first = timer->setlist_first;
+               rb_timer->setlist_last = timer->setlist_last;
+               timer->setlist_first = NULL;
+               timer->setlist_last = NULL;
+               rb_timer->worker_doq_socket = timer->worker_doq_socket;
+       }
+       timer->worker_doq_socket = NULL;
+}
+
+/** Take a timer off the setlist of timers that share its time. Does nothing
+ * if the timer is not flagged as being in a list. The list head and tail
+ * live in the tree element with the same time. */
+void
+doq_timer_list_remove(struct doq_table* table, struct doq_timer* timer)
+{
+       struct doq_timer* head;
+       if(!timer->timer_in_list)
+               return;
+       head = doq_timer_find_time(table, &timer->time);
+       if(head) {
+               struct doq_timer* before = timer->setlist_prev;
+               struct doq_timer* after = timer->setlist_next;
+               /* fix up the forward link that pointed at the timer */
+               if(before)
+                       before->setlist_next = after;
+               else
+                       head->setlist_first = after;
+               /* fix up the backward link that pointed at the timer */
+               if(after)
+                       after->setlist_prev = before;
+               else
+                       head->setlist_last = before;
+               timer->setlist_prev = NULL;
+               timer->setlist_next = NULL;
+       }
+       /* the flag is cleared even when no tree element was found */
+       timer->timer_in_list = 0;
+}
+
+/** doq: put timer at the tail of the setlist kept by tree element rb_timer */
+static void
+doq_timer_list_append(struct doq_timer* rb_timer, struct doq_timer* timer)
+{
+       struct doq_timer* tail = rb_timer->setlist_last;
+       log_assert(timer->timer_in_list == 0);
+       timer->timer_in_list = 1;
+       timer->setlist_prev = tail;
+       timer->setlist_next = NULL;
+       if(tail == NULL)
+               rb_timer->setlist_first = timer; /* list was empty */
+       else
+               tail->setlist_next = timer;
+       rb_timer->setlist_last = timer;
+}
+
+/** Stop a timer: take it out of the setlist or the timer tree, whichever
+ * holds it, and drop its worker socket reference. */
+void
+doq_timer_unset(struct doq_table* table, struct doq_timer* timer)
+{
+       /* The list removal goes first; it needs a lookup in the timer tree
+        * to find the element heading the setlist, so the tree may only be
+        * edited afterwards. */
+       if(timer->timer_in_list)
+               doq_timer_list_remove(table, timer);
+       if(timer->timer_in_tree)
+               doq_timer_tree_remove(table, timer);
+       timer->worker_doq_socket = NULL;
+}
+
+/** Arm a timer for the absolute time tv. A timer that is already armed for
+ * exactly that time is left alone; one armed for another time is unset
+ * first. The timer becomes a new tree element, or joins the setlist of the
+ * tree element that already has this time. */
+void doq_timer_set(struct doq_table* table, struct doq_timer* timer,
+       struct doq_server_socket* worker_doq_socket, struct timeval* tv)
+{
+       struct doq_timer* same_time;
+       if(verbosity >= VERB_ALGO && timer->conn) {
+               struct timeval delta;
+               char addrstr[256];
+               addr_to_str((void*)&timer->conn->key.paddr.addr,
+                       timer->conn->key.paddr.addrlen, addrstr,
+                       sizeof(addrstr));
+               timeval_subtract(&delta, tv, worker_doq_socket->now_tv);
+               verbose(VERB_ALGO, "doq %s timer set %d.%6.6d in %d.%6.6d",
+                       addrstr, (int)tv->tv_sec, (int)tv->tv_usec,
+                       (int)delta.tv_sec, (int)delta.tv_usec);
+       }
+       if(timer->timer_in_tree || timer->timer_in_list) {
+               /* nothing to do when it is already set on that time */
+               if(timer->time.tv_sec == tv->tv_sec &&
+                       timer->time.tv_usec == tv->tv_usec)
+                       return;
+               doq_timer_unset(table, timer);
+       }
+       timer->time.tv_sec = tv->tv_sec;
+       timer->time.tv_usec = tv->tv_usec;
+       same_time = doq_timer_find_time(table, tv);
+       if(same_time) {
+               /* A timeout with this value exists, join its setlist. */
+               doq_timer_list_append(same_time, timer);
+               return;
+       }
+       /* No timeout with this value yet, the timer becomes a tree
+        * element with an empty setlist. */
+       memset(&timer->node, 0, sizeof(timer->node));
+       timer->node.key = timer;
+       rbtree_insert(table->timer_tree, &timer->node);
+       timer->timer_in_tree = 1;
+       timer->setlist_first = NULL;
+       timer->setlist_last = NULL;
+       timer->worker_doq_socket = worker_doq_socket;
+}
+
+/** Allocate a connection for the packet addresses in paddr and the given
+ * dcid and version. The dcid bytes are copied. The stream tree, the timer
+ * back pointer and the lock are initialised. Returns NULL on allocation
+ * failure. */
+struct doq_conn*
+doq_conn_create(struct comm_point* c, struct doq_pkt_addr* paddr,
+       const uint8_t* dcid, size_t dcidlen, uint32_t version)
+{
+       struct doq_conn* dc = calloc(1, sizeof(*dc));
+       if(dc == NULL)
+               return NULL;
+       dc->node.key = dc;
+       dc->doq_socket = c->doq_socket;
+       dc->table = c->doq_socket->table;
+
+       /* the lookup key: remote address, local address, ifindex, dcid */
+       memmove(&dc->key.paddr.addr, &paddr->addr, paddr->addrlen);
+       dc->key.paddr.addrlen = paddr->addrlen;
+       memmove(&dc->key.paddr.localaddr, &paddr->localaddr,
+               paddr->localaddrlen);
+       dc->key.paddr.localaddrlen = paddr->localaddrlen;
+       dc->key.paddr.ifindex = paddr->ifindex;
+       dc->key.dcid = memdup((void*)dcid, dcidlen);
+       if(dc->key.dcid == NULL) {
+               free(dc);
+               return NULL;
+       }
+       dc->key.dcidlen = dcidlen;
+
+       dc->version = version;
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+       ngtcp2_ccerr_default(&dc->ccerr);
+#else
+       ngtcp2_connection_close_error_default(&dc->last_error);
+#endif
+       rbtree_init(&dc->stream_tree, &doq_stream_cmp);
+       dc->timer.conn = dc;
+
+       lock_basic_init(&dc->lock);
+       lock_protect(&dc->lock, &dc->key, sizeof(dc->key));
+       lock_protect(&dc->lock, &dc->doq_socket, sizeof(dc->doq_socket));
+       lock_protect(&dc->lock, &dc->table, sizeof(dc->table));
+       lock_protect(&dc->lock, &dc->is_deleted, sizeof(dc->is_deleted));
+       lock_protect(&dc->lock, &dc->version, sizeof(dc->version));
+       lock_protect(&dc->lock, &dc->conn, sizeof(dc->conn));
+       lock_protect(&dc->lock, &dc->conid_list, sizeof(dc->conid_list));
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+       lock_protect(&dc->lock, &dc->ccerr, sizeof(dc->ccerr));
+#else
+       lock_protect(&dc->lock, &dc->last_error, sizeof(dc->last_error));
+#endif
+       lock_protect(&dc->lock, &dc->tls_alert, sizeof(dc->tls_alert));
+       lock_protect(&dc->lock, &dc->ssl, sizeof(dc->ssl));
+       lock_protect(&dc->lock, &dc->close_pkt, sizeof(dc->close_pkt));
+       lock_protect(&dc->lock, &dc->close_pkt_len,
+               sizeof(dc->close_pkt_len));
+       lock_protect(&dc->lock, &dc->close_ecn, sizeof(dc->close_ecn));
+       lock_protect(&dc->lock, &dc->stream_tree, sizeof(dc->stream_tree));
+       lock_protect(&dc->lock, &dc->stream_write_first,
+               sizeof(dc->stream_write_first));
+       lock_protect(&dc->lock, &dc->stream_write_last,
+               sizeof(dc->stream_write_last));
+       lock_protect(&dc->lock, &dc->write_interest,
+               sizeof(dc->write_interest));
+       lock_protect(&dc->lock, &dc->on_write_list,
+               sizeof(dc->on_write_list));
+       lock_protect(&dc->lock, &dc->write_prev, sizeof(dc->write_prev));
+       lock_protect(&dc->lock, &dc->write_next, sizeof(dc->write_next));
+       return dc;
+}
+
+/** tree traversal callback: delete one stream of a connection's stream
+ * tree and subtract its memory from the table's quic size accounting.
+ * arg is the doq table. */
+static void
+stream_tree_del(rbnode_type* node, void* arg)
+{
+       struct doq_table* table = (struct doq_table*)arg;
+       struct doq_stream* stream;
+       if(!node)
+               return;
+       /* Go through the key pointer, like the other tree callbacks and
+        * doq_stream_find do, instead of casting the node itself; the cast
+        * is only valid while the node is the first member of the struct.
+        * doq_stream_create points node.key at the stream. */
+       stream = (struct doq_stream*)node->key;
+       if(stream->in)
+               doq_table_quic_size_subtract(table, stream->inlen);
+       if(stream->out)
+               doq_table_quic_size_subtract(table, stream->outlen);
+       doq_table_quic_size_subtract(table, sizeof(*stream));
+       doq_stream_delete(stream);
+}
+
+/**
+ * Delete a connection and what it owns: its connection ids, the ngtcp2
+ * connection, all streams in its stream tree, the dcid copy, the SSL
+ * object and the stored close packet.
+ * A NULL conn is accepted and ignored.
+ * @param conn: the connection to delete.
+ * @param table: passed to stream_tree_del for the size accounting of the
+ *     deleted streams.
+ */
+void
+doq_conn_delete(struct doq_conn* conn, struct doq_table* table)
+{
+       if(!conn)
+               return;
+       lock_basic_destroy(&conn->lock);
+       /* The conids are cleared while holding the table's conid lock for
+        * writing. */
+       lock_rw_wrlock(&conn->table->conid_lock);
+       doq_conn_clear_conids(conn);
+       lock_rw_unlock(&conn->table->conid_lock);
+       ngtcp2_conn_del(conn->conn);
+       if(conn->stream_tree.count != 0) {
+               traverse_postorder(&conn->stream_tree, stream_tree_del, table);
+       }
+       free(conn->key.dcid);
+       SSL_free(conn->ssl);
+       free(conn->close_pkt);
+       free(conn);
+}
+
+/** Compare two doq_conn elements by their key, for the connection tree.
+ * Returns negative, zero or positive. */
+int
+doq_conn_cmp(const void* key1, const void* key2)
+{
+       struct doq_conn* a = (struct doq_conn*)key1;
+       struct doq_conn* b = (struct doq_conn*)key2;
+       int diff;
+       /* The order is: destination address, local address, ifindex and
+        * finally the dcid. A findlessorequal search on a destination
+        * address then lands on the connections to that address, whatever
+        * their dcids are, and a sorted printout lists the connections
+        * per destination IP address, followed by the dcids for it. */
+       if(a->key.paddr.addrlen != b->key.paddr.addrlen)
+               return (a->key.paddr.addrlen < b->key.paddr.addrlen)?-1:1;
+       diff = memcmp(&a->key.paddr.addr, &b->key.paddr.addr,
+               a->key.paddr.addrlen);
+       if(diff != 0)
+               return diff;
+
+       if(a->key.paddr.localaddrlen != b->key.paddr.localaddrlen)
+               return (a->key.paddr.localaddrlen <
+                       b->key.paddr.localaddrlen)?-1:1;
+       diff = memcmp(&a->key.paddr.localaddr, &b->key.paddr.localaddr,
+               a->key.paddr.localaddrlen);
+       if(diff != 0)
+               return diff;
+
+       if(a->key.paddr.ifindex != b->key.paddr.ifindex)
+               return (a->key.paddr.ifindex < b->key.paddr.ifindex)?-1:1;
+
+       if(a->key.dcidlen != b->key.dcidlen)
+               return (a->key.dcidlen < b->key.dcidlen)?-1:1;
+       diff = memcmp(a->key.dcid, b->key.dcid, a->key.dcidlen);
+       if(diff != 0)
+               return diff;
+       return 0;
+}
+
+/** Compare two doq_conid elements: shorter cid first, equal lengths are
+ * ordered by the cid bytes. */
+int doq_conid_cmp(const void* key1, const void* key2)
+{
+       struct doq_conid* a = (struct doq_conid*)key1;
+       struct doq_conid* b = (struct doq_conid*)key2;
+       if(a->cidlen == b->cidlen)
+               return memcmp(a->cid, b->cid, a->cidlen);
+       return (a->cidlen < b->cidlen)?-1:1;
+}
+
+/** Compare two doq_timer elements by time: seconds first, then
+ * microseconds. */
+int doq_timer_cmp(const void* key1, const void* key2)
+{
+       struct doq_timer* a = (struct doq_timer*)key1;
+       struct doq_timer* b = (struct doq_timer*)key2;
+       if(a->time.tv_sec != b->time.tv_sec)
+               return (a->time.tv_sec < b->time.tv_sec)?-1:1;
+       if(a->time.tv_usec != b->time.tv_usec)
+               return (a->time.tv_usec < b->time.tv_usec)?-1:1;
+       return 0;
+}
+
+/** Compare two doq_stream elements by their stream id. */
+int doq_stream_cmp(const void* key1, const void* key2)
+{
+       struct doq_stream* a = (struct doq_stream*)key1;
+       struct doq_stream* b = (struct doq_stream*)key2;
+       if(a->stream_id == b->stream_id)
+               return 0;
+       return (a->stream_id < b->stream_id)?-1:1;
+}
+
+/** doq: save the local address and port in repinfo. The pktinfo member,
+ * that otherwise holds ancillary udp data, is reused for the address; this
+ * saves the space of a sockaddr. srctype records the family, 4 or 6. */
+static void
+doq_repinfo_store_localaddr(struct comm_reply* repinfo,
+       struct doq_addr_storage* localaddr, socklen_t localaddrlen)
+{
+       memset(&repinfo->pktinfo, 0, sizeof(repinfo->pktinfo));
+       if(!addr_is_ip6((void*)localaddr, localaddrlen)) {
+#ifdef IP_PKTINFO
+               struct sockaddr_in* in4 = (struct sockaddr_in*)localaddr;
+               memmove(&repinfo->pktinfo.v4info.ipi_addr,
+                       &in4->sin_addr, sizeof(struct in_addr));
+               repinfo->doq_srcport = in4->sin_port;
+#elif defined(IP_RECVDSTADDR)
+               struct sockaddr_in* in4 = (struct sockaddr_in*)localaddr;
+               memmove(&repinfo->pktinfo.v4addr, &in4->sin_addr,
+                       sizeof(struct in_addr));
+               repinfo->doq_srcport = in4->sin_port;
+#endif
+               repinfo->srctype = 4;
+       } else {
+#ifdef IPV6_PKTINFO
+               struct sockaddr_in6* in6 = (struct sockaddr_in6*)localaddr;
+               memmove(&repinfo->pktinfo.v6info.ipi6_addr,
+                       &in6->sin6_addr, sizeof(struct in6_addr));
+               repinfo->doq_srcport = in6->sin6_port;
+#endif
+               repinfo->srctype = 6;
+       }
+}
+
+/** doq retrieve localaddr from repinfo.
+ * Rebuilds the sockaddr that doq_repinfo_store_localaddr saved: the
+ * sockaddr is zeroed, then family, address and port are filled in, and
+ * localaddrlen is set to the size of the sockaddr. The family is picked
+ * by repinfo->srctype, 6 is IPv6 and anything else is IPv4. On platforms
+ * without the pktinfo socket options the outputs are left untouched. */
+static void
+doq_repinfo_retrieve_localaddr(struct comm_reply* repinfo,
+       struct doq_addr_storage* localaddr, socklen_t* localaddrlen)
+{
+       if(repinfo->srctype == 6) {
+#ifdef IPV6_PKTINFO
+               struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr;
+               *localaddrlen = (socklen_t)sizeof(struct sockaddr_in6);
+               memset(sa6, 0, *localaddrlen);
+               sa6->sin6_family = AF_INET6;
+               /* Copy only the address. Copying the sockaddr length here
+                * would read past the stored address and overwrite
+                * sin6_scope_id and the bytes after the sockaddr. */
+               memmove(&sa6->sin6_addr, &repinfo->pktinfo.v6info.ipi6_addr,
+                       sizeof(struct in6_addr));
+               sa6->sin6_port = repinfo->doq_srcport;
+#endif
+       } else {
+#ifdef IP_PKTINFO
+               struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
+               *localaddrlen = (socklen_t)sizeof(struct sockaddr_in);
+               memset(sa, 0, *localaddrlen);
+               sa->sin_family = AF_INET;
+               /* Copy only the address, not the sockaddr length, so that
+                * the padding of the sockaddr stays zero. */
+               memmove(&sa->sin_addr, &repinfo->pktinfo.v4info.ipi_addr,
+                       sizeof(struct in_addr));
+               sa->sin_port = repinfo->doq_srcport;
+#elif defined(IP_RECVDSTADDR)
+               struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
+               *localaddrlen = (socklen_t)sizeof(struct sockaddr_in);
+               memset(sa, 0, *localaddrlen);
+               sa->sin_family = AF_INET;
+               memmove(&sa->sin_addr, &repinfo->pktinfo.v4addr,
+                       sizeof(struct in_addr));
+               sa->sin_port = repinfo->doq_srcport;
+#endif
+       }
+}
+
+/** doq: copy a connection key into repinfo. Returns false if the dcid does
+ * not fit in repinfo; the address fields have been written by then. */
+static int
+doq_conn_key_store_repinfo(struct doq_conn_key* key,
+       struct comm_reply* repinfo)
+{
+       repinfo->is_proxied = 0;
+       repinfo->doq_ifindex = key->paddr.ifindex;
+
+       /* the peer address serves as both remote and client address */
+       repinfo->remote_addrlen = key->paddr.addrlen;
+       memmove(&repinfo->remote_addr, &key->paddr.addr,
+               repinfo->remote_addrlen);
+       repinfo->client_addrlen = key->paddr.addrlen;
+       memmove(&repinfo->client_addr, &key->paddr.addr,
+               repinfo->client_addrlen);
+
+       doq_repinfo_store_localaddr(repinfo, &key->paddr.localaddr,
+               key->paddr.localaddrlen);
+
+       if(key->dcidlen <= sizeof(repinfo->doq_dcid)) {
+               repinfo->doq_dcidlen = key->dcidlen;
+               memmove(repinfo->doq_dcid, key->dcid, key->dcidlen);
+               return 1;
+       }
+       return 0;
+}
+
+/** Fill a connection key from repinfo. The dcid is not copied: key->dcid
+ * points at the dcid array inside repinfo. */
+void
+doq_conn_key_from_repinfo(struct doq_conn_key* key, struct comm_reply* repinfo)
+{
+       key->dcid = repinfo->doq_dcid;
+       key->dcidlen = repinfo->doq_dcidlen;
+       key->paddr.ifindex = repinfo->doq_ifindex;
+       key->paddr.addrlen = repinfo->remote_addrlen;
+       memmove(&key->paddr.addr, &repinfo->remote_addr,
+               repinfo->remote_addrlen);
+       doq_repinfo_retrieve_localaddr(repinfo, &key->paddr.localaddr,
+               &key->paddr.localaddrlen);
+}
+
+/** doq: insert a stream into the stream tree of the connection; the result
+ * of the insert is not checked */
+static void
+doq_conn_add_stream(struct doq_conn* conn, struct doq_stream* stream)
+{
+       rbnode_type* n = &stream->node;
+       (void)rbtree_insert(&conn->stream_tree, n);
+}
+
+/** doq: take a stream out of the stream tree of the connection; the stream
+ * itself is not freed here */
+static void
+doq_conn_del_stream(struct doq_conn* conn, struct doq_stream* stream)
+{
+       rbnode_type* n = &stream->node;
+       (void)rbtree_delete(&conn->stream_tree, n);
+}
+
+/** doq: allocate a zeroed stream for stream_id, NULL on malloc failure */
+static struct doq_stream*
+doq_stream_create(int64_t stream_id)
+{
+       struct doq_stream* s = calloc(1, sizeof(*s));
+       if(s != NULL) {
+               /* the tree node refers back to the stream it is part of */
+               s->node.key = s;
+               s->stream_id = stream_id;
+       }
+       return s;
+}
+
+/** Free a stream and its in and out buffers. NULL is accepted. */
+void doq_stream_delete(struct doq_stream* stream)
+{
+       if(stream != NULL) {
+               free(stream->in);
+               free(stream->out);
+               free(stream);
+       }
+}
+
+/** Find the stream with stream_id in the connection's stream tree.
+ * Returns NULL if the connection has no such stream. */
+struct doq_stream*
+doq_stream_find(struct doq_conn* conn, int64_t stream_id)
+{
+       struct doq_stream probe;
+       rbnode_type* hit;
+       /* only the fields used for the lookup are filled in */
+       probe.node.key = &probe;
+       probe.stream_id = stream_id;
+       hit = rbtree_search(&conn->stream_tree, &probe);
+       if(!hit)
+               return NULL;
+       return (struct doq_stream*)hit->key;
+}
+
+/** doq: append the stream to the tail of the connection's write list,
+ * unless it is on the list already */
+static void
+doq_stream_on_write_list(struct doq_conn* conn, struct doq_stream* stream)
+{
+       struct doq_stream* tail;
+       if(stream->on_write_list)
+               return;
+       tail = conn->stream_write_last;
+       stream->write_prev = tail;
+       if(tail == NULL)
+               conn->stream_write_first = stream; /* list was empty */
+       else
+               tail->write_next = stream;
+       conn->stream_write_last = stream;
+       stream->write_next = NULL;
+       stream->on_write_list = 1;
+}
+
+/** doq: unlink the stream from the connection's write list, if it is on
+ * that list */
+static void
+doq_stream_off_write_list(struct doq_conn* conn, struct doq_stream* stream)
+{
+       struct doq_stream* before;
+       struct doq_stream* after;
+       if(!stream->on_write_list)
+               return;
+       before = stream->write_prev;
+       after = stream->write_next;
+       if(after)
+               after->write_prev = before;
+       else
+               conn->stream_write_last = before;
+       if(before)
+               before->write_next = after;
+       else
+               conn->stream_write_first = after;
+       stream->write_prev = NULL;
+       stream->write_next = NULL;
+       stream->on_write_list = 0;
+}
+
+/** doq: free the in buffer of the stream, if any, and take its length off
+ * the table's quic size accounting */
+static void
+doq_stream_remove_in_buffer(struct doq_stream* stream, struct doq_table* table)
+{
+       if(!stream->in)
+               return;
+       doq_table_quic_size_subtract(table, stream->inlen);
+       free(stream->in);
+       stream->in = NULL;
+       stream->inlen = 0;
+}
+
+/** doq: free the out buffer of the stream, if any, and take its length off
+ * the table's quic size accounting */
+static void
+doq_stream_remove_out_buffer(struct doq_stream* stream,
+       struct doq_table* table)
+{
+       if(!stream->out)
+               return;
+       doq_table_quic_size_subtract(table, stream->outlen);
+       free(stream->out);
+       stream->out = NULL;
+       stream->outlen = 0;
+}
+
+/** Close a stream: take it off the write list, optionally tell ngtcp2 to
+ * shut the stream down, then release its buffers, its size accounting and
+ * the stream itself. Returns true on success, also for a stream that is
+ * closed already. Returns false if the ngtcp2 shutdown call fails; the
+ * stream is then marked closed but has not been freed. */
+int
+doq_stream_close(struct doq_conn* conn, struct doq_stream* stream,
+       int send_shutdown)
+{
+       struct doq_table* tbl;
+       if(stream->is_closed)
+               return 1;
+       stream->is_closed = 1;
+       doq_stream_off_write_list(conn, stream);
+       if(send_shutdown) {
+               int rv;
+               verbose(VERB_ALGO, "doq: shutdown stream_id %d with app_error_code %d",
+                       (int)stream->stream_id, (int)DOQ_APP_ERROR_CODE);
+               rv = ngtcp2_conn_shutdown_stream(conn->conn,
+#ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4
+                       0,
+#endif
+                       stream->stream_id, DOQ_APP_ERROR_CODE);
+               if(rv != 0) {
+                       log_err("doq ngtcp2_conn_shutdown_stream %d failed: %s",
+                               (int)stream->stream_id, ngtcp2_strerror(rv));
+                       return 0;
+               }
+               doq_conn_write_enable(conn);
+       }
+       tbl = conn->doq_socket->table;
+       doq_stream_remove_in_buffer(stream, tbl);
+       doq_stream_remove_out_buffer(stream, tbl);
+       doq_table_quic_size_subtract(tbl, sizeof(*stream));
+       doq_conn_del_stream(conn, stream);
+       doq_stream_delete(stream);
+       return 1;
+}
+
+/** doq: copy the answer in buf into the out buffer of the stream. An
+ * earlier out buffer is freed. Returns false when the copy cannot be
+ * allocated. */
+static int
+doq_stream_pickup_answer(struct doq_stream* stream, struct sldns_buffer* buf)
+{
+       size_t answerlen = sldns_buffer_limit(buf);
+       stream->is_answer_available = 1;
+       free(stream->out);
+       stream->out = NULL;
+       stream->nwrite = 0;
+       stream->outlen = answerlen;
+       /* With quic the output bytes must remain allocated and available
+        * for possible resends, until the remote end has acknowledged them.
+        * That includes the uint16_t tcplen prefix, kept in outlen_wire. */
+       stream->outlen_wire = htons(stream->outlen);
+       stream->out = memdup(sldns_buffer_begin(buf), answerlen);
+       if(stream->out)
+               return 1;
+       log_err("doq could not send answer: out of memory");
+       return 0;
+}
+
+/** Queue the reply in buf on the stream: the answer is copied into the
+ * stream's out buffer, the table's size accounting is updated, the stream
+ * goes on the connection's write list and writing is enabled for the
+ * connection. Returns false if the answer could not be copied. */
+int
+doq_stream_send_reply(struct doq_conn* conn, struct doq_stream* stream,
+       struct sldns_buffer* buf)
+{
+       if(verbosity >= VERB_ALGO) {
+               char* pktstr = sldns_wire2str_pkt(sldns_buffer_begin(buf),
+                       sldns_buffer_limit(buf));
+               verbose(VERB_ALGO, "doq stream %d response\n%s",
+                       (int)stream->stream_id, (pktstr?pktstr:"null"));
+               free(pktstr);
+       }
+       /* the previous out buffer, if any, is replaced by the pickup */
+       if(stream->out) {
+               doq_table_quic_size_subtract(conn->doq_socket->table,
+                       stream->outlen);
+       }
+       if(doq_stream_pickup_answer(stream, buf) == 0)
+               return 0;
+       doq_table_quic_size_add(conn->doq_socket->table, stream->outlen);
+       doq_stream_on_write_list(conn, stream);
+       doq_conn_write_enable(conn);
+       return 1;
+}
+
+/** doq: the data length of the stream is known, allocate the in buffer
+ * for it and account for its size. False if the length is larger than
+ * 1024*1024 or if the allocation fails. */
+static int
+doq_stream_datalen_complete(struct doq_stream* stream, struct doq_table* table)
+{
+       if(stream->inlen > 1024*1024) {
+               log_err("doq stream in length too large %d",
+                       (int)stream->inlen);
+               return 0;
+       }
+       stream->in = calloc(1, stream->inlen);
+       if(stream->in) {
+               doq_table_quic_size_add(table, stream->inlen);
+               return 1;
+       }
+       log_err("doq could not read stream, calloc failed: "
+               "out of memory");
+       return 0;
+}
+
+/** doq stream data is complete, the input data has been received.
+ * Marks the query as complete, copies the stream input into the buffer of
+ * the comm point of the doq socket, stores the connection key and the
+ * stream id in the reply info and calls the comm point callback.
+ * If the callback returns nonzero, the contents of the comm point buffer
+ * are queued as the reply on this stream with doq_stream_send_reply.
+ * The current_conn of the doq socket is set to conn only for the duration
+ * of the callback.
+ * @param conn: the connection that the stream belongs to.
+ * @param stream: the stream, with the received query in stream->in.
+ * @return false if there is no input buffer, if the query does not fit in
+ *     the comm point buffer, if the connection key could not be stored in
+ *     the reply info, or if the reply could not be queued. True otherwise,
+ *     also when the callback returned zero. */
+static int
+doq_stream_data_complete(struct doq_conn* conn, struct doq_stream* stream)
+{
+       struct comm_point* c;
+       if(verbosity >= VERB_ALGO) {
+               char* s = sldns_wire2str_pkt(stream->in, stream->inlen);
+               char a[128];
+               addr_to_str((void*)&conn->key.paddr.addr,
+                       conn->key.paddr.addrlen, a, sizeof(a));
+               verbose(VERB_ALGO, "doq %s stream %d incoming query\n%s",
+                       a, (int)stream->stream_id, (s?s:"null"));
+               free(s);
+       }
+       stream->is_query_complete = 1;
+       c = conn->doq_socket->cp;
+       if(!stream->in) {
+               verbose(VERB_ALGO, "doq_stream_data_complete: no in buffer");
+               return 0;
+       }
+       if(stream->inlen > sldns_buffer_capacity(c->buffer)) {
+               verbose(VERB_ALGO, "doq_stream_data_complete: query too long");
+               return 0;
+       }
+       sldns_buffer_clear(c->buffer);
+       sldns_buffer_write(c->buffer, stream->in, stream->inlen);
+       sldns_buffer_flip(c->buffer);
+       c->repinfo.c = c;
+       if(!doq_conn_key_store_repinfo(&conn->key, &c->repinfo)) {
+               verbose(VERB_ALGO, "doq_stream_data_complete: connection "
+                       "DCID too long");
+               return 0;
+       }
+       c->repinfo.doq_streamid = stream->stream_id;
+       conn->doq_socket->current_conn = conn;
+       fptr_ok(fptr_whitelist_comm_point(c->callback));
+       if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo)) {
+               conn->doq_socket->current_conn = NULL;
+               if(!doq_stream_send_reply(conn, stream, c->buffer)) {
+                       verbose(VERB_ALGO, "doq: failed to send_reply");
+                       return 0;
+               }
+               return 1;
+       }
+       conn->doq_socket->current_conn = NULL;
+       return 1;
+}
+
+/** doq receive data for a stream, more bytes of the incoming data.
+ * The stream data starts with a two byte length in network byte order,
+ * followed by that many bytes of query. The length can arrive split over
+ * calls; while it is incomplete the part that was read is kept in
+ * stream->inlen. Once it is complete, the input buffer is allocated with
+ * doq_stream_datalen_complete. stream->nread also counts the two length
+ * bytes, so the write position in stream->in is nread-2.
+ * Bytes past the announced length are not stored, they are only logged.
+ * @param stream: the stream that the data is for.
+ * @param data: the received bytes.
+ * @param datalen: number of bytes in data.
+ * @param recv_done: set to 1 if this call stored data and the query is
+ *     complete now. It is not modified otherwise.
+ * @param table: passed on to doq_stream_datalen_complete.
+ * @return false on failure: the input buffer could not be set up, or the
+ *     stream state is inconsistent. */
+static int
+doq_stream_recv_data(struct doq_stream* stream, const uint8_t* data,
+       size_t datalen, int* recv_done, struct doq_table* table)
+{
+       int got_data = 0;
+       /* read the tcplength uint16_t at the start */
+       if(stream->nread < 2) {
+               uint16_t tcplen = 0;
+               size_t todolen = 2 - stream->nread;
+
+               if(stream->nread > 0) {
+                       /* put in the already read byte if there is one */
+                       tcplen = stream->inlen;
+               }
+               if(datalen < todolen)
+                       todolen = datalen;
+               memmove(((uint8_t*)&tcplen)+stream->nread, data, todolen);
+               stream->nread += todolen;
+               data += todolen;
+               datalen -= todolen;
+               if(stream->nread == 2) {
+                       /* the initial length value is completed */
+                       stream->inlen = ntohs(tcplen);
+                       if(!doq_stream_datalen_complete(stream, table))
+                               return 0;
+               } else {
+                       /* store for later */
+                       stream->inlen = tcplen;
+                       return 1;
+               }
+       }
+       /* if there are more data bytes */
+       if(datalen > 0) {
+               size_t to_write = datalen;
+               if(stream->nread-2 > stream->inlen) {
+                       verbose(VERB_ALGO, "doq stream buffer too small");
+                       return 0;
+               }
+               if(datalen > stream->inlen - (stream->nread-2))
+                       to_write = stream->inlen - (stream->nread-2);
+               if(to_write > 0) {
+                       if(!stream->in) {
+                               verbose(VERB_ALGO, "doq: stream has "
+                                       "no buffer");
+                               return 0;
+                       }
+                       memmove(stream->in+(stream->nread-2), data, to_write);
+                       stream->nread += to_write;
+                       data += to_write;
+                       datalen -= to_write;
+                       got_data = 1;
+               }
+       }
+       /* Are there extra bytes received after the end? If so, log them. */
+       if(datalen > 0) {
+               if(verbosity >= VERB_ALGO)
+                       log_hex("doq stream has extra bytes received after end",
+                               (void*)data, datalen);
+       }
+       /* Is the input data complete? */
+       if(got_data && stream->nread >= stream->inlen+2) {
+               if(!stream->in) {
+                       verbose(VERB_ALGO, "doq: completed stream has "
+                               "no buffer");
+                       return 0;
+               }
+               *recv_done = 1;
+       }
+       return 1;
+}
+
+/** A FIN was received for the stream, no more bytes are going to arrive.
+ * If the query was not complete, the stream is closed with
+ * doq_stream_close. recv_done tells that the query was completed by the
+ * data that came with the FIN. Returns false if closing the stream fails. */
+static int
+doq_stream_recv_fin(struct doq_conn* conn, struct doq_stream* stream, int
+       recv_done)
+{
+       /* Nothing to do when the query is complete. */
+       if(stream->is_query_complete || recv_done)
+               return 1;
+       verbose(VERB_ALGO, "doq: stream recv FIN, but is "
+               "not complete, have %d of %d bytes",
+               ((int)stream->nread)-2, (int)stream->inlen);
+       return doq_stream_close(conn, stream, 1) ? 1 : 0;
+}
+
+/** Fill buf with len random bytes; every byte is the low 8 bits of a
+ * value from ub_random on rnd. */
+void doq_fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len)
+{
+       size_t pos = 0;
+       while(pos < len) {
+               buf[pos] = ub_random(rnd)&0xff;
+               pos++;
+       }
+}
+
+/** Generate a new connection id of datalen random bytes in data.
+ * A candidate is retried while doq_conid_find finds it in the table, for
+ * at most 100 attempts. The caller must hold the lock on the conid tree.
+ * Returns false if no unused value was found. */
+static int
+doq_conn_generate_new_conid(struct doq_conn* conn, uint8_t* data,
+       size_t datalen)
+{
+       const int max_try = 100;
+       int attempt = 0;
+       while(attempt < max_try) {
+               doq_fill_rand(conn->doq_socket->rnd, data, datalen);
+               /* Not in the table means the value is unused. */
+               if(!doq_conid_find(conn->table, data, datalen))
+                       return 1;
+               attempt++;
+       }
+       verbose(VERB_ALGO, "doq_conn_generate_new_conid failed: could not "
+               "generate random unused connection id value in %d attempts.",
+               max_try);
+       return 0;
+}
+
+/** ngtcp2 rand callback function. Fills dest with destlen random bytes,
+ * using the ub_randstate that is in the native_handle of rand_ctx. */
+static void
+doq_rand_cb(uint8_t* dest, size_t destlen, const ngtcp2_rand_ctx* rand_ctx)
+{
+       doq_fill_rand((struct ub_randstate*)rand_ctx->native_handle, dest,
+               destlen);
+}
+
+/** ngtcp2 get_new_connection_id callback function.
+ * Generates an unused connection id of cidlen bytes in cid, creates the
+ * stateless reset token for it in token, and associates the id with the
+ * connection. Returns 0, or NGTCP2_ERR_CALLBACK_FAILURE if a step fails. */
+static int
+doq_get_new_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn), ngtcp2_cid* cid,
+       uint8_t* token, size_t cidlen, void* user_data)
+{
+       struct doq_conn* doq_conn = (struct doq_conn*)user_data;
+       int result = NGTCP2_ERR_CALLBACK_FAILURE;
+       /* The conid tree stays locked from the duplicate check during
+        * generation up to the insertion, so that no other modification
+        * can get in between. That guarantees uniqueness. */
+       lock_rw_wrlock(&doq_conn->table->conid_lock);
+       if(doq_conn_generate_new_conid(doq_conn, cid->data, cidlen)) {
+               cid->datalen = cidlen;
+               if(ngtcp2_crypto_generate_stateless_reset_token(token,
+                       doq_conn->doq_socket->static_secret,
+                       doq_conn->doq_socket->static_secret_len, cid) == 0
+                       && doq_conn_associate_conid(doq_conn, cid->data,
+                       cid->datalen)) {
+                       result = 0;
+               }
+       }
+       lock_rw_unlock(&doq_conn->table->conid_lock);
+       return result;
+}
+
+/** ngtcp2 remove_connection_id callback function. Dissociates the
+ * connection id from the connection, with the conid tree write locked.
+ * Always returns 0. */
+static int
+doq_remove_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn),
+       const ngtcp2_cid* cid, void* user_data)
+{
+       struct doq_conn* owner = (struct doq_conn*)user_data;
+
+       lock_rw_wrlock(&owner->table->conid_lock);
+       doq_conn_dissociate_conid(owner, cid->data, cid->datalen);
+       lock_rw_unlock(&owner->table->conid_lock);
+       return 0;
+}
+
+/** Generate a regular token for the remote address of the current path
+ * and submit it on the connection.
+ * If the token cannot be generated, that is logged and 1 is returned, so
+ * the caller does not see it as a failure.
+ * Returns false only if ngtcp2_conn_submit_new_token fails. */
+static int
+doq_submit_new_token(struct doq_conn* conn)
+{
+       uint8_t token[NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN];
+       const ngtcp2_path* path = ngtcp2_conn_get_path(conn->conn);
+       ngtcp2_tstamp now = doq_get_timestamp_nanosec();
+       ngtcp2_ssize tokenlen;
+       int rv;
+
+       tokenlen = ngtcp2_crypto_generate_regular_token(token,
+               conn->doq_socket->static_secret,
+               conn->doq_socket->static_secret_len, path->remote.addr,
+               path->remote.addrlen, now);
+       if(tokenlen < 0) {
+               log_err("doq ngtcp2_crypto_generate_regular_token failed");
+               return 1;
+       }
+
+       verbose(VERB_ALGO, "doq submit new token");
+       rv = ngtcp2_conn_submit_new_token(conn->conn, token, tokenlen);
+       if(rv == 0)
+               return 1;
+       log_err("doq ngtcp2_conn_submit_new_token failed: %s",
+               ngtcp2_strerror(rv));
+       return 0;
+}
+
+/** ngtcp2 handshake_completed callback function.
+ * Logs flow control and stream limits, the negotiated cipher and, at
+ * verbosity above VERB_ALGO, the negotiated ALPN. Then a new token is
+ * submitted for the client.
+ * @param user_data: the doq_conn.
+ * @return 0 on success, NGTCP2_ERR_CALLBACK_FAILURE if the new token
+ *     could not be submitted. */
+static int
+doq_handshake_completed_cb(ngtcp2_conn* ATTR_UNUSED(conn), void* user_data)
+{
+       struct doq_conn* doq_conn = (struct doq_conn*)user_data;
+       verbose(VERB_ALGO, "doq handshake_completed callback");
+       verbose(VERB_ALGO, "ngtcp2_conn_get_max_data_left is %d",
+               (int)ngtcp2_conn_get_max_data_left(doq_conn->conn));
+#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI
+       verbose(VERB_ALGO, "ngtcp2_conn_get_max_local_streams_uni is %d",
+               (int)ngtcp2_conn_get_max_local_streams_uni(doq_conn->conn));
+#endif
+       verbose(VERB_ALGO, "ngtcp2_conn_get_streams_uni_left is %d",
+               (int)ngtcp2_conn_get_streams_uni_left(doq_conn->conn));
+       verbose(VERB_ALGO, "ngtcp2_conn_get_streams_bidi_left is %d",
+               (int)ngtcp2_conn_get_streams_bidi_left(doq_conn->conn));
+       verbose(VERB_ALGO, "negotiated cipher name is %s",
+               SSL_get_cipher_name(doq_conn->ssl));
+       if(verbosity > VERB_ALGO) {
+               const unsigned char* alpn = NULL;
+               unsigned int alpnlen = 0;
+               char alpnstr[128];
+               SSL_get0_alpn_selected(doq_conn->ssl, &alpn, &alpnlen);
+               if(alpnlen > sizeof(alpnstr)-1)
+                       alpnlen = sizeof(alpnstr)-1;
+               /* Without a selected ALPN the pointer is NULL, and NULL
+                * must not be passed to memmove, not even for length 0. */
+               if(alpn && alpnlen > 0)
+                       memmove(alpnstr, alpn, alpnlen);
+               else
+                       alpnlen = 0;
+               alpnstr[alpnlen]=0;
+               verbose(VERB_ALGO, "negotiated ALPN is '%s'", alpnstr);
+       }
+
+       /* Use the ngtcp2 error code, like the other callbacks do. */
+       if(!doq_submit_new_token(doq_conn))
+               return NGTCP2_ERR_CALLBACK_FAILURE;
+       return 0;
+}
+
+/** ngtcp2 stream_open callback function */
+static int
+doq_stream_open_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id,
+       void* user_data)
+{
+       struct doq_conn* doq_conn = (struct doq_conn*)user_data;
+       struct doq_stream* stream;
+       verbose(VERB_ALGO, "doq new stream %x", (int)stream_id);
+       if(doq_stream_find(doq_conn, stream_id)) {
+               verbose(VERB_ALGO, "doq: stream with this id already exists");
+               return 0;
+       }
+       if(stream_id != 0 && stream_id != 4 && /* allow one stream on a new connection */
+               !doq_table_quic_size_available(doq_conn->doq_socket->table,
+               doq_conn->doq_socket->cfg, sizeof(*stream)
+               + 100 /* estimated query in */
+               + 512 /* estimated response out */
+               )) {
+               int rv;
+               verbose(VERB_ALGO, "doq: no mem for new stream");
+               rv = ngtcp2_conn_shutdown_stream(doq_conn->conn,
+#ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4
+                       0,
+#endif
+                       stream_id, NGTCP2_CONNECTION_REFUSED);
+               if(rv != 0) {
+                       log_err("ngtcp2_conn_shutdown_stream failed: %s",
+                               ngtcp2_strerror(rv));
+                       return NGTCP2_ERR_CALLBACK_FAILURE;
+               }
+               return 0;
+       }
+       stream = doq_stream_create(stream_id);
+       if(!stream) {
+               log_err("doq: could not doq_stream_create: out of memory");
+               return NGTCP2_ERR_CALLBACK_FAILURE;
+       }
+       doq_table_quic_size_add(doq_conn->doq_socket->table, sizeof(*stream));
+       doq_conn_add_stream(doq_conn, stream);
+       return 0;
+}
+
+/** ngtcp2 recv_stream_data callback function.
+ * Looks up the stream; data for an unknown stream or for a closed stream
+ * is ignored. Received bytes are passed to doq_stream_recv_data, and a
+ * FIN flag to doq_stream_recv_fin. The stream and connection flow
+ * control offsets are extended by datalen. If the query became complete
+ * with this data, it is processed with doq_stream_data_complete.
+ * @param user_data: the doq_conn.
+ * @return 0, or NGTCP2_ERR_CALLBACK_FAILURE if receiving the data,
+ *     handling the FIN or processing the completed query fails. */
+static int
+doq_recv_stream_data_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags,
+       int64_t stream_id, uint64_t offset, const uint8_t* data,
+       size_t datalen, void* user_data, void* ATTR_UNUSED(stream_user_data))
+{
+       int recv_done = 0;
+       struct doq_conn* doq_conn = (struct doq_conn*)user_data;
+       struct doq_stream* stream;
+       verbose(VERB_ALGO, "doq recv stream data stream id %d offset %d "
+               "datalen %d%s%s", (int)stream_id, (int)offset, (int)datalen,
+               ((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0?" FIN":""),
+#ifdef NGTCP2_STREAM_DATA_FLAG_0RTT
+               ((flags&NGTCP2_STREAM_DATA_FLAG_0RTT)!=0?" 0RTT":"")
+#else
+               ((flags&NGTCP2_STREAM_DATA_FLAG_EARLY)!=0?" EARLY":"")
+#endif
+               );
+       stream = doq_stream_find(doq_conn, stream_id);
+       if(!stream) {
+               verbose(VERB_ALGO, "doq: received stream data for "
+                       "unknown stream %d", (int)stream_id);
+               return 0;
+       }
+       if(stream->is_closed) {
+               verbose(VERB_ALGO, "doq: stream is closed, ignore recv data");
+               return 0;
+       }
+       if(datalen != 0) {
+               if(!doq_stream_recv_data(stream, data, datalen, &recv_done,
+                       doq_conn->doq_socket->table))
+                       return NGTCP2_ERR_CALLBACK_FAILURE;
+       }
+       if((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0) {
+               if(!doq_stream_recv_fin(doq_conn, stream, recv_done))
+                       return NGTCP2_ERR_CALLBACK_FAILURE;
+       }
+       ngtcp2_conn_extend_max_stream_offset(doq_conn->conn, stream_id,
+               datalen);
+       ngtcp2_conn_extend_max_offset(doq_conn->conn, datalen);
+       if(recv_done) {
+               if(!doq_stream_data_complete(doq_conn, stream))
+                       return NGTCP2_ERR_CALLBACK_FAILURE;
+       }
+       return 0;
+}
+
+/** ngtcp2 stream_close callback function.
+ * Logs the close, with the app error code if the flags say it is set,
+ * and closes the stream with doq_stream_close. A close for an unknown
+ * stream is ignored.
+ * Returns 0, or NGTCP2_ERR_CALLBACK_FAILURE if the close fails. */
+static int
+doq_stream_close_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags,
+       int64_t stream_id, uint64_t app_error_code, void* user_data,
+       void* ATTR_UNUSED(stream_user_data))
+{
+       struct doq_conn* doq_conn = (struct doq_conn*)user_data;
+       struct doq_stream* stream;
+       if((flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0) {
+               /* The flag is known to be set here, print its name. */
+               verbose(VERB_ALGO, "doq stream close for stream id %d %sapp_error_code %d",
+                       (int)stream_id, "APP_ERROR_CODE_SET ",
+                       (int)app_error_code);
+       } else {
+               verbose(VERB_ALGO, "doq stream close for stream id %d",
+                       (int)stream_id);
+       }
+
+       stream = doq_stream_find(doq_conn, stream_id);
+       if(!stream) {
+               verbose(VERB_ALGO, "doq: stream close for "
+                       "unknown stream %d", (int)stream_id);
+               return 0;
+       }
+       return doq_stream_close(doq_conn, stream, 0) ? 0 :
+               NGTCP2_ERR_CALLBACK_FAILURE;
+}
+
+/** ngtcp2 stream_reset callback function.
+ * Logs the reset and closes the stream with doq_stream_close. A reset
+ * for an unknown stream is ignored.
+ * Returns 0, or NGTCP2_ERR_CALLBACK_FAILURE if the close fails. */
+static int
+doq_stream_reset_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id,
+       uint64_t final_size, uint64_t app_error_code, void* user_data,
+       void* ATTR_UNUSED(stream_user_data))
+{
+       struct doq_conn* doq_conn = (struct doq_conn*)user_data;
+       struct doq_stream* stream = NULL;
+
+       verbose(VERB_ALGO, "doq stream reset for stream id %d final_size %d "
+               "app_error_code %d", (int)stream_id, (int)final_size,
+               (int)app_error_code);
+
+       stream = doq_stream_find(doq_conn, stream_id);
+       if(!stream) {
+               verbose(VERB_ALGO, "doq: stream reset for "
+                       "unknown stream %d", (int)stream_id);
+               return 0;
+       }
+       return doq_stream_close(doq_conn, stream, 0) ? 0 :
+               NGTCP2_ERR_CALLBACK_FAILURE;
+}
+
+/** ngtcp2 acked_stream_data_offset callback function.
+ * The remote end acknowledged the stream data [offset .. offset+datalen).
+ * Once the whole answer is acknowledged, the in and out buffers of the
+ * stream are released. Acks for unknown or closed streams are ignored.
+ * @param user_data: the doq_conn.
+ * @return 0 always. */
+static int
+doq_acked_stream_data_offset_cb(ngtcp2_conn* ATTR_UNUSED(conn),
+       int64_t stream_id, uint64_t offset, uint64_t datalen, void* user_data,
+       void* ATTR_UNUSED(stream_user_data))
+{
+       struct doq_conn* doq_conn = (struct doq_conn*)user_data;
+       struct doq_stream* stream;
+       verbose(VERB_ALGO, "doq stream acked data for stream id %d offset %d "
+               "datalen %d", (int)stream_id, (int)offset, (int)datalen);
+
+       stream = doq_stream_find(doq_conn, stream_id);
+       if(!stream) {
+               verbose(VERB_ALGO, "doq: stream acked data for "
+                       "unknown stream %d", (int)stream_id);
+               return 0;
+       }
+       /* Acked the data from [offset .. offset+datalen). */
+       if(stream->is_closed)
+               return 0;
+       /* The stream carries the uint16_t tcplen in front of the outlen
+        * answer bytes, so the answer is fully acked at outlen+2. The out
+        * buffer must stay allocated until then, because its last bytes
+        * could still have to be resent.
+        * NOTE(review): assumes the write path sends outlen_wire before
+        * out on the same stream - confirm against the stream writer. */
+       if(offset+datalen >= (uint64_t)stream->outlen + 2) {
+               doq_stream_remove_in_buffer(stream,
+                       doq_conn->doq_socket->table);
+               doq_stream_remove_out_buffer(stream,
+                       doq_conn->doq_socket->table);
+       }
+       return 0;
+}
+
+/** ngtcp2 log_printf callback function. Formats the message into a
+ * buffer of 1024 bytes and logs it at VERB_ALGO with a libngtcp2 prefix. */
+static void
+doq_log_printf_cb(void* ATTR_UNUSED(user_data), const char* fmt, ...)
+{
+       char line[1024];
+       va_list args;
+
+       va_start(args, fmt);
+       vsnprintf(line, sizeof(line), fmt, args);
+       va_end(args);
+       verbose(VERB_ALGO, "libngtcp2: %s", line);
+}
+
+#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
+/** The application tx key has been installed for the connection. Only
+ * logs the data and stream limits of the connection. Returns true. */
+static int
+doq_application_tx_key_cb(struct doq_conn* conn)
+{
+       ngtcp2_conn* qconn = conn->conn;
+
+       verbose(VERB_ALGO, "doq application tx key cb");
+       /* No streams are opened here: the server does not want to open
+        * streams to the client, the client initiates by opening bidi
+        * streams. */
+       verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_data_left is %d",
+               (int)ngtcp2_conn_get_max_data_left(qconn));
+#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI
+       verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_local_streams_uni is %d",
+               (int)ngtcp2_conn_get_max_local_streams_uni(qconn));
+#endif
+       verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_uni_left is %d",
+               (int)ngtcp2_conn_get_streams_uni_left(qconn));
+       verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_bidi_left is %d",
+               (int)ngtcp2_conn_get_streams_bidi_left(qconn));
+       return 1;
+}
+
+/** quic_method set_encryption_secrets function.
+ * The doq_conn is taken from the app data of the SSL. The OpenSSL
+ * encryption level is converted to the ngtcp2 level; the ifdefs pick the
+ * type name and the conversion function name that are available.
+ * For every secret that is not NULL, the rx key (read_secret) or tx key
+ * (write_secret) is derived and installed on the ngtcp2 connection.
+ * After a tx key for the application level has been installed,
+ * doq_application_tx_key_cb is called.
+ * @return 1 on success, 0 on failure. */
+static int
+doq_set_encryption_secrets(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level,
+       const uint8_t *read_secret, const uint8_t *write_secret,
+       size_t secret_len)
+{
+       struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl);
+#ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL
+       ngtcp2_encryption_level
+#else
+       ngtcp2_crypto_level
+#endif
+               level =
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL
+               ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level);
+#else
+               ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level);
+#endif
+
+       if(read_secret) {
+               verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_rx_key for level %d ossl %d", (int)level, (int)ossl_level);
+               if(ngtcp2_crypto_derive_and_install_rx_key(doq_conn->conn,
+                       NULL, NULL, NULL, level, read_secret, secret_len)
+                       != 0) {
+                       log_err("ngtcp2_crypto_derive_and_install_rx_key "
+                               "failed");
+                       return 0;
+               }
+       }
+
+       if(write_secret) {
+               verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_tx_key for level %d ossl %d", (int)level, (int)ossl_level);
+               if(ngtcp2_crypto_derive_and_install_tx_key(doq_conn->conn,
+                       NULL, NULL, NULL, level, write_secret, secret_len)
+                       != 0) {
+                       log_err("ngtcp2_crypto_derive_and_install_tx_key "
+                               "failed");
+                       return 0;
+               }
+               if(level == NGTCP2_CRYPTO_LEVEL_APPLICATION) {
+                       if(!doq_application_tx_key_cb(doq_conn))
+                               return 0;
+               }
+       }
+       return 1;
+}
+
+/** quic_method add_handshake_data function.
+ * Passes the TLS handshake bytes to ngtcp2 with
+ * ngtcp2_conn_submit_crypto_data, at the ngtcp2 level that corresponds
+ * to ossl_level. The doq_conn is taken from the app data of the SSL.
+ * On failure the error is logged and is also set as the tls error on the
+ * ngtcp2 connection.
+ * @return 1 on success, 0 on failure. */
+static int
+doq_add_handshake_data(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level,
+       const uint8_t *data, size_t len)
+{
+       struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl);
+#ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL
+       ngtcp2_encryption_level
+#else
+       ngtcp2_crypto_level
+#endif
+               level =
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL
+               ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level);
+#else
+               ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level);
+#endif
+       int rv;
+
+       verbose(VERB_ALGO, "doq_add_handshake_data: "
+               "ngtcp2_con_submit_crypto_data level %d", (int)level);
+       rv = ngtcp2_conn_submit_crypto_data(doq_conn->conn, level, data, len);
+       if(rv != 0) {
+               log_err("ngtcp2_conn_submit_crypto_data failed: %s",
+                       ngtcp2_strerror(rv));
+               ngtcp2_conn_set_tls_error(doq_conn->conn, rv);
+               return 0;
+       }
+       return 1;
+}
+
+/** quic_method flush_flight function. Nothing is done here, the function
+ * only reports success by returning 1. */
+static int
+doq_flush_flight(SSL* ATTR_UNUSED(ssl))
+{
+       return 1;
+}
+
+/** quic_method send_alert function. Stores the alert in tls_alert of the
+ * doq_conn that is the app data of the SSL. Returns 1. */
+static int
+doq_send_alert(SSL *ssl, enum ssl_encryption_level_t ATTR_UNUSED(level),
+       uint8_t alert)
+{
+       ((struct doq_conn*)SSL_get_app_data(ssl))->tls_alert = alert;
+       return 1;
+}
+#endif /* HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT */
+
+/** ALPN select callback for the doq SSL context. Selects the protocol
+ * "doq" from the list that the client offers in in, inlen.
+ * Returns SSL_TLSEXT_ERR_OK if it was negotiated, and
+ * SSL_TLSEXT_ERR_ALERT_FATAL otherwise. */
+static int
+doq_alpn_select_cb(SSL* ATTR_UNUSED(ssl), const unsigned char** out,
+       unsigned char* outlen, const unsigned char* in, unsigned int inlen,
+       void* ATTR_UNUSED(arg))
+{
+       /* The server list has the single entry "doq", length prefixed. */
+       int ret = SSL_select_next_proto((void*)out, outlen,
+               (const unsigned char*)"\x03""doq", 4, in, inlen);
+       if(ret != OPENSSL_NPN_NEGOTIATED) {
+               verbose(VERB_ALGO, "doq alpn_select_cb: ALPN from client does "
+                       "not have 'doq'");
+               return SSL_TLSEXT_ERR_ALERT_FATAL;
+       }
+       return SSL_TLSEXT_ERR_OK;
+}
+
+/** Create the SSL_CTX for the server side of doq.
+ * The context is restricted to TLS 1.3 and gets the ALPN select callback
+ * (if HAVE_SSL_CTX_SET_ALPN_SELECT_CB is defined). The certificate chain
+ * and the private key are loaded from ssl_service_pem and
+ * ssl_service_key, and the key is checked. If ssl_verify_pem is a
+ * nonempty string, it is loaded as verify location and as client CA
+ * list, and a peer certificate is required.
+ * Max early data is set to 0xffffffff. QUIC support is configured with
+ * ngtcp2_crypto_quictls_configure_server_context if that is available,
+ * and otherwise with an SSL_QUIC_METHOD that is allocated here and
+ * stored in doq_socket->quic_method.
+ * @param doq_socket: the doq server socket, it provides the file names.
+ * @return the new context, or NULL on failure; the error is logged and
+ *     the context is freed in that case. */
+static SSL_CTX*
+doq_ctx_server_setup(struct doq_server_socket* doq_socket)
+{
+       char* sid_ctx = "unbound server";
+#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
+       SSL_QUIC_METHOD* quic_method;
+#endif
+       SSL_CTX* ctx = SSL_CTX_new(TLS_server_method());
+       if(!ctx) {
+               log_crypto_err("Could not SSL_CTX_new");
+               return NULL;
+       }
+       SSL_CTX_set_options(ctx,
+               (SSL_OP_ALL & ~SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS) |
+               SSL_OP_SINGLE_ECDH_USE |
+               SSL_OP_CIPHER_SERVER_PREFERENCE |
+               SSL_OP_NO_ANTI_REPLAY);
+       SSL_CTX_set_mode(ctx, SSL_MODE_RELEASE_BUFFERS);
+       SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION);
+       SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION);
+#ifdef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
+       SSL_CTX_set_alpn_select_cb(ctx, doq_alpn_select_cb, NULL);
+#endif
+       SSL_CTX_set_default_verify_paths(ctx);
+       if(!SSL_CTX_use_certificate_chain_file(ctx,
+               doq_socket->ssl_service_pem)) {
+               log_err("doq: error for cert file: %s",
+                       doq_socket->ssl_service_pem);
+               log_crypto_err("doq: error in "
+                       "SSL_CTX_use_certificate_chain_file");
+               SSL_CTX_free(ctx);
+               return NULL;
+       }
+       if(!SSL_CTX_use_PrivateKey_file(ctx, doq_socket->ssl_service_key,
+               SSL_FILETYPE_PEM)) {
+               log_err("doq: error for private key file: %s",
+                       doq_socket->ssl_service_key);
+               log_crypto_err("doq: error in SSL_CTX_use_PrivateKey_file");
+               SSL_CTX_free(ctx);
+               return NULL;
+       }
+       if(!SSL_CTX_check_private_key(ctx)) {
+               log_err("doq: error for key file: %s",
+                       doq_socket->ssl_service_key);
+               log_crypto_err("doq: error in SSL_CTX_check_private_key");
+               SSL_CTX_free(ctx);
+               return NULL;
+       }
+       SSL_CTX_set_session_id_context(ctx, (void*)sid_ctx, strlen(sid_ctx));
+       if(doq_socket->ssl_verify_pem && doq_socket->ssl_verify_pem[0]) {
+               if(!SSL_CTX_load_verify_locations(ctx,
+                       doq_socket->ssl_verify_pem, NULL)) {
+                       log_err("doq: error for verify pem file: %s",
+                               doq_socket->ssl_verify_pem);
+                       log_crypto_err("doq: error in "
+                               "SSL_CTX_load_verify_locations");
+                       SSL_CTX_free(ctx);
+                       return NULL;
+               }
+               SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(
+                       doq_socket->ssl_verify_pem));
+               SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER|
+                       SSL_VERIFY_CLIENT_ONCE|
+                       SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL);
+       }
+
+       SSL_CTX_set_max_early_data(ctx, 0xffffffff);
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
+       if(ngtcp2_crypto_quictls_configure_server_context(ctx) != 0) {
+               log_err("ngtcp2_crypto_quictls_configure_server_context failed");
+               SSL_CTX_free(ctx);
+               return NULL;
+       }
+#else
+       /* The quic_method needs to remain valid during the SSL_CTX
+        * lifetime, so we allocate it. It is freed with the
+        * doq_server_socket. */
+       quic_method = calloc(1, sizeof(SSL_QUIC_METHOD));
+       if(!quic_method) {
+               log_err("calloc failed: out of memory");
+               SSL_CTX_free(ctx);
+               return NULL;
+       }
+       doq_socket->quic_method = quic_method;
+       quic_method->set_encryption_secrets = doq_set_encryption_secrets;
+       quic_method->add_handshake_data = doq_add_handshake_data;
+       quic_method->flush_flight = doq_flush_flight;
+       quic_method->send_alert = doq_send_alert;
+       SSL_CTX_set_quic_method(ctx, doq_socket->quic_method);
+#endif
+       return ctx;
+}
+
+/** Return the ngtcp2_conn for a ngtcp2_crypto_conn_ref; the user_data of
+ * the conn_ref is the doq_conn that holds it. */
+static ngtcp2_conn* doq_conn_ref_get_conn(ngtcp2_crypto_conn_ref* conn_ref)
+{
+       return ((struct doq_conn*)conn_ref->user_data)->conn;
+}
+
+/** Create a new SSL session for a server connection, from the context.
+ * The app data of the SSL is the conn_ref of the connection if
+ * HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT is defined, and the
+ * doq_conn itself otherwise. The session is put in accept state with
+ * quic early data enabled. Returns NULL if SSL_new fails. */
+static SSL*
+doq_ssl_server_setup(SSL_CTX* ctx, struct doq_conn* conn)
+{
+       SSL* session = SSL_new(ctx);
+       if(session == NULL) {
+               log_crypto_err("doq: SSL_new failed");
+               return NULL;
+       }
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
+       conn->conn_ref.get_conn = &doq_conn_ref_get_conn;
+       conn->conn_ref.user_data = conn;
+       SSL_set_app_data(session, &conn->conn_ref);
+#else
+       SSL_set_app_data(session, conn);
+#endif
+       SSL_set_accept_state(session);
+       SSL_set_quic_early_data_enabled(session, 1);
+       return session;
+}
+
+/** Set up the server tls context of the doq_socket, it is stored in
+ * doq_socket->ctx. Returns false if the context could not be created. */
+int
+doq_socket_setup_ctx(struct doq_server_socket* doq_socket)
+{
+       doq_socket->ctx = doq_ctx_server_setup(doq_socket);
+       return doq_socket->ctx != NULL;
+}
+
+int
+doq_conn_setup(struct doq_conn* conn, uint8_t* scid, size_t scidlen,
+       uint8_t* ocid, size_t ocidlen, const uint8_t* token, size_t tokenlen)
+{
+       int rv;
+       struct ngtcp2_cid dcid, sv_scid, scid_cid;
+       struct ngtcp2_path path;
+       struct ngtcp2_callbacks callbacks;
+       struct ngtcp2_settings settings;
+       struct ngtcp2_transport_params params;
+       memset(&dcid, 0, sizeof(dcid));
+       memset(&sv_scid, 0, sizeof(sv_scid));
+       memset(&scid_cid, 0, sizeof(scid_cid));
+       memset(&path, 0, sizeof(path));
+       memset(&callbacks, 0, sizeof(callbacks));
+       memset(&settings, 0, sizeof(settings));
+       memset(&params, 0, sizeof(params));
+
+       ngtcp2_cid_init(&scid_cid, scid, scidlen);
+       ngtcp2_cid_init(&dcid, conn->key.dcid, conn->key.dcidlen);
+
+       path.remote.addr = (struct sockaddr*)&conn->key.paddr.addr;
+       path.remote.addrlen = conn->key.paddr.addrlen;
+       path.local.addr = (struct sockaddr*)&conn->key.paddr.localaddr;
+       path.local.addrlen = conn->key.paddr.localaddrlen;
+
+       callbacks.recv_client_initial = ngtcp2_crypto_recv_client_initial_cb;
+       callbacks.recv_crypto_data = ngtcp2_crypto_recv_crypto_data_cb;
+       callbacks.encrypt = ngtcp2_crypto_encrypt_cb;
+       callbacks.decrypt = ngtcp2_crypto_decrypt_cb;
+       callbacks.hp_mask = ngtcp2_crypto_hp_mask;
+       callbacks.update_key = ngtcp2_crypto_update_key_cb;
+       callbacks.delete_crypto_aead_ctx =
+               ngtcp2_crypto_delete_crypto_aead_ctx_cb;
+       callbacks.delete_crypto_cipher_ctx =
+               ngtcp2_crypto_delete_crypto_cipher_ctx_cb;
+       callbacks.get_path_challenge_data =
+               ngtcp2_crypto_get_path_challenge_data_cb;
+       callbacks.version_negotiation = ngtcp2_crypto_version_negotiation_cb;
+       callbacks.rand = doq_rand_cb;
+       callbacks.get_new_connection_id = doq_get_new_connection_id_cb;
+       callbacks.remove_connection_id = doq_remove_connection_id_cb;
+       callbacks.handshake_completed = doq_handshake_completed_cb;
+       callbacks.stream_open = doq_stream_open_cb;
+       callbacks.stream_close = doq_stream_close_cb;
+       callbacks.stream_reset = doq_stream_reset_cb;
+       callbacks.acked_stream_data_offset = doq_acked_stream_data_offset_cb;
+       callbacks.recv_stream_data = doq_recv_stream_data_cb;
+
+       ngtcp2_settings_default(&settings);
+       if(verbosity >= VERB_ALGO) {
+               settings.log_printf = doq_log_printf_cb;
+       }
+       settings.rand_ctx.native_handle = conn->doq_socket->rnd;
+       settings.initial_ts = doq_get_timestamp_nanosec();
+       settings.max_stream_window = 6*1024*1024;
+       settings.max_window = 6*1024*1024;
+#ifdef HAVE_STRUCT_NGTCP2_SETTINGS_TOKENLEN
+       settings.token = (void*)token;
+       settings.tokenlen = tokenlen;
+#else
+       settings.token.base = (void*)token;
+       settings.token.len = tokenlen;
+#endif
+
+       ngtcp2_transport_params_default(&params);
+       params.max_idle_timeout = conn->doq_socket->idle_timeout;
+       params.active_connection_id_limit = 7;
+       params.initial_max_stream_data_bidi_local = 256*1024;
+       params.initial_max_stream_data_bidi_remote = 256*1024;
+       params.initial_max_data = 1024*1024;
+       /* DoQ uses bidi streams, so we allow 0 uni streams. */
+       params.initial_max_streams_uni = 0;
+       /* Initial max on number of bidi streams the remote end can open.
+        * That is the number of queries it can make, at first. */
+       params.initial_max_streams_bidi = 10;
+       if(ocid) {
+               ngtcp2_cid_init(&params.original_dcid, ocid, ocidlen);
+               ngtcp2_cid_init(&params.retry_scid, conn->key.dcid,
+                       conn->key.dcidlen);
+               params.retry_scid_present = 1;
+       } else {
+               ngtcp2_cid_init(&params.original_dcid, conn->key.dcid,
+                       conn->key.dcidlen);
+       }
+#ifdef HAVE_STRUCT_NGTCP2_TRANSPORT_PARAMS_ORIGINAL_DCID_PRESENT
+       params.original_dcid_present = 1;
+#endif
+       doq_fill_rand(conn->doq_socket->rnd, params.stateless_reset_token,
+               sizeof(params.stateless_reset_token));
+       sv_scid.datalen = conn->doq_socket->sv_scidlen;
+       lock_rw_wrlock(&conn->table->conid_lock);
+       if(!doq_conn_generate_new_conid(conn, sv_scid.data, sv_scid.datalen)) {
+               lock_rw_unlock(&conn->table->conid_lock);
+               return 0;
+       }
+
+       rv = ngtcp2_conn_server_new(&conn->conn, &scid_cid, &sv_scid, &path,
+               conn->version, &callbacks, &settings, &params, NULL, conn);
+       if(rv != 0) {
+               lock_rw_unlock(&conn->table->conid_lock);
+               log_err("ngtcp2_conn_server_new failed: %s",
+                       ngtcp2_strerror(rv));
+               return 0;
+       }
+       if(!doq_conn_setup_conids(conn)) {
+               lock_rw_unlock(&conn->table->conid_lock);
+               log_err("doq_conn_setup_conids failed: out of memory");
+               return 0;
+       }
+       lock_rw_unlock(&conn->table->conid_lock);
+       conn->ssl = doq_ssl_server_setup((SSL_CTX*)conn->doq_socket->ctx,
+               conn);
+       if(!conn->ssl) {
+               log_err("doq_ssl_server_setup failed");
+               return 0;
+       }
+       ngtcp2_conn_set_tls_native_handle(conn->conn, conn->ssl);
+       doq_conn_write_enable(conn);
+       return 1;
+}
+
+/** Look up a connection id in the conid tree of the table.
+ * Returns the doq_conid stored in the tree, or NULL if there is none. */
+struct doq_conid*
+doq_conid_find(struct doq_table* table, const uint8_t* data, size_t datalen)
+{
+       struct doq_conid lookup;
+       struct rbnode_type* found;
+
+       /* Temporary search key; only the members set here are filled in. */
+       lookup.cidlen = datalen;
+       lookup.cid = (void*)data;
+       lookup.node.key = &lookup;
+
+       found = rbtree_search(table->conid_tree, &lookup);
+       if(!found)
+               return NULL;
+       return (struct doq_conid*)found->key;
+}
+
+/** push the conid on the front of the conid list of the connection */
+static void
+doq_conid_list_insert(struct doq_conn* conn, struct doq_conid* conid)
+{
+       struct doq_conid* old_head = conn->conid_list;
+
+       conid->next = old_head;
+       conid->prev = NULL;
+       if(old_head != NULL)
+               old_head->prev = conid;
+       conn->conid_list = conid;
+}
+
+/** unlink the conid from the conid list of the connection; the prev and
+ * next pointers of the conid itself are left as they are */
+static void
+doq_conid_list_remove(struct doq_conn* conn, struct doq_conid* conid)
+{
+       struct doq_conid* before = conid->prev;
+       struct doq_conid* after = conid->next;
+
+       if(before == NULL)
+               conn->conid_list = after;
+       else    before->next = after;
+       if(after != NULL)
+               after->prev = before;
+}
+
+/** allocate a doq_conid with its own copy of the connection id bytes and
+ * of the connection key, including a duplicate of the key's dcid.
+ * Returns NULL if an allocation fails. */
+static struct doq_conid*
+doq_conid_create(uint8_t* data, size_t datalen, struct doq_conn_key* key)
+{
+       struct doq_conid* item = calloc(1, sizeof(*item));
+       if(item == NULL)
+               return NULL;
+
+       item->cid = memdup(data, datalen);
+       if(item->cid == NULL) {
+               free(item);
+               return NULL;
+       }
+       item->cidlen = datalen;
+       item->node.key = item;
+
+       /* Struct copy of the key, then swap the dcid pointer for an owned
+        * duplicate, so the conid does not point into the caller's key. */
+       item->key = *key;
+       item->key.dcid = memdup(key->dcid, key->dcidlen);
+       if(item->key.dcid != NULL)
+               return item;
+
+       free(item->cid);
+       free(item);
+       return NULL;
+}
+
+/** free a doq_conid and the buffers it holds; NULL is ignored */
+void
+doq_conid_delete(struct doq_conid* conid)
+{
+       if(conid != NULL) {
+               free(conid->key.dcid);
+               free(conid->cid);
+               free(conid);
+       }
+}
+
+/** return true if the conid is for the conn: the dcid, the remote address,
+ * the local address and the interface index of both keys are equal. */
+static int
+conid_is_for_conn(struct doq_conn* conn, struct doq_conid* conid)
+{
+       struct doq_conn_key* ck = &conid->key;
+       struct doq_conn_key* nk = &conn->key;
+
+       /* destination connection id */
+       if(ck->dcidlen != nk->dcidlen)
+               return 0;
+       if(memcmp(ck->dcid, nk->dcid, ck->dcidlen) != 0)
+               return 0;
+       /* remote address */
+       if(ck->paddr.addrlen != nk->paddr.addrlen)
+               return 0;
+       if(memcmp(&ck->paddr.addr, &nk->paddr.addr, ck->paddr.addrlen) != 0)
+               return 0;
+       /* local address */
+       if(ck->paddr.localaddrlen != nk->paddr.localaddrlen)
+               return 0;
+       if(memcmp(&ck->paddr.localaddr, &nk->paddr.localaddr,
+               ck->paddr.localaddrlen) != 0)
+               return 0;
+       /* interface */
+       if(ck->paddr.ifindex != nk->paddr.ifindex)
+               return 0;
+       return 1;
+}
+
+/** Make the connection id resolve to this connection: add it to the
+ * conid list of the connection and to the conid tree of the table.
+ * Returns 0 only when the new element cannot be allocated. */
+int
+doq_conn_associate_conid(struct doq_conn* conn, uint8_t* data, size_t datalen)
+{
+       struct doq_conid* existing = doq_conid_find(conn->table, data,
+               datalen);
+       struct doq_conid* fresh;
+
+       if(existing != NULL) {
+               if(!conid_is_for_conn(conn, existing)) {
+                       /* The id is in the tree for a different connection.
+                        * It is not added a second time, so a lookup of
+                        * this id keeps resolving to that other
+                        * connection. Nothing is put on our conid list,
+                        * so there is nothing to clean up later. */
+                       verbose(VERB_ALGO, "doq connection id already exists for "
+                               "another doq_conn. Ignoring second connection id.");
+               }
+               /* Ignored, or already inserted for this connection. */
+               return 1;
+       }
+
+       fresh = doq_conid_create(data, datalen, &conn->key);
+       if(fresh == NULL)
+               return 0;
+       doq_conid_list_insert(conn, fresh);
+       (void)rbtree_insert(conn->table->conid_tree, &fresh->node);
+       return 1;
+}
+
+/** Drop the connection id from the conid tree and from the conid list of
+ * the connection, and free it. An id that is unknown, or that belongs to
+ * another connection, is left alone. */
+void
+doq_conn_dissociate_conid(struct doq_conn* conn, const uint8_t* data,
+       size_t datalen)
+{
+       struct doq_conid* hit = doq_conid_find(conn->table, data, datalen);
+
+       if(hit == NULL)
+               return;
+       if(!conid_is_for_conn(conn, hit))
+               return;
+       (void)rbtree_delete(conn->table->conid_tree, hit->node.key);
+       doq_conid_list_remove(conn, hit);
+       doq_conid_delete(hit);
+}
+
+/** associate every scid of the array, and after that the dcid of the
+ * connection key. Stops and returns 0 at the first failure.
+ * caller must hold the locks on conn and doq_table.conid_lock. */
+static int
+doq_conn_setup_id_array_and_dcid(struct doq_conn* conn,
+       struct ngtcp2_cid* scids, size_t num_scid)
+{
+       size_t idx = 0;
+
+       while(idx < num_scid) {
+               struct ngtcp2_cid* cur = &scids[idx];
+               if(!doq_conn_associate_conid(conn, cur->data, cur->datalen))
+                       return 0;
+               idx++;
+       }
+       if(doq_conn_associate_conid(conn, conn->key.dcid, conn->key.dcidlen))
+               return 1;
+       return 0;
+}
+
+/** Fetch the scids of the ngtcp2 connection and associate them, together
+ * with the dcid, with the doq connection.
+ * Returns 0 on failure (allocation failure), 1 on success. */
+int
+doq_conn_setup_conids(struct doq_conn* conn)
+{
+       /* Usually there are not that many scids when just accepted,
+        * like only 2, so a small array on the stack is tried first. */
+       struct ngtcp2_cid onstack[4];
+       struct ngtcp2_cid* allocated = NULL;
+       struct ngtcp2_cid* list = onstack;
+       size_t count;
+       int ok;
+
+#ifdef HAVE_NGTCP2_CONN_GET_NUM_SCID
+       count = ngtcp2_conn_get_num_scid(conn->conn);
+#else
+       count = ngtcp2_conn_get_scid(conn->conn, NULL);
+#endif
+       if(count > sizeof(onstack)/sizeof(onstack[0])) {
+               allocated = calloc(count, sizeof(struct ngtcp2_cid));
+               if(allocated == NULL)
+                       return 0;
+               list = allocated;
+       }
+       ngtcp2_conn_get_scid(conn->conn, list);
+       ok = doq_conn_setup_id_array_and_dcid(conn, list, count);
+       /* NULL when the stack array was used */
+       free(allocated);
+       if(!ok)
+               return 0;
+       return 1;
+}
+
+/** Remove all connection ids of the connection from the conid tree,
+ * free them and empty the conid list. A NULL conn is ignored. */
+void
+doq_conn_clear_conids(struct doq_conn* conn)
+{
+       struct doq_conid* cur;
+
+       if(conn == NULL)
+               return;
+       for(cur = conn->conid_list; cur != NULL; ) {
+               /* pick up the link before the element is freed */
+               struct doq_conid* following = cur->next;
+               (void)rbtree_delete(conn->table->conid_tree, cur->node.key);
+               doq_conid_delete(cur);
+               cur = following;
+       }
+       conn->conid_list = NULL;
+}
+
+/** Get the current time as a timestamp in nanoseconds.
+ * Uses clock_gettime(CLOCK_REALTIME) if CLOCK_REALTIME is defined and
+ * gettimeofday otherwise. A failure of the clock call is logged; the
+ * time structure is zeroed beforehand in both variants, so that the
+ * result is never computed from uninitialised memory.
+ * @return the time in nanoseconds. */
+ngtcp2_tstamp doq_get_timestamp_nanosec(void)
+{
+#ifdef CLOCK_REALTIME
+       struct timespec tp;
+       memset(&tp, 0, sizeof(tp));
+       /* Get a nanosecond time, that can be compared with the event base. */
+       if(clock_gettime(CLOCK_REALTIME, &tp) == -1) {
+               log_err("clock_gettime failed: %s", strerror(errno));
+       }
+       return ((uint64_t)tp.tv_sec)*((uint64_t)1000000000) +
+               ((uint64_t)tp.tv_nsec);
+#else
+       struct timeval tv;
+       /* Zero it, like the timespec in the other variant, so that a
+        * failed call does not leave indeterminate values in tv. */
+       memset(&tv, 0, sizeof(tv));
+       if(gettimeofday(&tv, NULL) < 0) {
+               log_err("gettimeofday failed: %s", strerror(errno));
+       }
+       /* microseconds are scaled up to nanoseconds */
+       return ((uint64_t)tv.tv_sec)*((uint64_t)1000000000) +
+               ((uint64_t)tv.tv_usec)*((uint64_t)1000);
+#endif /* CLOCK_REALTIME */
+}
+
+/** doq start the closing period for the connection.
+ * Writes the connection close packet with ngtcp2 and keeps a copy in
+ * conn->close_pkt. Returns 1 for a NULL conn, for a connection that is
+ * already closing or draining, and when the close packet was stored.
+ * Returns 0 if no close packet could be written or stored. */
+static int
+doq_conn_start_closing_period(struct comm_point* c, struct doq_conn* conn)
+{
+       struct ngtcp2_path_storage ps;
+       struct ngtcp2_pkt_info pi;
+       ngtcp2_ssize ret;
+       size_t pktlen;
+       int in_period;
+
+       if(conn == NULL)
+               return 1;
+
+#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
+       in_period = ngtcp2_conn_in_closing_period(conn->conn);
+#else
+       in_period = ngtcp2_conn_is_in_closing_period(conn->conn);
+#endif
+       if(in_period)
+               return 1;
+
+#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
+       in_period = ngtcp2_conn_in_draining_period(conn->conn);
+#else
+       in_period = ngtcp2_conn_is_in_draining_period(conn->conn);
+#endif
+       if(in_period) {
+               doq_conn_write_disable(conn);
+               return 1;
+       }
+
+       ngtcp2_path_storage_zero(&ps);
+       sldns_buffer_clear(c->doq_socket->pkt_buf);
+       /* the call to ngtcp2_conn_write_connection_close causes the
+        * conn to be closed. It is now in the closing period. */
+       ret = ngtcp2_conn_write_connection_close(conn->conn, &ps.path,
+               &pi, sldns_buffer_begin(c->doq_socket->pkt_buf),
+               sldns_buffer_remaining(c->doq_socket->pkt_buf),
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+               &conn->ccerr,
+#else
+               &conn->last_error,
+#endif
+               doq_get_timestamp_nanosec());
+       if(ret <= 0) {
+               /* a zero length result fails without a log message */
+               if(ret < 0)
+                       log_err("doq ngtcp2_conn_write_connection_close failed: %s",
+                               ngtcp2_strerror(ret));
+               return 0;
+       }
+       sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
+       sldns_buffer_flip(c->doq_socket->pkt_buf);
+       pktlen = sldns_buffer_limit(c->doq_socket->pkt_buf);
+
+       /* The close packet is allocated, because it may have to be repeated.
+        * When incoming packets have this connection dcid. */
+       conn->close_pkt = memdup(sldns_buffer_begin(c->doq_socket->pkt_buf),
+               pktlen);
+       if(conn->close_pkt == NULL) {
+               log_err("doq: could not allocate close packet: out of memory");
+               return 0;
+       }
+       conn->close_pkt_len = pktlen;
+       conn->close_ecn = pi.ecn;
+       return 1;
+}
+
+/** doq send the close packet for the connection, perhaps again.
+ * Copies the stored close packet into the packet buffer, sends it and
+ * disables writing for the connection. Returns 0 if there is no stored
+ * close packet or it does not fit in the packet buffer. */
+int
+doq_conn_send_close(struct comm_point* c, struct doq_conn* conn)
+{
+       if(!conn || !conn->close_pkt)
+               return 0;
+       if(sldns_buffer_capacity(c->doq_socket->pkt_buf) < conn->close_pkt_len)
+               return 0;
+
+       sldns_buffer_clear(c->doq_socket->pkt_buf);
+       sldns_buffer_write(c->doq_socket->pkt_buf, conn->close_pkt,
+               conn->close_pkt_len);
+       sldns_buffer_flip(c->doq_socket->pkt_buf);
+
+       verbose(VERB_ALGO, "doq send connection close");
+       doq_send_pkt(c, &conn->key.paddr, conn->close_ecn);
+       doq_conn_write_disable(conn);
+       return 1;
+}
+
+/** doq close the connection on error. If it returns a failure, it
+ * does not wait to send a close, and the connection can be dropped.
+ * An idle close returns failure straight away. */
+static int
+doq_conn_close_error(struct comm_point* c, struct doq_conn* conn)
+{
+       int idle_close;
+       int draining;
+
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+       idle_close = (conn->ccerr.type == NGTCP2_CCERR_TYPE_IDLE_CLOSE);
+#else
+       idle_close = (conn->last_error.type ==
+               NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE);
+#endif
+       if(idle_close)
+               return 0;
+       if(!doq_conn_start_closing_period(c, conn))
+               return 0;
+
+#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
+       draining = ngtcp2_conn_in_draining_period(conn->conn);
+#else
+       draining = ngtcp2_conn_is_in_draining_period(conn->conn);
+#endif
+       if(draining) {
+               doq_conn_write_disable(conn);
+               return 1;
+       }
+       doq_conn_write_enable(conn);
+       if(doq_conn_send_close(c, conn))
+               return 1;
+       return 0;
+}
+
+int
+doq_conn_recv(struct comm_point* c, struct doq_pkt_addr* paddr,
+       struct doq_conn* conn, struct ngtcp2_pkt_info* pi, int* err_retry,
+       int* err_drop)
+{
+       int ret;
+       ngtcp2_tstamp ts;
+       struct ngtcp2_path path;
+       memset(&path, 0, sizeof(path));
+       path.remote.addr = (struct sockaddr*)&paddr->addr;
+       path.remote.addrlen = paddr->addrlen;
+       path.local.addr = (struct sockaddr*)&paddr->localaddr;
+       path.local.addrlen = paddr->localaddrlen;
+       ts = doq_get_timestamp_nanosec();
+
+       ret = ngtcp2_conn_read_pkt(conn->conn, &path, pi,
+               sldns_buffer_begin(c->doq_socket->pkt_buf),
+               sldns_buffer_limit(c->doq_socket->pkt_buf), ts);
+       if(ret != 0) {
+               if(err_retry)
+                       *err_retry = 0;
+               if(err_drop)
+                       *err_drop = 0;
+               if(ret == NGTCP2_ERR_DRAINING) {
+                       verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s",
+                               ngtcp2_strerror(ret));
+                       doq_conn_write_disable(conn);
+                       return 0;
+               } else if(ret == NGTCP2_ERR_DROP_CONN) {
+                       verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s",
+                               ngtcp2_strerror(ret));
+                       if(err_drop)
+                               *err_drop = 1;
+                       return 0;
+               } else if(ret == NGTCP2_ERR_RETRY) {
+                       verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s",
+                               ngtcp2_strerror(ret));
+                       if(err_retry)
+                               *err_retry = 1;
+                       if(err_drop)
+                               *err_drop = 1;
+                       return 0;
+               } else if(ret == NGTCP2_ERR_CRYPTO) {
+                       if(
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+                               !conn->ccerr.error_code
+#else
+                               !conn->last_error.error_code
+#endif
+                               ) {
+                               /* in picotls the tls alert may need to be
+                                * copied, but this is with openssl. And there
+                                * is conn->tls_alert. */
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+                               ngtcp2_ccerr_set_tls_alert(&conn->ccerr,
+                                       conn->tls_alert, NULL, 0);
+#else
+                               ngtcp2_connection_close_error_set_transport_error_tls_alert(
+                                       &conn->last_error, conn->tls_alert,
+                                       NULL, 0);
+#endif
+                       }
+               } else {
+                       if(
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+                               !conn->ccerr.error_code
+#else
+                               !conn->last_error.error_code
+#endif
+                               ) {
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+                               ngtcp2_ccerr_set_liberr(&conn->ccerr, ret,
+                                       NULL, 0);
+#else
+                               ngtcp2_connection_close_error_set_transport_error_liberr(
+                                       &conn->last_error, ret, NULL, 0);
+#endif
+                       }
+               }
+               log_err("ngtcp2_conn_read_pkt failed: %s",
+                       ngtcp2_strerror(ret));
+               if(!doq_conn_close_error(c, conn)) {
+                       if(err_drop)
+                               *err_drop = 1;
+               }
+               return 0;
+       }
+       doq_conn_write_enable(conn);
+       return 1;
+}
+
+/** doq stream write is done. The stream is handed to
+ * doq_stream_off_write_list; nothing is freed here. */
+static void
+doq_stream_write_is_done(struct doq_conn* conn, struct doq_stream* stream)
+{
+       /* Cannot deallocate, the buffer may be needed for resends. */
+       doq_stream_off_write_list(conn, stream);
+}
+
+int
+doq_conn_write_streams(struct comm_point* c, struct doq_conn* conn,
+       int* err_drop)
+{
+       struct doq_stream* stream = conn->stream_write_first;
+       ngtcp2_path_storage ps;
+       ngtcp2_tstamp ts = doq_get_timestamp_nanosec();
+       size_t num_packets = 0, max_packets = 65535;
+       ngtcp2_path_storage_zero(&ps);
+
+       for(;;) {
+               int64_t stream_id;
+               uint32_t flags = 0;
+               ngtcp2_pkt_info pi;
+               ngtcp2_vec datav[2];
+               size_t datav_count = 0;
+               ngtcp2_ssize ret, ndatalen = 0;
+               int fin;
+
+               if(stream) {
+                       /* data to send */
+                       verbose(VERB_ALGO, "doq: doq_conn write stream %d",
+                               (int)stream->stream_id);
+                       stream_id = stream->stream_id;
+                       fin = 1;
+                       if(stream->nwrite < 2) {
+                               datav[0].base = ((uint8_t*)&stream->
+                                       outlen_wire) + stream->nwrite;
+                               datav[0].len = 2 - stream->nwrite;
+                               datav[1].base = stream->out;
+                               datav[1].len = stream->outlen;
+                               datav_count = 2;
+                       } else {
+                               datav[0].base = stream->out +
+                                       (stream->nwrite-2);
+                               datav[0].len = stream->outlen -
+                                       (stream->nwrite-2);
+                               datav_count = 1;
+                       }
+               } else {
+                       /* no data to send */
+                       verbose(VERB_ALGO, "doq: doq_conn write stream -1");
+                       stream_id = -1;
+                       fin = 0;
+                       datav[0].base = NULL;
+                       datav[0].len = 0;
+                       datav_count = 1;
+               }
+
+               /* if more streams, set it to write more */
+               if(stream && stream->write_next)
+                       flags |= NGTCP2_WRITE_STREAM_FLAG_MORE;
+               if(fin)
+                       flags |= NGTCP2_WRITE_STREAM_FLAG_FIN;
+
+               sldns_buffer_clear(c->doq_socket->pkt_buf);
+               ret = ngtcp2_conn_writev_stream(conn->conn, &ps.path, &pi,
+                       sldns_buffer_begin(c->doq_socket->pkt_buf),
+                       sldns_buffer_remaining(c->doq_socket->pkt_buf),
+                       &ndatalen, flags, stream_id, datav, datav_count, ts);
+               if(ret < 0) {
+                       if(ret == NGTCP2_ERR_WRITE_MORE) {
+                               verbose(VERB_ALGO, "doq: write more, ndatalen %d", (int)ndatalen);
+                               if(stream) {
+                                       if(ndatalen >= 0)
+                                               stream->nwrite += ndatalen;
+                                       if(stream->nwrite >= stream->outlen+2)
+                                               doq_stream_write_is_done(
+                                                       conn, stream);
+                                       stream = stream->write_next;
+                               }
+                               continue;
+                       } else if(ret == NGTCP2_ERR_STREAM_DATA_BLOCKED) {
+                               verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_DATA_BLOCKED");
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+                               ngtcp2_ccerr_set_application_error(
+                                       &conn->ccerr, -1, NULL, 0);
+#else
+                               ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0);
+#endif
+                               if(err_drop)
+                                       *err_drop = 0;
+                               if(!doq_conn_close_error(c, conn)) {
+                                       if(err_drop)
+                                               *err_drop = 1;
+                               }
+                               return 0;
+                       } else if(ret == NGTCP2_ERR_STREAM_SHUT_WR) {
+                               verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_SHUT_WR");
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+                               ngtcp2_ccerr_set_application_error(
+                                       &conn->ccerr, -1, NULL, 0);
+#else
+                               ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0);
+#endif
+                               if(err_drop)
+                                       *err_drop = 0;
+                               if(!doq_conn_close_error(c, conn)) {
+                                       if(err_drop)
+                                               *err_drop = 1;
+                               }
+                               return 0;
+                       }
+
+                       log_err("doq: ngtcp2_conn_writev_stream failed: %s",
+                               ngtcp2_strerror(ret));
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+                       ngtcp2_ccerr_set_liberr(&conn->ccerr, ret, NULL, 0);
+#else
+                       ngtcp2_connection_close_error_set_transport_error_liberr(
+                               &conn->last_error, ret, NULL, 0);
+#endif
+                       if(err_drop)
+                               *err_drop = 0;
+                       if(!doq_conn_close_error(c, conn)) {
+                               if(err_drop)
+                                       *err_drop = 1;
+                       }
+                       return 0;
+               }
+               verbose(VERB_ALGO, "doq: writev_stream pkt size %d ndatawritten %d",
+                       (int)ret, (int)ndatalen);
+
+               if(ndatalen >= 0 && stream) {
+                       stream->nwrite += ndatalen;
+                       if(stream->nwrite >= stream->outlen+2)
+                               doq_stream_write_is_done(conn, stream);
+               }
+               if(ret == 0) {
+                       /* congestion limited */
+                       doq_conn_write_disable(conn);
+                       ngtcp2_conn_update_pkt_tx_time(conn->conn, ts);
+                       return 1;
+               }
+               sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
+               sldns_buffer_flip(c->doq_socket->pkt_buf);
+               doq_send_pkt(c, &conn->key.paddr, pi.ecn);
+
+               if(c->doq_socket->have_blocked_pkt)
+                       break;
+               if(++num_packets == max_packets)
+                       break;
+               if(stream)
+                       stream = stream->write_next;
+       }
+       ngtcp2_conn_update_pkt_tx_time(conn->conn, ts);
+       return 1;
+}
+
+void
+doq_conn_write_enable(struct doq_conn* conn)
+{
+       conn->write_interest = 1; /* read by doq_conn_set_write_list */
+}
+
+void
+doq_conn_write_disable(struct doq_conn* conn)
+{
+       conn->write_interest = 0; /* read by doq_conn_set_write_list */
+}
+
+/** doq append the connection to the write list; does nothing if the
+ * connection is already marked as on the list */
+static void
+doq_conn_write_list_append(struct doq_table* table, struct doq_conn* conn)
+{
+       struct doq_conn* tail;
+
+       if(conn->on_write_list)
+               return;
+       tail = table->write_list_last;
+       conn->write_prev = tail;
+       if(tail == NULL)
+               table->write_list_first = conn;
+       else    tail->write_next = conn;
+       conn->write_next = NULL;
+       table->write_list_last = conn;
+       conn->on_write_list = 1;
+}
+
+/** Take the connection off the write list of the table and clear its
+ * list pointers. Does nothing if it is not marked as on the list. */
+void
+doq_conn_write_list_remove(struct doq_table* table, struct doq_conn* conn)
+{
+       struct doq_conn* before;
+       struct doq_conn* after;
+
+       if(!conn->on_write_list)
+               return;
+       before = conn->write_prev;
+       after = conn->write_next;
+
+       if(after == NULL)
+               table->write_list_last = before;
+       else    after->write_prev = before;
+       if(before == NULL)
+               table->write_list_first = after;
+       else    before->write_next = after;
+
+       conn->write_prev = NULL;
+       conn->write_next = NULL;
+       conn->on_write_list = 0;
+}
+
+/** Bring the write list membership of the connection in line with its
+ * write_interest flag: append it when it wants to write and is not
+ * listed, remove it when it does not want to write and is listed. */
+void
+doq_conn_set_write_list(struct doq_table* table, struct doq_conn* conn)
+{
+       if(conn->write_interest) {
+               if(!conn->on_write_list)
+                       doq_conn_write_list_append(table, conn);
+       } else {
+               if(conn->on_write_list)
+                       doq_conn_write_list_remove(table, conn);
+       }
+}
+
+/** Take the first connection off the write list of the table.
+ * The lock of the connection is taken before it is unlinked, and it is
+ * still held when the connection is returned.
+ * Returns NULL if the write list is empty. */
+struct doq_conn*
+doq_table_pop_first(struct doq_table* table)
+{
+       struct doq_conn* head = table->write_list_first;
+       struct doq_conn* second;
+
+       if(head == NULL)
+               return NULL;
+       lock_basic_lock(&head->lock);
+       second = head->write_next;
+       table->write_list_first = second;
+       if(second == NULL)
+               table->write_list_last = NULL;
+       else    second->write_prev = NULL;
+       head->write_next = NULL;
+       head->write_prev = NULL;
+       head->on_write_list = 0;
+       return head;
+}
+
+/** Compute the time at which the timer of the connection has to fire.
+ * The time is stored in tv. Returns 0 if a timer is already set for
+ * exactly that time, and 1 otherwise. */
+int
+doq_conn_check_timer(struct doq_conn* conn, struct timeval* tv)
+{
+       ngtcp2_tstamp expiry = ngtcp2_conn_get_expiry(conn->conn);
+       ngtcp2_tstamp now = doq_get_timestamp_nanosec();
+       /* If the timer has already expired, use the current time, so it is
+        * added with zero timeout. This should call the callback straight
+        * away. Calling it from the event callbacks is cleaner than
+        * calling it here, because then it is always called with the same
+        * locks and so on. This routine only has the conn.lock. */
+       ngtcp2_tstamp t = (expiry <= now) ? now : expiry;
+       int timer_is_set;
+
+       /* convert to timeval */
+       memset(tv, 0, sizeof(*tv));
+       tv->tv_sec = t / NGTCP2_SECONDS;
+       tv->tv_usec = (t / NGTCP2_MICROSECONDS)%1000000;
+
+       /* If we already have a timer, is it the right value? */
+       timer_is_set = (conn->timer.timer_in_tree ||
+               conn->timer.timer_in_list);
+       if(timer_is_set && conn->timer.time.tv_sec == tv->tv_sec &&
+               conn->timer.time.tv_usec == tv->tv_usec)
+               return 0;
+       return 1;
+}
+
+/** doq print a log line for the connection: the remote address, the
+ * local address and the string s */
+static void
+doq_conn_log_line(struct doq_conn* conn, char* s)
+{
+       char local_txt[256];
+       char remote_txt[256];
+
+       addr_to_str((void*)&conn->key.paddr.addr, conn->key.paddr.addrlen,
+               remote_txt, sizeof(remote_txt));
+       addr_to_str((void*)&conn->key.paddr.localaddr,
+               conn->key.paddr.localaddrlen, local_txt, sizeof(local_txt));
+       log_info("doq conn %s %s %s", remote_txt, local_txt, s);
+}
+
+int
+doq_conn_handle_timeout(struct doq_conn* conn)
+{
+       ngtcp2_tstamp now = doq_get_timestamp_nanosec();
+       int rv;
+
+       if(verbosity >= VERB_ALGO)
+               doq_conn_log_line(conn, "timeout");
+
+       rv = ngtcp2_conn_handle_expiry(conn->conn, now);
+       if(rv != 0) {
+               verbose(VERB_ALGO, "ngtcp2_conn_handle_expiry failed: %s",
+                       ngtcp2_strerror(rv));
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+               ngtcp2_ccerr_set_liberr(&conn->ccerr, rv, NULL, 0);
+#else
+               ngtcp2_connection_close_error_set_transport_error_liberr(
+                       &conn->last_error, rv, NULL, 0);
+#endif
+               if(!doq_conn_close_error(conn->doq_socket->cp, conn)) {
+                       /* failed, return for deletion */
+                       return 0;
+               }
+               return 1;
+       }
+       doq_conn_write_enable(conn);
+       if(!doq_conn_write_streams(conn->doq_socket->cp, conn, NULL)) {
+               /* failed, return for deletion. */
+               return 0;
+       }
+       return 1;
+}
+
+/** Add to the current size counter of the table, under the size lock. */
+void
+doq_table_quic_size_add(struct doq_table* table, size_t add)
+{
+       lock_basic_lock(&table->size_lock);
+       table->current_size = table->current_size + add;
+       lock_basic_unlock(&table->size_lock);
+}
+
+/** Subtract from the current size counter of the table, under the size
+ * lock. The counter stops at zero and does not wrap around. */
+void
+doq_table_quic_size_subtract(struct doq_table* table, size_t subtract)
+{
+       lock_basic_lock(&table->size_lock);
+       table->current_size = (table->current_size < subtract) ?
+               0 : table->current_size - subtract;
+       lock_basic_unlock(&table->size_lock);
+}
+
+/** Check if an amount of memory fits within the quic size limit.
+ * @param table: doq table, its current size is read under the size lock.
+ * @param cfg: config, with the limit in quic_size.
+ * @param mem: the amount that is going to be added.
+ * @return 1 if the current size plus mem stays within cfg->quic_size,
+ *     0 if it would exceed it.
+ */
+int
+doq_table_quic_size_available(struct doq_table* table,
+       struct config_file* cfg, size_t mem)
+{
+       size_t cur;
+       size_t limit = cfg->quic_size;
+       lock_basic_lock(&table->size_lock);
+       cur = table->current_size;
+       lock_basic_unlock(&table->size_lock);
+
+       /* Compare by subtraction. The sum cur + mem could wrap around for
+        * a very large mem, and would then wrongly appear to fit. */
+       if(mem > limit || cur > limit - mem)
+               return 0;
+       return 1;
+}
+
+/** Read the current size counter of the table under the size lock.
+ * Returns 0 for a NULL table. */
+size_t doq_table_quic_size_get(struct doq_table* table)
+{
+       size_t result = 0;
+       if(table != NULL) {
+               lock_basic_lock(&table->size_lock);
+               result = table->current_size;
+               lock_basic_unlock(&table->size_lock);
+       }
+       return result;
+}
+#endif /* HAVE_NGTCP2 */
index 84ac4b068b1bdde971a8cb49119c17703b47b46f..c29f4d72b0a2737d49d5b34f5de6d38715d2ef6a 100644 (file)
 #define LISTEN_DNSPORT_H
 
 #include "util/netevent.h"
+#include "util/rbtree.h"
+#include "util/locks.h"
 #include "daemon/acl_list.h"
 #ifdef HAVE_NGHTTP2_NGHTTP2_H
 #include <nghttp2/nghttp2.h>
 #endif
+#ifdef HAVE_NGTCP2
+#include <ngtcp2/ngtcp2.h>
+#include <ngtcp2/ngtcp2_crypto.h>
+#endif
 struct listen_list;
 struct config_file;
 struct addrinfo;
@@ -100,7 +106,9 @@ enum listen_type {
        /** udp ipv6 (v4mapped) for use with ancillary data + dnscrypt*/
        listen_type_udpancil_dnscrypt,
        /** HTTP(2) over TLS over TCP */
-       listen_type_http
+       listen_type_http,
+       /** DNS over QUIC */
+       listen_type_doq
 };
 
 /*
@@ -188,6 +196,11 @@ int resolve_interface_names(char** ifs, int num_ifs,
  * @param tcp_conn_limit: TCP connection limit info.
  * @param sslctx: nonNULL if ssl context.
  * @param dtenv: nonNULL if dnstap enabled.
+ * @param doq_table: the doq connection table, with shared information.
+ * @param rnd: random state.
+ * @param ssl_service_key: the SSL service key file.
+ * @param ssl_service_pem: the SSL service pem file.
+ * @param cfg: config file struct.
  * @param cb: callback function when a request arrives. It is passed
  *       the packet and user argument. Return true to send a reply.
  * @param cb_arg: user data argument for callback function.
@@ -198,8 +211,10 @@ listen_create(struct comm_base* base, struct listen_port* ports,
        size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
        int harden_large_queries, uint32_t http_max_streams,
        char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
-       void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb,
-       void *cb_arg);
+       void* sslctx, struct dt_env* dtenv, struct doq_table* doq_table,
+       struct ub_randstate* rnd, const char* ssl_service_key,
+       const char* ssl_service_pem, struct config_file* cfg,
+       comm_point_callback_type* cb, void *cb_arg);
 
 /**
  * delete the listening structure
@@ -278,11 +293,12 @@ int create_udp_sock(int family, int socktype, struct sockaddr* addr,
  * @param freebind: set IP_FREEBIND socket option.
  * @param use_systemd: if true, fetch sockets from systemd.
  * @param dscp: DSCP to use.
+ * @param additional: additional log information for the socket type.
  * @return: the socket. -1 on error.
  */
 int create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
        int* reuseport, int transparent, int mss, int nodelay, int freebind,
-       int use_systemd, int dscp);
+       int use_systemd, int dscp, const char* additional);
 
 /**
  * Create and bind local listening socket
@@ -452,6 +468,377 @@ int http2_submit_dns_response(struct http2_session* h2_session);
 int http2_submit_dns_response(void* v);
 #endif /* HAVE_NGHTTP2 */
 
+#ifdef HAVE_NGTCP2
+struct doq_conid;
+struct doq_server_socket;
+
+/**
+ * DoQ shared connection table. It holds the connections for the host,
+ * and some config parameter values for connections. The host has to
+ * respond on that ip,port for those connections, so they are shared
+ * between threads.
+ */
+struct doq_table {
+       /** the lock on the tree and config elements. Insertion and deletion,
+        * and also lookup in the tree, need to hold the lock. */
+       lock_rw_type lock;
+       /** rbtree of doq_conn, the connections to different destination
+        * addresses, and can be found by dcid. */
+       struct rbtree_type* conn_tree;
+       /** lock for the conid tree, needed for the conid tree and also
+        * the conid elements */
+       lock_rw_type conid_lock;
+       /** rbtree of doq_conid, connections can be found by their
+        * connection ids. Lookup by connection id, finds doq_conn. */
+       struct rbtree_type* conid_tree;
+       /** the server scid length */
+       int sv_scidlen;
+       /** the static secret for the server */
+       uint8_t* static_secret;
+       /** length of the static secret */
+       size_t static_secret_len;
+       /** the idle timeout in nanoseconds */
+       uint64_t idle_timeout;
+       /** the list of write interested connections, hold the doq_table.lock
+        * to change them */
+       struct doq_conn* write_list_first, *write_list_last;
+       /** rbtree of doq_timer, the timers are keyed by timeout value. */
+       struct rbtree_type* timer_tree;
+       /** lock on the current_size counter. */
+       lock_basic_type size_lock;
+       /** current use, in bytes, of QUIC buffers.
+        * The doq_conn ngtcp2_conn structure, SSL structure and conid structs
+        * are not counted. */
+       size_t current_size;
+};
+
+/** create doq table */
+struct doq_table* doq_table_create(struct config_file* cfg,
+       struct ub_randstate* rnd);
+
+/** delete doq table */
+void doq_table_delete(struct doq_table* table);
+
+/**
+ * Timer information for doq timer.
+ */
+struct doq_timer {
+       /** The rbnode in the tree sorted by timeout value. The key is this
+        * struct. */
+       struct rbnode_type node;
+       /** The timeout value. Absolute time value. */
+       struct timeval time;
+       /** If the timer is in the time tree, with the node. */
+       int timer_in_tree;
+       /** If there are more timers with the exact same timeout value,
+        * they form a set of timers. The rbnode timer has a link to the list
+        * with the other timers in the set. The rbnode timer is not a
+        * member of the list with the other timers. The other timers are not
+        * linked into the tree. */
+       struct doq_timer* setlist_first, *setlist_last;
+       /** If the timer is on the setlist. */
+       int timer_in_list;
+       /** If in the setlist, the next and prev element. */
+       struct doq_timer* setlist_next, *setlist_prev;
+       /** The connection that the timeout is for. */
+       struct doq_conn* conn;
+       /** The worker that is waiting for the timeout event.
+        * Set for the rbnode tree linked element, if a worker is waiting
+        * for the event. If NULL, no worker is waiting for this timeout. */
+       struct doq_server_socket* worker_doq_socket;
+};
+
+/**
+ * Key information that makes a doq_conn node in the tree lookup.
+ */
+struct doq_conn_key {
+       /** the remote endpoint and local endpoint and ifindex */
+       struct doq_pkt_addr paddr;
+       /** the doq connection dcid. The key does not always own this
+        * memory: doq_conn_key_from_repinfo makes it point into the
+        * repinfo. */
+       uint8_t* dcid;
+       /** length of dcid, in bytes */
+       size_t dcidlen;
+};
+
+/**
+ * DoQ connection, for DNS over QUIC. One connection to a remote endpoint
+ * with a number of streams in it. Every stream is like a tcp stream with
+ * a uint16_t length, query read, and a uint16_t length and answer written.
+ */
+struct doq_conn {
+       /** rbtree node, key is addresses and dcid */
+       struct rbnode_type node;
+       /** lock on the connection */
+       lock_basic_type lock;
+       /** the key information, with dcid and address endpoint */
+       struct doq_conn_key key;
+       /** the doq server socket for inside callbacks */
+       struct doq_server_socket* doq_socket;
+       /** the doq table this connection is part of */
+       struct doq_table* table;
+       /** if the connection is about to be deleted. */
+       uint8_t is_deleted;
+       /** the version, the client chosen version of QUIC */
+       uint32_t version;
+       /** the ngtcp2 connection, a server connection */
+       struct ngtcp2_conn* conn;
+       /** the connection ids that are associated with this doq_conn.
+        * There can be a number of them, and that number can change. They
+        * are linked here, so that upon removal, the list of actually
+        * associated conid elements can be removed as well. */
+       struct doq_conid* conid_list;
+       /** the ngtcp2 last error for the connection */
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+       struct ngtcp2_ccerr ccerr;
+#else
+       struct ngtcp2_connection_close_error last_error;
+#endif
+       /** the recent tls alert error code */
+       uint8_t tls_alert;
+       /** the ssl object for the connection, of type SSL* */
+       void* ssl;
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
+       /** the connection reference for ngtcp2_conn and userdata in ssl */
+       struct ngtcp2_crypto_conn_ref conn_ref;
+#endif
+       /** closure packet, if any */
+       uint8_t* close_pkt;
+       /** length of closure packet. */
+       size_t close_pkt_len;
+       /** closure ecn */
+       uint32_t close_ecn;
+       /** the streams for this connection, of type doq_stream */
+       struct rbtree_type stream_tree;
+       /** the streams that want write, they have something to write.
+        * The list is ordered, the last have to wait for the first to
+        * get their data written. */
+       struct doq_stream* stream_write_first, *stream_write_last;
+       /** the conn has write interest if true, no write interest if false. */
+       uint8_t write_interest;
+       /** if the conn is on the connection write list */
+       uint8_t on_write_list;
+       /** the connection write list prev and next, if on the write list */
+       struct doq_conn* write_prev, *write_next;
+       /** The timer for the connection. If unused, it is not in the tree
+        * and not in the list. It is embedded in this struct, so that it is
+        * preallocated. It has to be set after every read and write on the
+        * connection, so this improves performance, and also the allocation
+        * cannot fail at that time. */
+       struct doq_timer timer;
+};
+
+/**
+ * Connection ID and the doq_conn that is that connection. A connection
+ * has an original dcid, and then more connection ids associated.
+ */
+struct doq_conid {
+       /** rbtree node, key is the connection id. */
+       struct rbnode_type node;
+       /** the next and prev in the list of conids for the doq_conn,
+        * the list that doq_conn.conid_list links to. */
+       struct doq_conid* next, *prev;
+       /** key to the doq_conn that is the connection */
+       struct doq_conn_key key;
+       /** the connection id, byte string */
+       uint8_t* cid;
+       /** the length of cid, in bytes */
+       size_t cidlen;
+};
+
+/**
+ * DoQ stream, for DNS over QUIC.
+ */
+struct doq_stream {
+       /** the rbtree node for the stream, key is the stream_id */
+       rbnode_type node;
+       /** the stream id */
+       int64_t stream_id;
+       /** if the stream is closed */
+       uint8_t is_closed;
+       /** if the query is complete */
+       uint8_t is_query_complete;
+       /** the number of bytes read on the stream, up to querylen+2.
+        * NOTE(review): the 2 is presumably the uint16_t length that
+        * precedes the query on the stream - confirm. */
+       size_t nread;
+       /** the length of the input query bytes */
+       size_t inlen;
+       /** the input bytes */
+       uint8_t* in;
+       /** does the stream have an answer to send */
+       uint8_t is_answer_available;
+       /** the number of answer bytes sent, up to outlen+2. */
+       size_t nwrite;
+       /** the length of the output answer bytes */
+       size_t outlen;
+       /** the output length in network wireformat */
+       uint16_t outlen_wire;
+       /** the output packet bytes */
+       uint8_t* out;
+       /** if the stream is on the write list */
+       uint8_t on_write_list;
+       /** the prev and next on the write list, if on the list */
+       struct doq_stream* write_prev, *write_next;
+};
+
+/** doq application error code that is sent when a stream is closed */
+#define DOQ_APP_ERROR_CODE 1
+
+/**
+ * Create the doq connection.
+ * @param c: the comm point for the listening doq socket.
+ * @param paddr: with remote and local address and ifindex for the
+ *     connection destination. This is where packets are sent.
+ * @param dcid: the dcid, Destination Connection ID.
+ * @param dcidlen: length of dcid.
+ * @param version: client chosen version.
+ * @return new doq connection or NULL on allocation failure.
+ */
+struct doq_conn* doq_conn_create(struct comm_point* c,
+       struct doq_pkt_addr* paddr, const uint8_t* dcid, size_t dcidlen,
+       uint32_t version);
+
+/**
+ * Delete the doq connection structure.
+ * @param conn: to delete.
+ * @param table: with memory size.
+ */
+void doq_conn_delete(struct doq_conn* conn, struct doq_table* table);
+
+/** compare function of doq_conn */
+int doq_conn_cmp(const void* key1, const void* key2);
+
+/** compare function of doq_conid */
+int doq_conid_cmp(const void* key1, const void* key2);
+
+/** compare function of doq_timer */
+int doq_timer_cmp(const void* key1, const void* key2);
+
+/** compare function of doq_stream */
+int doq_stream_cmp(const void* key1, const void* key2);
+
+/** setup the doq_socket server tls context */
+int doq_socket_setup_ctx(struct doq_server_socket* doq_socket);
+
+/** setup the doq connection callbacks, and settings. */
+int doq_conn_setup(struct doq_conn* conn, uint8_t* scid, size_t scidlen,
+       uint8_t* ocid, size_t ocidlen, const uint8_t* token, size_t tokenlen);
+
+/** fill a buffer with random data */
+void doq_fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len);
+
+/** delete a doq_conid */
+void doq_conid_delete(struct doq_conid* conid);
+
+/** add a connection id to the doq_conn.
+ * caller must hold doq_table.conid_lock. */
+int doq_conn_associate_conid(struct doq_conn* conn, uint8_t* data,
+       size_t datalen);
+
+/** remove a connection id from the doq_conn.
+ * caller must hold doq_table.conid_lock. */
+void doq_conn_dissociate_conid(struct doq_conn* conn, const uint8_t* data,
+       size_t datalen);
+
+/** initial setup to link current connection ids to the doq_conn */
+int doq_conn_setup_conids(struct doq_conn* conn);
+
+/** remove the connection ids from the doq_conn.
+ * caller must hold doq_table.conid_lock. */
+void doq_conn_clear_conids(struct doq_conn* conn);
+
+/** find a conid in the doq_conn connection.
+ * caller must hold table.conid_lock. */
+struct doq_conid* doq_conid_find(struct doq_table* doq_table,
+       const uint8_t* data, size_t datalen);
+
+/** receive a packet for a connection */
+int doq_conn_recv(struct comm_point* c, struct doq_pkt_addr* paddr,
+       struct doq_conn* conn, struct ngtcp2_pkt_info* pi, int* err_retry,
+       int* err_drop);
+
+/** send packets for a connection */
+int doq_conn_write_streams(struct comm_point* c, struct doq_conn* conn,
+       int* err_drop);
+
+/** send the close packet for the connection, perhaps again. */
+int doq_conn_send_close(struct comm_point* c, struct doq_conn* conn);
+
+/** delete doq stream */
+void doq_stream_delete(struct doq_stream* stream);
+
+/** doq read a connection key from repinfo. It is not malloced, but points
+ * into the repinfo for the dcid. */
+void doq_conn_key_from_repinfo(struct doq_conn_key* key,
+       struct comm_reply* repinfo);
+
+/** doq find a stream in the connection */
+struct doq_stream* doq_stream_find(struct doq_conn* conn, int64_t stream_id);
+
+/** doq shutdown the stream. */
+int doq_stream_close(struct doq_conn* conn, struct doq_stream* stream,
+       int send_shutdown);
+
+/** send reply for a connection */
+int doq_stream_send_reply(struct doq_conn* conn, struct doq_stream* stream,
+       struct sldns_buffer* buf);
+
+/** the connection has write interest, wants to write packets */
+void doq_conn_write_enable(struct doq_conn* conn);
+
+/** the connection has no write interest, does not want to write packets */
+void doq_conn_write_disable(struct doq_conn* conn);
+
+/** set the connection on or off the write list, depending on write interest */
+void doq_conn_set_write_list(struct doq_table* table, struct doq_conn* conn);
+
+/** doq remove the connection from the write list */
+void doq_conn_write_list_remove(struct doq_table* table,
+       struct doq_conn* conn);
+
+/** doq get the first conn from the write list, if any, popped from list.
+ * Locks the conn that is returned. */
+struct doq_conn* doq_table_pop_first(struct doq_table* table);
+
+/**
+ * doq check if the timer for the conn needs to be changed.
+ * @param conn: connection, caller must hold lock on it.
+ * @param tv: time value, absolute time, returned.
+ * @return true if timer needs to be set to tv, false if no change is needed
+ *     to the timer. The timer is already set to the right time in that case.
+ */
+int doq_conn_check_timer(struct doq_conn* conn, struct timeval* tv);
+
+/** doq remove timer from tree */
+void doq_timer_tree_remove(struct doq_table* table, struct doq_timer* timer);
+
+/** doq remove timer from list */
+void doq_timer_list_remove(struct doq_table* table, struct doq_timer* timer);
+
+/** doq unset the timer if it was set. */
+void doq_timer_unset(struct doq_table* table, struct doq_timer* timer);
+
+/** doq set the timer and add it. */
+void doq_timer_set(struct doq_table* table, struct doq_timer* timer,
+       struct doq_server_socket* worker_doq_socket, struct timeval* tv);
+
+/** doq find a timeout in the timer tree */
+struct doq_timer* doq_timer_find_time(struct doq_table* table,
+       struct timeval* tv);
+
+/** doq handle timeout for a connection. Pass conn locked. Returns false for
+ * deletion. */
+int doq_conn_handle_timeout(struct doq_conn* conn);
+
+/** doq add size to the current quic buffer counter */
+void doq_table_quic_size_add(struct doq_table* table, size_t add);
+
+/** doq subtract size from the current quic buffer counter */
+void doq_table_quic_size_subtract(struct doq_table* table, size_t subtract);
+
+/** doq check if mem is available for quic. */
+int doq_table_quic_size_available(struct doq_table* table,
+       struct config_file* cfg, size_t mem);
+
+/** doq get the quic size value */
+size_t doq_table_quic_size_get(struct doq_table* table);
+#endif /* HAVE_NGTCP2 */
+
 char* set_ip_dscp(int socket, int addrfamily, int ds);
 
 /** for debug and profiling purposes only
@@ -459,4 +846,14 @@ char* set_ip_dscp(int socket, int addrfamily, int ds);
  */
 void verbose_print_unbound_socket(struct unbound_socket* ub_sock);
 
+/** event callback for testcode/doqclient */
+void doq_client_event_cb(int fd, short event, void* arg);
+
+/** timer event callback for testcode/doqclient */
+void doq_client_timer_cb(int fd, short event, void* arg);
+
+#ifdef HAVE_NGTCP2
+/** get a timestamp in nanoseconds */
+ngtcp2_tstamp doq_get_timestamp_nanosec(void);
+#endif
 #endif /* LISTEN_DNSPORT_H */
index 21e7eb82d5ef0725a64e13a636e98c3832824bf9..b8479e9ab66733b99d1b878101dcaca2be0a64ef 100644 (file)
@@ -293,6 +293,9 @@ static void print_mem(struct ub_shm_stat_info* shm_stat,
        PR_LL("mem.streamwait", s->svr.mem_stream_wait);
        PR_LL("mem.http.query_buffer", s->svr.mem_http2_query_buffer);
        PR_LL("mem.http.response_buffer", s->svr.mem_http2_response_buffer);
+#ifdef HAVE_NGTCP2
+       PR_LL("mem.quic", s->svr.mem_quic);
+#endif
 }
 
 /** print histogram */
@@ -359,6 +362,9 @@ static void print_extended(struct ub_stats_info* s, int inhibit_zero)
        PR_UL("num.query.tls_resume", s->svr.qtls_resume);
        PR_UL("num.query.ipv6", s->svr.qipv6);
        PR_UL("num.query.https", s->svr.qhttps);
+#ifdef HAVE_NGTCP2
+       PR_UL("num.query.quic", s->svr.qquic);
+#endif
 
        /* flags */
        PR_UL("num.query.flags.QR", s->svr.qbit_QR);
index c68981735cb76e947e053d2b2481b64ad510b122..1d71a0945154fe9f2ebc7ad6d7201cbcd3a0d8ef 100644 (file)
@@ -255,3 +255,19 @@ void dtio_mainfdcallback(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev),
        log_assert(0);
 }
 #endif
+
+#ifdef HAVE_NGTCP2
+/** Placeholder definition of the testcode/doqclient event callback.
+ * It is not expected to be called here, and asserts if it is. */
+void doq_client_event_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev),
+       void* ATTR_UNUSED(arg))
+{
+       log_assert(0);
+}
+#endif
+
+#ifdef HAVE_NGTCP2
+/** Placeholder definition of the testcode/doqclient timer callback.
+ * It is not expected to be called here, and asserts if it is. */
+void doq_client_timer_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev),
+       void* ATTR_UNUSED(arg))
+{
+       log_assert(0);
+}
+#endif
index d1c87746730718b60883e17954d9ff6695ec38e8..fdc1b8af16dea0535d482a85f795a188cd3c6049 100644 (file)
@@ -68,6 +68,8 @@ static struct thr_check* thread_infos[THRDEBUG_MAX_THREADS];
 int check_locking_order = 1;
 /** the pid of this runset, reasonably unique. */
 static pid_t check_lock_pid;
+/** the name of the output file */
+static const char* output_name = "ublocktrace";
 /**
  * Should checklocks print a trace of the lock and unlock calls.
  * It uses fprintf for that because the log function uses a lock and that
@@ -142,7 +144,8 @@ acquire_locklock(struct checked_lock* lock,
 
 /** add protected region */
 void 
-lock_protect(void *p, void* area, size_t size)
+lock_protect_place(void* p, void* area, size_t size, const char* def_func,
+       const char* def_file, int def_line, const char* def_area)
 {
        struct checked_lock* lock = *(struct checked_lock**)p;
        struct protected_area* e = (struct protected_area*)malloc(
@@ -151,6 +154,10 @@ lock_protect(void *p, void* area, size_t size)
                fatal_exit("lock_protect: out of memory");
        e->region = area;
        e->size = size;
+       e->def_func = def_func;
+       e->def_file = def_file;
+       e->def_line = def_line;
+       e->def_area = def_area;
        e->hold = malloc(size);
        if(!e->hold)
                fatal_exit("lock_protect: out of memory");
@@ -203,6 +210,9 @@ prot_check(struct checked_lock* lock,
                if(memcmp(p->hold, p->region, p->size) != 0) {
                        log_hex("memory prev", p->hold, p->size);
                        log_hex("memory here", p->region, p->size);
+                       log_err("lock_protect on %s %s:%d %s failed",
+                               p->def_func, p->def_file, p->def_line,
+                               p->def_area);
                        lock_error(lock, func, file, line, 
                                "protected area modified");
                }
@@ -675,13 +685,19 @@ checklock_unlock(enum check_lock_type type, struct checked_lock* lock,
        }
 }
 
+/** set the prefix for the lock trace output file names */
+void
+checklock_set_output_name(const char* name)
+{
+       /* the pointer is stored, the string is not copied, so the string
+        * has to stay valid while the output files are opened. */
+       output_name = name;
+}
+
 /** open order info debug file, thr->num must be valid */
 static void 
 open_lockorder(struct thr_check* thr)
 {
        char buf[24];
        time_t t;
-       snprintf(buf, sizeof(buf), "ublocktrace.%d", thr->num);
+       snprintf(buf, sizeof(buf), "%s.%d", output_name, thr->num);
        thr->order_info = fopen(buf, "w");
        if(!thr->order_info)
                fatal_exit("could not open %s: %s", buf, strerror(errno));
index 61cc6fb0c5e2c86a965d97c3d241dd045f66d9c6..7ebc2f98407e33bff16e7995ed5551e800165359 100644 (file)
@@ -90,6 +90,14 @@ struct protected_area {
        void* hold;
        /** next protected area in list */
        struct protected_area* next;
+       /** the place where the lock_protect is made, at init. */
+       const char* def_func;
+       /** the file where the lock_protect is made */
+       const char* def_file;
+       /** the line number where the lock_protect is made */
+       int def_line;
+       /** the text string for the area that is protected, at init call. */
+       const char* def_area;
 };
 
 /**
@@ -181,12 +189,19 @@ struct checked_lock {
  *     It demangles the lock itself (struct checked_lock**).
  * @param area: ptr to mem.
  * @param size: length of area.
+ * @param def_func: function where the lock_protect() line is.
+ * @param def_file: file where the lock_protect() line is.
+ * @param def_line: line where the lock_protect() line is.
+ * @param def_area: area string
  * You can call it multiple times with the same lock to give several areas.
  * Call it when you are done initializing the area, since it will be copied
  * at this time and protected right away against unauthorised changes until 
  * the next lock() call is done.
  */
-void lock_protect(void* lock, void* area, size_t size);
+void lock_protect_place(void* lock, void* area, size_t size,
+       const char* def_func, const char* def_file, int def_line,
+       const char* def_area);
+#define lock_protect(lock, area, size) lock_protect_place(lock, area, size, __func__, __FILE__, __LINE__, #area)
 
 /**
  * Remove protected area from lock.
@@ -203,6 +218,13 @@ void lock_unprotect(void* lock, void* area);
  */
 size_t lock_get_mem(void* lock);
 
+/**
+ * Set the output name, prefix, of the lock check output file(s).
+ * Call it before the checklock_start or thread creation. Pass a fixed string.
+ * @param name: string to use for output data file names.
+ */
+void checklock_set_output_name(const char* name);
+
 /**
  * Initialise checklock. Sets up internal debug structures.
  */
diff --git a/testcode/doqclient.c b/testcode/doqclient.c
new file mode 100644 (file)
index 0000000..4ba4f8c
--- /dev/null
@@ -0,0 +1,2685 @@
+/*
+ * testcode/doqclient.c - debug program. Perform multiple DNS queries using DoQ.
+ *
+ * Copyright (c) 2022, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * 
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** 
+ * \file
+ *
+ * Simple DNS-over-QUIC client. For testing and debugging purposes.
+ * No authentication of TLS cert.
+ */
+
+#include "config.h"
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif
+
+#ifdef HAVE_NGTCP2
+#include <ngtcp2/ngtcp2.h>
+#include <ngtcp2/ngtcp2_crypto.h>
+#ifdef HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H
+#include <ngtcp2/ngtcp2_crypto_quictls.h>
+#else
+#include <ngtcp2/ngtcp2_crypto_openssl.h>
+#endif
+#include <openssl/ssl.h>
+#include <openssl/rand.h>
+#ifdef HAVE_TIME_H
+#include <time.h>
+#endif
+#include <sys/time.h>
+#include "util/locks.h"
+#include "util/net_help.h"
+#include "sldns/sbuffer.h"
+#include "sldns/str2wire.h"
+#include "sldns/wire2str.h"
+#include "util/data/msgreply.h"
+#include "util/data/msgencode.h"
+#include "util/data/msgparse.h"
+#include "util/data/dname.h"
+#include "util/random.h"
+#include "util/ub_event.h"
+struct doq_client_stream_list;
+struct doq_client_stream;
+
+/** the local client data for the DoQ connection */
+struct doq_client_data {
+       /** file descriptor */
+       int fd;
+       /** the event base for the events */
+       struct ub_event_base* base;
+       /** the ub event */
+       struct ub_event* ev;
+       /** the expiry timer */
+       struct ub_event* expire_timer;
+       /** is the expire_timer added */
+       int expire_timer_added;
+       /** the ngtcp2 connection information */
+       struct ngtcp2_conn* conn;
+       /** random state */
+       struct ub_randstate* rnd;
+       /** server connected to as a string */
+       const char* svr;
+       /** the static secret */
+       uint8_t* static_secret_data;
+       /** the static secret size, in bytes */
+       size_t static_secret_size;
+       /** destination address sockaddr */
+       struct sockaddr_storage dest_addr;
+       /** length of dest addr */
+       socklen_t dest_addr_len;
+       /** local address sockaddr */
+       struct sockaddr_storage local_addr;
+       /** length of local addr */
+       socklen_t local_addr_len;
+       /** SSL context */
+       SSL_CTX* ctx;
+       /** SSL object */
+       SSL* ssl;
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT
+       /** the connection reference for ngtcp2_conn and userdata in ssl */
+       struct ngtcp2_crypto_conn_ref conn_ref;
+#endif
+       /** the quic version to use */
+       uint32_t quic_version;
+       /** the last error */
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+       struct ngtcp2_ccerr ccerr;
+#else
+       struct ngtcp2_connection_close_error last_error;
+#endif
+       /** the recent tls alert error code */
+       uint8_t tls_alert;
+       /** the buffer for packet operations */
+       struct sldns_buffer* pkt_buf;
+       /** The list of queries to start. They have no stream associated.
+        * Once they do, they move to the send list. */
+       struct doq_client_stream_list* query_list_start;
+       /** The list of queries to send. They have a stream, and they are
+        * sending data. Data could also be received, like errors. */
+       struct doq_client_stream_list* query_list_send;
+       /** The list of queries to receive. They have a stream, and the
+        * send is done, it is possible to read data. */
+       struct doq_client_stream_list* query_list_receive;
+       /** The list of queries that are stopped. They have no stream
+        * active any more. Write and read are done. The query is done,
+        * and it may be in error and then have no answer or partial answer. */
+       struct doq_client_stream_list* query_list_stop;
+       /** is there a blocked packet in the blocked_pkt buffer */
+       int have_blocked_pkt;
+       /** store blocked packet, a packet that could not be sent on the
+        * nonblocking socket. */
+       struct sldns_buffer* blocked_pkt;
+       /** ecn info for the blocked packet */
+       struct ngtcp2_pkt_info blocked_pkt_pi;
+       /** the congestion control algorithm */
+       ngtcp2_cc_algo cc_algo;
+       /** the transport parameters file, for early data transmission,
+        * see the -x option in the usage text */
+       const char* transport_file;
+       /** the tls session file, for session resumption,
+        * see the -y option in the usage text */
+       const char* session_file;
+       /** if early data is enabled for the connection */
+       int early_data_enabled;
+       /** how quiet is the output, see the -q option in the usage text */
+       int quiet;
+       /** the configured port for the destination, see the -p option */
+       int port;
+};
+
+/** the local client stream list, for appending streams to */
+struct doq_client_stream_list {
+       /** first and last members of the list, the members are linked
+        * with the next and prev pointers of doq_client_stream */
+       struct doq_client_stream* first, *last;
+};
+
+/** the local client data for a DoQ stream */
+struct doq_client_stream {
+       /** next stream in list, and prev in list */
+       struct doq_client_stream* next, *prev;
+       /** the data buffer */
+       uint8_t* data;
+       /** length of the data buffer, in bytes */
+       size_t data_len;
+       /** if the client query has an active stream associated with it.
+        * The id of that stream is in stream_id. */
+       int has_stream;
+       /** the stream id, in use when has_stream is true */
+       int64_t stream_id;
+       /** data written position */
+       size_t nwrite;
+       /** the data length for write, in network format */
+       uint16_t data_tcplen;
+       /** if the write of the query data is done. That means the
+        * write channel has FIN, is closed for writing. */
+       int write_is_done;
+       /** data read position */
+       size_t nread;
+       /** the answer length, in network byte order */
+       uint16_t answer_len;
+       /** the answer buffer */
+       struct sldns_buffer* answer;
+       /** the answer is complete */
+       int answer_is_complete;
+       /** the query has an error, it has no answer, or no complete answer */
+       int query_has_error;
+       /** if the query is done */
+       int query_is_done;
+};
+
+#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT
+/** the quic method struct, must remain valid during the QUIC connection. */
+static SSL_QUIC_METHOD quic_method;
+#endif
+
+/** The get_conn callback for the ngtcp2_crypto_conn_ref. The user_data
+ * of the conn_ref is the doq_client_data, its ngtcp2_conn is returned. */
+static ngtcp2_conn* conn_ref_get_conn(ngtcp2_crypto_conn_ref* conn_ref)
+{
+       return ((struct doq_client_data*)conn_ref->user_data)->conn;
+}
+
+/** Store the client data as the app data of the ssl object. With the
+ * conn_ref it is stored indirectly, as the user_data of the conn_ref. */
+static void
+set_app_data(SSL* ssl, struct doq_client_data* data)
+{
+       void* app = data;
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT
+       data->conn_ref.get_conn = &conn_ref_get_conn;
+       data->conn_ref.user_data = data;
+       app = &data->conn_ref;
+#endif
+       SSL_set_app_data(ssl, app);
+}
+
+/** Fetch the doq_client_data from the app data of the SSL object. It is
+ * the reverse of set_app_data, so it looks inside the conn_ref when
+ * HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT is defined. */
+static struct doq_client_data*
+get_app_data(SSL* ssl)
+{
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT
+       struct ngtcp2_crypto_conn_ref* ref = (struct ngtcp2_crypto_conn_ref*)
+               SSL_get_app_data(ssl);
+       return (struct doq_client_data*)ref->user_data;
+#else
+       return (struct doq_client_data*)SSL_get_app_data(ssl);
+#endif
+}
+
+
+
+/** write handle routine, forward declaration */
+static void on_write(struct doq_client_data* data);
+/** update the timer, forward declaration */
+static void update_timer(struct doq_client_data* data);
+/** disconnect, we are done, forward declaration */
+static void disconnect(struct doq_client_data* data);
+/** fetch and write the transport file, forward declaration */
+static void early_data_write_transport(struct doq_client_data* data);
+
+/** Print the usage text of doqclient on stdout and exit with status 1.
+ * argv[0] is printed as the program name. */
+static void usage(char* argv[])
+{
+       const char* progname = argv[0];
+       printf("usage: %s [options] name type class ...\n", progname);
+       printf("        sends the name-type-class queries over DNS-over-QUIC.\n");
+       printf("-s server       IP address to send the queries to, default: 127.0.0.1\n");
+       printf("-p              Port to connect to, default: %d\n",
+               UNBOUND_DNS_OVER_QUIC_PORT);
+       printf("-v              verbose output\n");
+       printf("-q              quiet, short output of answer\n");
+       printf("-x file         transport file, for read/write of transport parameters.\n"
+               "\t\tIf it exists, it is used to send early data. It is then\n"
+               "\t\twritten to contain the last used transport parameters.\n"
+               "\t\tAlso -y must be enabled for early data to succeed.\n");
+       printf("-y file         session file, for read/write of TLS session. If it exists,\n"
+               "\t\tit is used for TLS session resumption. It is then written\n"
+               "\t\tto contain the last session used.\n"
+               "\t\tOn its own, without also -x, resumes TLS session.\n");
+       printf("-h              This help text\n");
+       exit(1);
+}
+
+/** Parse the server string and port into data->dest_addr and
+ * data->dest_addr_len. Prints a message and exits if it does not parse. */
+static void
+get_dest_addr(struct doq_client_data* data, const char* svr, int port)
+{
+       if(ipstrtoaddr(svr, port, &data->dest_addr, &data->dest_addr_len))
+               return;
+       printf("fatal: bad server specs '%s'\n", svr);
+       exit(1);
+}
+
+/** Open a UDP socket to svr.
+ * The socket is created for the address family of data->dest_addr,
+ * connected to that address and then set nonblocking.
+ * Exits the program if socket() or connect() fails.
+ * @param data: the dest_addr and dest_addr_len are used.
+ * @return the file descriptor of the socket. */
+static int
+open_svr_udp(struct doq_client_data* data)
+{
+       int fd = -1;
+       int r;
+       fd = socket(addr_is_ip6(&data->dest_addr, data->dest_addr_len)?
+               PF_INET6:PF_INET, SOCK_DGRAM, 0);
+       if(fd == -1) {
+               perror("socket() error");
+               exit(1);
+       }
+       r = connect(fd, (struct sockaddr*)&data->dest_addr,
+               data->dest_addr_len);
+       /* connect() returns -1 on failure and the reason is in errno, so
+        * the in-progress condition has to be tested on errno and not on
+        * the return value. */
+       if(r < 0 && errno != EINPROGRESS) {
+               perror("connect() error");
+               exit(1);
+       }
+       fd_set_nonblock(fd);
+       return fd;
+}
+
+/** Fetch the local address of the socket data->fd with getsockname into
+ * data->local_addr and data->local_addr_len. Exits on failure. The local
+ * and the destination address are logged. */
+static void
+get_local_addr(struct doq_client_data* data)
+{
+       struct sockaddr* sa = (struct sockaddr*)&data->local_addr;
+       memset(&data->local_addr, 0, sizeof(data->local_addr));
+       data->local_addr_len = (socklen_t)sizeof(data->local_addr);
+       if(getsockname(data->fd, sa, &data->local_addr_len) == -1) {
+               perror("getsockname() error");
+               exit(1);
+       }
+       log_addr(1, "local_addr", &data->local_addr, data->local_addr_len);
+       log_addr(1, "dest_addr", &data->dest_addr, data->dest_addr_len);
+}
+
+/** Create the query packet for the name, type and class strings.
+ * The first 16 bits of the packet are written as 0 and the next 16 bits
+ * as BIT_RD. An EDNS record with the DO bit and udp size 4096 is attached
+ * if it fits in the buffer.
+ * Exits if the name does not parse or if out of memory.
+ * @return newly allocated buffer with the query, the caller frees it. */
+static sldns_buffer*
+make_query(char* qname, char* qtype, char* qclass)
+{
+       struct query_info qi;
+       struct edns_data ed;
+       sldns_buffer* pkt = sldns_buffer_new(65553);
+       if(!pkt)
+               fatal_exit("out of memory");
+       qi.qname = sldns_str2wire_dname(qname, &qi.qname_len);
+       if(!qi.qname) {
+               printf("cannot parse query name: '%s'\n", qname);
+               exit(1);
+       }
+       qi.local_alias = NULL;
+       qi.qtype = sldns_get_rr_type_by_name(qtype);
+       qi.qclass = sldns_get_rr_class_by_name(qclass);
+
+       qinfo_query_encode(pkt, &qi); /* flips buffer */
+       free(qi.qname);
+       sldns_buffer_write_u16_at(pkt, 0, 0x0000);
+       sldns_buffer_write_u16_at(pkt, 2, BIT_RD);
+
+       memset(&ed, 0, sizeof(ed));
+       ed.edns_present = 1;
+       ed.bits = EDNS_DO;
+       ed.udp_size = 4096;
+       /* only attach the EDNS record if there is space for it */
+       if(sldns_buffer_limit(pkt)+calc_edns_field_size(&ed) <=
+               sldns_buffer_capacity(pkt))
+               attach_edns_record(pkt, &ed);
+       return pkt;
+}
+
+/** Create a client stream structure for the query in query_data.
+ * The query octets, up to the buffer limit, are copied into the stream.
+ * The stream id is set to -1. Exits if out of memory.
+ * @return the new, zero initialised otherwise, stream structure. */
+static struct doq_client_stream*
+client_stream_create(struct sldns_buffer* query_data)
+{
+       size_t qlen = sldns_buffer_limit(query_data);
+       struct doq_client_stream* str = calloc(1, sizeof(*str));
+       if(!str)
+               fatal_exit("calloc failed: out of memory");
+       str->data = memdup(sldns_buffer_begin(query_data), qlen);
+       if(!str->data)
+               fatal_exit("alloc data failed: out of memory");
+       str->data_len = qlen;
+       str->stream_id = -1;
+       return str;
+}
+
+/** Free the client stream structure, with its query data and its answer
+ * buffer. A NULL pointer is ignored. */
+static void
+client_stream_free(struct doq_client_stream* str)
+{
+       if(str) {
+               sldns_buffer_free(str->answer);
+               free(str->data);
+               free(str);
+       }
+}
+
+/** Set up the stream to start the write process on stream_id. The write
+ * and read positions and the answer state are reset, and an answer buffer
+ * from a previous attempt is freed. */
+static void
+client_stream_start_setup(struct doq_client_stream* str, int64_t stream_id)
+{
+       /* the stream that is now associated with the query */
+       str->stream_id = stream_id;
+       str->has_stream = 1;
+       /* positions at the start */
+       str->nwrite = 0;
+       str->nread = 0;
+       /* no answer yet */
+       str->answer_len = 0;
+       str->answer_is_complete = 0;
+       str->query_has_error = 0;
+       str->query_is_done = 0;
+       if(str->answer) {
+               sldns_buffer_free(str->answer);
+               str->answer = NULL;
+       }
+}
+
+/** Return string for log purposes with the query name and type.
+ * If the query data has no octets after the header, or the name in it is
+ * not valid, a fixed description string is returned instead.
+ * Exits if out of memory.
+ * @return allocated string, the caller frees it. */
+static char*
+client_stream_string(struct doq_client_stream* str)
+{
+       char* s;
+       const char* text;
+       size_t dname_len = 0;
+       char dname[256], tpstr[32], result[256+32+16];
+       if(str->data_len <= LDNS_HEADER_SIZE) {
+               text = "query_with_no_question";
+       } else if((dname_len = dname_valid(str->data+LDNS_HEADER_SIZE,
+               str->data_len-LDNS_HEADER_SIZE)) == 0) {
+               text = "query_dname_not_valid";
+       } else {
+               uint16_t tp;
+               (void)sldns_wire2str_dname_buf(str->data+LDNS_HEADER_SIZE,
+                       dname_len, dname, sizeof(dname));
+               tp = sldns_wirerr_get_type(str->data+LDNS_HEADER_SIZE,
+                       str->data_len-LDNS_HEADER_SIZE, dname_len);
+               (void)sldns_wire2str_type_buf(tp, tpstr, sizeof(tpstr));
+               snprintf(result, sizeof(result), "%s %s", dname, tpstr);
+               text = result;
+       }
+       /* one place to make the allocated copy for the caller */
+       s = strdup(text);
+       if(!s)
+               fatal_exit("strdup failed: out of memory");
+       return s;
+}
+
+/** Create an empty query stream list. Exits if out of memory.
+ * @return the new list, with first and last NULL. */
+static struct doq_client_stream_list*
+stream_list_create(void)
+{
+       struct doq_client_stream_list* lst;
+       lst = calloc(1, sizeof(struct doq_client_stream_list));
+       if(!lst)
+               fatal_exit("calloc failed: out of memory");
+       return lst;
+}
+
+/** Free the query stream list, and all the streams that are linked in it.
+ * A NULL list is ignored. */
+static void
+stream_list_free(struct doq_client_stream_list* list)
+{
+       struct doq_client_stream* cur, *nxt;
+       if(!list)
+               return;
+       for(cur = list->first; cur; cur = nxt) {
+               /* pick up the next pointer before the item is freed */
+               nxt = cur->next;
+               client_stream_free(cur);
+       }
+       free(list);
+}
+
+/** Append the item at the end of the list. The item becomes the last
+ * item, and also the first item if the list was empty. */
+static void
+stream_list_append(struct doq_client_stream_list* list,
+       struct doq_client_stream* str)
+{
+       struct doq_client_stream* tail = list->last;
+       str->next = NULL;
+       str->prev = tail;
+       if(tail)
+               tail->next = str;
+       else    list->first = str;
+       list->last = str;
+}
+
+/** Unlink the item from the list. The item is not freed. Its next and
+ * prev pointers are set to NULL. The item has to be in the list. */
+static void
+stream_list_delete(struct doq_client_stream_list* list,
+       struct doq_client_stream* str)
+{
+       struct doq_client_stream* before = str->prev;
+       struct doq_client_stream* after = str->next;
+       if(after)
+               after->prev = before;
+       else    list->last = before;
+       if(before)
+               before->next = after;
+       else    list->first = after;
+       str->next = NULL;
+       str->prev = NULL;
+}
+
+/** Move the item from list1 to the end of list2. The item has to be in
+ * list1 when this is called. */
+static void
+stream_list_move(struct doq_client_stream* str,
+       struct doq_client_stream_list* list1,
+       struct doq_client_stream_list* list2)
+{
+       /* unlink from the old list */
+       stream_list_delete(list1, str);
+       /* and link it in as the last item of the new list */
+       stream_list_append(list2, str);
+}
+
+/** The answer length has been received completely, allocate the answer
+ * buffer for that many octets. The limit of the buffer is set to the
+ * answer length. Exits if out of memory. */
+static void
+client_stream_datalen_complete(struct doq_client_stream* str)
+{
+       size_t len = (size_t)ntohs(str->answer_len);
+       verbose(1, "answer length %d", (int)len);
+       str->answer = sldns_buffer_new(len);
+       if(!str->answer)
+               fatal_exit("sldns_buffer_new failed: out of memory");
+       sldns_buffer_set_limit(str->answer, len);
+}
+
+/** Print the RRs from the answer section of the packet on stdout.
+ * The question section is scanned, to step over it, and its text is not
+ * printed. Nothing is printed for a packet shorter than the header. */
+static void
+print_answer_rrs(uint8_t* pkt, size_t pktlen)
+{
+       char text[65535];
+       char* out;
+       size_t out_len;
+       int i, qdcount, ancount, comprloop = 0;
+       uint8_t* rd = pkt;
+       size_t rd_len = pktlen;
+       if(pktlen < LDNS_HEADER_SIZE)
+               return;
+       qdcount = LDNS_QDCOUNT(pkt);
+       ancount = LDNS_ANCOUNT(pkt);
+       rd += LDNS_HEADER_SIZE;
+       rd_len -= LDNS_HEADER_SIZE;
+
+       /* step over the question section */
+       for(i=0; i<qdcount; i++) {
+               out = text;
+               out_len = sizeof(text);
+               (void)sldns_wire2str_rrquestion_scan(&rd, &rd_len,
+                       &out, &out_len, pkt, pktlen, &comprloop);
+       }
+       /* print every RR of the answer section */
+       for(i=0; i<ancount; i++) {
+               out = text;
+               out_len = sizeof(text);
+               (void)sldns_wire2str_rr_scan(&rd, &rd_len, &out, &out_len,
+                       pkt, pktlen, &comprloop);
+               /* terminate string */
+               if(out_len != 0)
+                       *out = 0;
+               else    text[sizeof(text)-1] = 0;
+               printf("%s", text);
+       }
+}
+
+/** short output of answer, short error or rcode or answer section RRs. */
+static void
+client_stream_print_short(struct doq_client_stream* str)
+{
+       int rcode, ancount;
+       if(str->query_has_error) {
+               char* logs = client_stream_string(str);
+               printf("%s has error, there is no answer\n", logs);
+               free(logs);
+               return;
+       }
+       if(sldns_buffer_limit(str->answer) < LDNS_HEADER_SIZE) {
+               char* logs = client_stream_string(str);
+               printf("%s received short packet, smaller than header\n",
+                       logs);
+               free(logs);
+               return;
+       }
+       rcode = LDNS_RCODE_WIRE(sldns_buffer_begin(str->answer));
+       if(rcode != 0) {
+               char* logs = client_stream_string(str);
+               char rc[16];
+               (void)sldns_wire2str_rcode_buf(rcode, rc, sizeof(rc));
+               printf("%s rcode %s\n", logs, rc);
+               free(logs);
+               return;
+       }
+       ancount = LDNS_ANCOUNT(sldns_buffer_begin(str->answer));
+       if(ancount == 0) {
+               char* logs = client_stream_string(str);
+               printf("%s nodata answer\n", logs);
+               free(logs);
+               return;
+       }
+       print_answer_rrs(sldns_buffer_begin(str->answer),
+               sldns_buffer_limit(str->answer));
+}
+
+/** Print the answer of the stream on stdout as the text of the complete
+ * packet, followed by a line with the server and port from data.
+ * For a query with an error one line with the error is printed. */
+static void
+client_stream_print_long(struct doq_client_data* data,
+       struct doq_client_stream* str)
+{
+       char* pktstr;
+       if(str->query_has_error) {
+               char* logs = client_stream_string(str);
+               printf("%s has error, there is no answer\n", logs);
+               free(logs);
+               return;
+       }
+       pktstr = sldns_wire2str_pkt(sldns_buffer_begin(str->answer),
+               sldns_buffer_limit(str->answer));
+       if(pktstr)
+               printf("%s", pktstr);
+       else    printf("%s", ";sldns_wire2str_pkt failed\n");
+       printf(";; SERVER: %s %d\n", data->svr, data->port);
+       free(pktstr);
+}
+
+/** All the octets of the answer have been received on the stream. The
+ * answer is logged when verbose, and the stream is marked as having a
+ * complete answer. */
+static void
+client_stream_data_complete(struct doq_client_stream* str)
+{
+       verbose(1, "received all answer content");
+       if(verbosity > 0) {
+               char* qstr = client_stream_string(str);
+               char* pktstr;
+               log_buf(1, "received answer", str->answer);
+               pktstr = sldns_wire2str_pkt(sldns_buffer_begin(str->answer),
+                       sldns_buffer_limit(str->answer));
+               if(pktstr) {
+                       verbose(1, "query %s received:\n%s", qstr, pktstr);
+               } else {
+                       verbose(1, "could not sldns_wire2str_pkt");
+               }
+               free(pktstr);
+               free(qstr);
+       }
+       str->answer_is_complete = 1;
+}
+
+/** The stream has ended without a complete answer. The query is marked
+ * as having an error. When verbose, it is logged how much of the answer
+ * was received. */
+static void
+client_stream_answer_error(struct doq_client_stream* str)
+{
+       if(verbosity > 0) {
+               char* qstr = client_stream_string(str);
+               if(!str->answer) {
+                       verbose(1, "query %s has an error. received no data.",
+                               qstr);
+               } else {
+                       verbose(1, "query %s has an error. received %d/%d bytes.",
+                               qstr, (int)sldns_buffer_position(str->answer),
+                               (int)sldns_buffer_limit(str->answer));
+               }
+               free(qstr);
+       }
+       str->query_has_error = 1;
+}
+
+/** Receive data for a stream. The first two octets on the stream are the
+ * length of the DNS message, they are collected in answer_len. The octets
+ * after that are stored in the answer buffer, that is allocated when the
+ * length is complete. Octets past the announced length are logged and
+ * dropped. When the last octet of the answer arrives the answer is marked
+ * complete. */
+static void
+client_stream_recv_data(struct doq_client_stream* str, const uint8_t* data,
+       size_t datalen)
+{
+       int body_received = 0;
+       if(str->nread < 2) {
+               /* read the tcplength uint16_t at the start of the DNS
+                * message, it can arrive in pieces */
+               size_t want = 2-str->nread;
+               size_t hdr = (datalen > want)?want:datalen;
+               memmove(((uint8_t*)&str->answer_len)+str->nread, data, hdr);
+               str->nread += hdr;
+               data += hdr;
+               datalen -= hdr;
+               /* we can allocate the data buffer */
+               if(str->nread == 2)
+                       client_stream_datalen_complete(str);
+       }
+       if(datalen > 0) {
+               /* data bytes for the answer, as far as they fit */
+               size_t room = sldns_buffer_remaining(str->answer);
+               size_t body = (datalen > room)?room:datalen;
+               if(body > 0) {
+                       sldns_buffer_write(str->answer, data, body);
+                       str->nread += body;
+                       data += body;
+                       datalen -= body;
+                       body_received = 1;
+               }
+       }
+       if(datalen > 0) {
+               /* extra received bytes after end */
+               verbose(1, "extra bytes after end of DNS length");
+               if(verbosity > 0)
+                       log_hex("extradata", (void*)data, datalen);
+       }
+       /* done when the length prefix and the whole answer are in */
+       if(body_received &&
+               str->nread >= (size_t)(ntohs(str->answer_len))+2)
+               client_stream_data_complete(str);
+}
+
+/** receive FIN from remote end on client stream, no more data to be
+ * received on the stream. Also used for a stream reset from the remote.
+ * The stream is moved to the stop list, the result is printed, and if no
+ * queries are left on the send and receive lists, disconnect is called.
+ * @param data: the client data with the query lists.
+ * @param str: the stream that ended.
+ * @param is_fin: true for a FIN, false for a stream reset; it is only
+ *     used to pick the log message. */
+static void
+client_stream_recv_fin(struct doq_client_data* data,
+       struct doq_client_stream* str, int is_fin)
+{
+       if(verbosity > 0) {
+               char* logs = client_stream_string(str);
+               if(is_fin)
+                       verbose(1, "query %s: received FIN from remote", logs);
+               else
+                       verbose(1, "query %s: stream reset from remote", logs);
+               free(logs);
+       }
+       if(str->query_is_done) {
+               /* The stream has already been finished and moved to the
+                * stop list. Unlinking it from the send or receive list
+                * again would corrupt the lists, and the result has been
+                * printed already. */
+               return;
+       }
+       /* the list that the stream is on depends on the write state */
+       if(str->write_is_done)
+               stream_list_move(str, data->query_list_receive,
+                       data->query_list_stop);
+       else
+               stream_list_move(str, data->query_list_send,
+                       data->query_list_stop);
+       if(!str->answer_is_complete) {
+               client_stream_answer_error(str);
+       }
+       str->query_is_done = 1;
+       if(data->quiet)
+               client_stream_print_short(str);
+       else client_stream_print_long(data, str);
+       if(data->query_list_send->first==NULL &&
+               data->query_list_receive->first==NULL)
+               disconnect(data);
+}
+
+/** Fill a buffer with random data. RAND_bytes is tried first, and if that
+ * does not return success the octets are taken from ub_random on rnd. */
+static void fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len)
+{
+       size_t i;
+       if(RAND_bytes(buf, len) == 1)
+               return;
+       for(i=0; i<len; i++)
+               buf[i] = ub_random(rnd)&0xff;
+}
+
+/** Create the static secret, len octets of random data that are stored in
+ * data->static_secret_data, with data->static_secret_size set to len.
+ * Exits if out of memory. */
+static void generate_static_secret(struct doq_client_data* data, size_t len)
+{
+       void* secret = malloc(len);
+       data->static_secret_data = secret;
+       if(!data->static_secret_data)
+               fatal_exit("malloc failed: out of memory");
+       data->static_secret_size = len;
+       fill_rand(data->rnd, data->static_secret_data, len);
+}
+
+/** Fill the cid structure with datalen octets of random data. The length
+ * is capped at the size of the local buffer, 32 octets. */
+static void cid_randfill(struct ngtcp2_cid* cid, size_t datalen,
+       struct ub_randstate* rnd)
+{
+       uint8_t rnddata[32];
+       size_t n = (datalen > sizeof(rnddata))?sizeof(rnddata):datalen;
+       fill_rand(rnd, rnddata, n);
+       ngtcp2_cid_init(cid, rnddata, n);
+}
+
+/** Open a new bidirectional stream on the connection.
+ * Exits on errors other than NGTCP2_ERR_STREAM_ID_BLOCKED.
+ * @param data: the client data with the connection.
+ * @param ret_stream_id: the id of the new stream is returned here.
+ * @param stream_user_data: passed to ngtcp2 as user data of the stream.
+ * @return 1 if the stream is opened, 0 if the stream id is blocked and
+ *     no stream could be opened at this time. */
+static int
+client_bidi_stream(struct doq_client_data* data, int64_t* ret_stream_id,
+       void* stream_user_data)
+{
+       int64_t sid;
+       int rv = ngtcp2_conn_open_bidi_stream(data->conn, &sid,
+               stream_user_data);
+       if(rv == NGTCP2_ERR_STREAM_ID_BLOCKED) {
+               /* no bidi stream count for this new stream */
+               return 0;
+       }
+       if(rv != 0)
+               fatal_exit("could not ngtcp2_conn_open_bidi_stream: %s",
+                       ngtcp2_strerror(rv));
+       *ret_stream_id = sid;
+       return 1;
+}
+
+/** See if we can start query streams, by creating bidirectional streams
+ * on the QUIC transport for them. The queries on the start list are
+ * handled in order, and moved to the send list, until the start list is
+ * empty or no more streams can be opened. */
+static void
+query_streams_start(struct doq_client_data* data)
+{
+       struct doq_client_stream* str;
+       while((str = data->query_list_start->first) != NULL) {
+               int64_t stream_id = 0;
+               /* stop when no more bidi streams are allowed */
+               if(!client_bidi_stream(data, &stream_id, str))
+                       break;
+               if(verbosity > 0) {
+                       char* qstr = client_stream_string(str);
+                       verbose(1, "query %s start on bidi stream id %lld",
+                               qstr, (long long int)stream_id);
+                       free(qstr);
+               }
+               /* the stream starts, and the query entry goes to the send
+                * list so that it is written */
+               client_stream_start_setup(str, stream_id);
+               stream_list_move(str, data->query_list_start,
+                       data->query_list_send);
+       }
+}
+
+/** The rand callback routine from ngtcp2. Fills dest with destlen random
+ * octets, the random state is in the native_handle of rand_ctx. */
+static void rand_cb(uint8_t* dest, size_t destlen,
+       const ngtcp2_rand_ctx* rand_ctx)
+{
+       fill_rand((struct ub_randstate*)rand_ctx->native_handle, dest,
+               destlen);
+}
+
+/** The get_new_connection_id callback routine from ngtcp2. The cid is
+ * filled with cidlen random octets, and the stateless reset token for it
+ * is generated with the static secret of the client.
+ * @return 0 on success, NGTCP2_ERR_CALLBACK_FAILURE if the token could
+ *     not be generated. */
+static int get_new_connection_id_cb(struct ngtcp2_conn* ATTR_UNUSED(conn),
+       struct ngtcp2_cid* cid, uint8_t* token, size_t cidlen, void* user_data)
+{
+       struct doq_client_data* doqdata = (struct doq_client_data*)user_data;
+       int rv;
+       cid_randfill(cid, cidlen, doqdata->rnd);
+       rv = ngtcp2_crypto_generate_stateless_reset_token(token,
+               doqdata->static_secret_data, doqdata->static_secret_size,
+               cid);
+       if(rv != 0)
+               return NGTCP2_ERR_CALLBACK_FAILURE;
+       return 0;
+}
+
+/** Handle that the early data is rejected by the server. ngtcp2 is told
+ * about it, and the queries on the send list are moved back to the start
+ * list, without a stream, so that they can be started again.
+ * If the ngtcp2 call fails, that is logged and the lists are left as
+ * they are. */
+static void
+early_data_is_rejected(struct doq_client_data* data)
+{
+       struct doq_client_stream* str;
+       int rv;
+       verbose(1, "early data was rejected by the server");
+#ifdef HAVE_NGTCP2_CONN_TLS_EARLY_DATA_REJECTED
+       rv = ngtcp2_conn_tls_early_data_rejected(data->conn);
+#else
+       rv = ngtcp2_conn_early_data_rejected(data->conn);
+#endif
+       if(rv != 0) {
+               log_err("ngtcp2_conn_early_data_rejected failed: %s",
+                       ngtcp2_strerror(rv));
+               return;
+       }
+       /* every stream on the send list goes back to the start state */
+       while((str = data->query_list_send->first) != NULL) {
+               stream_list_move(str, data->query_list_send,
+                       data->query_list_start);
+               /* the stream and its id are removed */
+               str->has_stream = 0;
+               str->stream_id = 0;
+               /* initialise other members, in case they are altered,
+                * but unlikely, because early streams are rejected. */
+               str->nwrite = 0;
+               str->nread = 0;
+               str->answer_len = 0;
+               str->answer_is_complete = 0;
+               str->query_has_error = 0;
+               str->query_is_done = 0;
+               if(str->answer) {
+                       sldns_buffer_free(str->answer);
+                       str->answer = NULL;
+               }
+       }
+}
+
+/** The handshake completed callback from ngtcp2.
+ * Logs connection limits, the negotiated cipher and the ALPN. If early
+ * data is enabled, it checks whether the server accepted it, and handles
+ * the rejection otherwise. With
+ * HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT and a transport
+ * file, the transport file is written.
+ * @return 0 always. */
+static int
+handshake_completed(ngtcp2_conn* ATTR_UNUSED(conn), void* user_data)
+{
+       struct doq_client_data* data = (struct doq_client_data*)user_data;
+       verbose(1, "handshake_completed callback");
+       verbose(1, "ngtcp2_conn_get_max_data_left is %d",
+               (int)ngtcp2_conn_get_max_data_left(data->conn));
+#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI
+       verbose(1, "ngtcp2_conn_get_max_local_streams_uni is %d",
+               (int)ngtcp2_conn_get_max_local_streams_uni(data->conn));
+#endif
+       verbose(1, "ngtcp2_conn_get_streams_uni_left is %d",
+               (int)ngtcp2_conn_get_streams_uni_left(data->conn));
+       verbose(1, "ngtcp2_conn_get_streams_bidi_left is %d",
+               (int)ngtcp2_conn_get_streams_bidi_left(data->conn));
+       verbose(1, "negotiated cipher name is %s",
+               SSL_get_cipher_name(data->ssl));
+       if(verbosity > 0) {
+               const unsigned char* alpn = NULL;
+               unsigned int alpnlen = 0;
+               char alpnstr[128];
+               SSL_get0_alpn_selected(data->ssl, &alpn, &alpnlen);
+               /* Without a selected ALPN the pointer stays NULL. It must
+                * not be passed to memmove, also not with length 0. */
+               if(!alpn)
+                       alpnlen = 0;
+               if(alpnlen > sizeof(alpnstr)-1)
+                       alpnlen = sizeof(alpnstr)-1;
+               if(alpnlen > 0)
+                       memmove(alpnstr, alpn, alpnlen);
+               alpnstr[alpnlen]=0;
+               verbose(1, "negotiated ALPN is '%s'", alpnstr);
+       }
+       /* The SSL_get_early_data_status call works after the handshake
+        * completes. */
+       if(data->early_data_enabled) {
+               if(SSL_get_early_data_status(data->ssl) !=
+                       SSL_EARLY_DATA_ACCEPTED) {
+                       early_data_is_rejected(data);
+               } else {
+                       verbose(1, "early data was accepted by the server");
+               }
+       }
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT
+       if(data->transport_file) {
+               early_data_write_transport(data);
+       }
+#endif
+       return 0;
+}
+
+/** the extend_max_local_streams_bidi callback from ngtcp2 */
+static int
+extend_max_local_streams_bidi(ngtcp2_conn* ATTR_UNUSED(conn),
+       uint64_t max_streams, void* user_data)
+{
+       struct doq_client_data* data = (struct doq_client_data*)user_data;
+       verbose(1, "extend_max_local_streams_bidi callback, %d max_streams",
+               (int)max_streams);
+       verbose(1, "ngtcp2_conn_get_max_data_left is %d",
+               (int)ngtcp2_conn_get_max_data_left(data->conn));
+#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI
+       verbose(1, "ngtcp2_conn_get_max_local_streams_uni is %d",
+               (int)ngtcp2_conn_get_max_local_streams_uni(data->conn));
+#endif
+       verbose(1, "ngtcp2_conn_get_streams_uni_left is %d",
+               (int)ngtcp2_conn_get_streams_uni_left(data->conn));
+       verbose(1, "ngtcp2_conn_get_streams_bidi_left is %d",
+               (int)ngtcp2_conn_get_streams_bidi_left(data->conn));
+       query_streams_start(data);
+       return 0;
+}
+
+/** the recv_stream_data callback from ngtcp2 */
+static int
+recv_stream_data(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags,
+       int64_t stream_id, uint64_t offset, const uint8_t* data,
+       size_t datalen, void* user_data, void* stream_user_data)
+{
+       struct doq_client_data* doqdata = (struct doq_client_data*)user_data;
+       struct doq_client_stream* str = (struct doq_client_stream*)
+               stream_user_data;
+       verbose(1, "recv_stream_data stream %d offset %d datalen %d%s%s",
+               (int)stream_id, (int)offset, (int)datalen,
+               ((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0?" FIN":""),
+#ifdef NGTCP2_STREAM_DATA_FLAG_0RTT
+               ((flags&NGTCP2_STREAM_DATA_FLAG_0RTT)!=0?" 0RTT":"")
+#else
+               ((flags&NGTCP2_STREAM_DATA_FLAG_EARLY)!=0?" EARLY":"")
+#endif
+               );
+       if(verbosity > 0)
+               log_hex("data", (void*)data, datalen);
+       if(verbosity > 0) {
+               char* logs = client_stream_string(str);
+               verbose(1, "the stream_user_data is %s stream id %d, nread %d",
+                       logs, (int)str->stream_id, (int)str->nread);
+               free(logs);
+       }
+
+       /* append the data, if there is data */
+       if(datalen > 0) {
+               client_stream_recv_data(str, data, datalen);
+       }
+       if((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0) {
+               client_stream_recv_fin(doqdata, str, 1);
+       }
+       ngtcp2_conn_extend_max_stream_offset(doqdata->conn, stream_id, datalen);
+       ngtcp2_conn_extend_max_offset(doqdata->conn, datalen);
+       return 0;
+}
+
+/** The stream reset callback from ngtcp2. The query on the stream is
+ * finished, in the same way as for a FIN, but logged as a reset.
+ * A reset for a stream without stream_user_data, that is a stream that
+ * has no query associated with it, is logged and ignored.
+ * @return 0 always. */
+static int
+stream_reset(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id,
+       uint64_t final_size, uint64_t app_error_code, void* user_data,
+       void* stream_user_data)
+{
+       struct doq_client_data* doqdata = (struct doq_client_data*)user_data;
+       struct doq_client_stream* str = (struct doq_client_stream*)
+               stream_user_data;
+       verbose(1, "stream reset for stream %d final size %d app error code %d",
+               (int)stream_id, (int)final_size, (int)app_error_code);
+       if(!str) {
+               /* no query to finish for this stream */
+               verbose(1, "stream reset for stream %d has no query, "
+                       "it is ignored", (int)stream_id);
+               return 0;
+       }
+       client_stream_recv_fin(doqdata, str, 0);
+       return 0;
+}
+
+/** Copy sockaddr into ngtcp2 addr.
+ * If ngtcp2 uses its own generic sockaddr structures
+ * (NGTCP2_USE_GENERIC_SOCKADDR, or NGTCP2_USE_GENERIC_IPV6_SOCKADDR for
+ * IPv6), a structure of that type is allocated and filled member by
+ * member. Otherwise ngaddr points at the storage of addr itself, and
+ * nothing is allocated.
+ * Exits if out of memory.
+ * @param ngaddr: the addr and addrlen are set.
+ * @param addr: the address to copy.
+ * @param addrlen: length of addr. */
+static void
+copy_ngaddr(struct ngtcp2_addr* ngaddr, struct sockaddr_storage* addr,
+       socklen_t addrlen)
+{
+       if(addr_is_ip6(addr, addrlen)) {
+#if defined(NGTCP2_USE_GENERIC_SOCKADDR) || defined(NGTCP2_USE_GENERIC_IPV6_SOCKADDR)
+               struct sockaddr_in6* i6 = (struct sockaddr_in6*)addr;
+               struct ngtcp2_sockaddr_in6 a6;
+               ngaddr->addr = calloc(1, sizeof(a6));
+               if(!ngaddr->addr) fatal_exit("calloc failed: out of memory");
+               ngaddr->addrlen = sizeof(a6);
+               memset(&a6, 0, sizeof(a6));
+               a6.sin6_family = i6->sin6_family;
+               a6.sin6_port = i6->sin6_port;
+               a6.sin6_flowinfo = i6->sin6_flowinfo;
+               /* the address is a structure, copy its octets */
+               memmove(&a6.sin6_addr, &i6->sin6_addr, sizeof(a6.sin6_addr));
+               a6.sin6_scope_id = i6->sin6_scope_id;
+               memmove(ngaddr->addr, &a6, sizeof(a6));
+#else
+               ngaddr->addr = (ngtcp2_sockaddr*)addr;
+               ngaddr->addrlen = addrlen;
+#endif
+       } else {
+#ifdef NGTCP2_USE_GENERIC_SOCKADDR
+               struct sockaddr_in* i4 = (struct sockaddr_in*)addr;
+               struct ngtcp2_sockaddr_in a4;
+               ngaddr->addr = calloc(1, sizeof(a4));
+               if(!ngaddr->addr) fatal_exit("calloc failed: out of memory");
+               ngaddr->addrlen = sizeof(a4);
+               memset(&a4, 0, sizeof(a4));
+               a4.sin_family = i4->sin_family;
+               a4.sin_port = i4->sin_port;
+               /* the address is a structure, copy its octets */
+               memmove(&a4.sin_addr, &i4->sin_addr, sizeof(a4.sin_addr));
+               memmove(ngaddr->addr, &a4, sizeof(a4));
+#else
+               ngaddr->addr = (ngtcp2_sockaddr*)addr;
+               ngaddr->addrlen = addrlen;
+#endif
+       }
+}
+
+/** Debug log printf for ngtcp2 connections. The formatted message is
+ * printed on stderr as one line, with the prefix "libngtcp2: ". */
+static void log_printf_for_doq(void* ATTR_UNUSED(user_data),
+       const char* fmt, ...)
+{
+       va_list args;
+       fputs("libngtcp2: ", stderr);
+       va_start(args, fmt);
+       vfprintf(stderr, fmt, args);
+       va_end(args);
+       fputc('\n', stderr);
+}
+
+/** Get a timestamp in nanoseconds.
+ * With CLOCK_REALTIME defined, clock_gettime is used; CLOCK_MONOTONIC is
+ * tried first if it is defined, with CLOCK_REALTIME as the fallback.
+ * Without CLOCK_REALTIME, gettimeofday is used.
+ * If the time cannot be fetched, the error is logged and the timestamp
+ * is computed from the zero initialised time structure.
+ * @return the timestamp in nanoseconds. */
+static ngtcp2_tstamp get_timestamp_nanosec(void)
+{
+#ifdef CLOCK_REALTIME
+       struct timespec tp;
+       memset(&tp, 0, sizeof(tp));
+#ifdef CLOCK_MONOTONIC
+       if(clock_gettime(CLOCK_MONOTONIC, &tp) == -1) {
+#endif
+               if(clock_gettime(CLOCK_REALTIME, &tp) == -1) {
+                       log_err("clock_gettime failed: %s", strerror(errno));
+               }
+#ifdef CLOCK_MONOTONIC
+       }
+#endif
+       return ((uint64_t)tp.tv_sec)*((uint64_t)1000000000) +
+               ((uint64_t)tp.tv_nsec);
+#else
+       struct timeval tv;
+       /* zero it, so that no uninitialised values are read below when
+        * gettimeofday fails */
+       memset(&tv, 0, sizeof(tv));
+       if(gettimeofday(&tv, NULL) < 0) {
+               log_err("gettimeofday failed: %s", strerror(errno));
+       }
+       return ((uint64_t)tv.tv_sec)*((uint64_t)1000000000) +
+               ((uint64_t)tv.tv_usec)*((uint64_t)1000);
+#endif /* CLOCK_REALTIME */
+}
+
+/** create ngtcp2 client connection and set up. */
+static struct ngtcp2_conn* conn_client_setup(struct doq_client_data* data)
+{
+       struct ngtcp2_conn* conn = NULL;
+       int rv;
+       struct ngtcp2_cid dcid, scid;
+       struct ngtcp2_path path;
+       uint32_t client_chosen_version = NGTCP2_PROTO_VER_V1;
+       struct ngtcp2_callbacks cbs;
+       struct ngtcp2_settings settings;
+       struct ngtcp2_transport_params params;
+
+       memset(&cbs, 0, sizeof(cbs));
+       memset(&settings, 0, sizeof(settings));
+       memset(&params, 0, sizeof(params));
+       memset(&dcid, 0, sizeof(dcid));
+       memset(&scid, 0, sizeof(scid));
+       memset(&path, 0, sizeof(path));
+
+       data->quic_version = client_chosen_version;
+       ngtcp2_settings_default(&settings);
+       if(str_is_ip6(data->svr)) {
+#ifdef HAVE_STRUCT_NGTCP2_SETTINGS_MAX_TX_UDP_PAYLOAD_SIZE
+               settings.max_tx_udp_payload_size = 1232;
+#else
+               settings.max_udp_payload_size = 1232;
+#endif
+       }
+       settings.rand_ctx.native_handle = data->rnd;
+       if(verbosity > 0) {
+               /* make debug logs */
+               settings.log_printf = log_printf_for_doq;
+       }
+       settings.initial_ts = get_timestamp_nanosec();
+       ngtcp2_transport_params_default(&params);
+       params.initial_max_stream_data_bidi_local = 256*1024;
+       params.initial_max_stream_data_bidi_remote = 256*1024;
+       params.initial_max_stream_data_uni = 256*1024;
+       params.initial_max_data = 1024*1024;
+       params.initial_max_streams_bidi = 0;
+       params.initial_max_streams_uni = 100;
+       params.max_idle_timeout = 30*NGTCP2_SECONDS;
+       params.active_connection_id_limit = 7;
+       cid_randfill(&dcid, 16, data->rnd);
+       cid_randfill(&scid, 16, data->rnd);
+       cbs.client_initial = ngtcp2_crypto_client_initial_cb;
+       cbs.recv_crypto_data = ngtcp2_crypto_recv_crypto_data_cb;
+       cbs.encrypt = ngtcp2_crypto_encrypt_cb;
+       cbs.decrypt = ngtcp2_crypto_decrypt_cb;
+       cbs.hp_mask = ngtcp2_crypto_hp_mask_cb;
+       cbs.recv_retry = ngtcp2_crypto_recv_retry_cb;
+       cbs.update_key = ngtcp2_crypto_update_key_cb;
+       cbs.delete_crypto_aead_ctx = ngtcp2_crypto_delete_crypto_aead_ctx_cb;
+       cbs.delete_crypto_cipher_ctx =
+               ngtcp2_crypto_delete_crypto_cipher_ctx_cb;
+       cbs.get_path_challenge_data = ngtcp2_crypto_get_path_challenge_data_cb;
+       cbs.version_negotiation = ngtcp2_crypto_version_negotiation_cb;
+       cbs.get_new_connection_id = get_new_connection_id_cb;
+       cbs.handshake_completed = handshake_completed;
+       cbs.extend_max_local_streams_bidi = extend_max_local_streams_bidi;
+       cbs.rand = rand_cb;
+       cbs.recv_stream_data = recv_stream_data;
+       cbs.stream_reset = stream_reset;
+       copy_ngaddr(&path.local, &data->local_addr, data->local_addr_len);
+       copy_ngaddr(&path.remote, &data->dest_addr, data->dest_addr_len);
+
+       rv = ngtcp2_conn_client_new(&conn, &dcid, &scid, &path,
+               client_chosen_version, &cbs, &settings, &params,
+               NULL, /* ngtcp2_mem allocator, use default */
+               data /* callback argument */);
+       if(!conn) fatal_exit("could not ngtcp2_conn_client_new: %s",
+               ngtcp2_strerror(rv));
+       data->cc_algo = settings.cc_algo;
+       return conn;
+}
+
+#ifndef HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS
+/** Write the transport file.
+ * A selection of the transport parameters is written as text lines of
+ * the form name=value, one per line. If the file cannot be opened the
+ * error is printed with perror and nothing is written.
+ * NOTE(review): the values are cast to unsigned for printing; if the
+ * parameter fields are wider than unsigned, large values would be
+ * truncated - confirm against the ngtcp2_transport_params definition.
+ * @param file: name of the file to write, it is opened with mode "w".
+ * @param params: the transport parameters to write out.
+ */
+static void
+transport_file_write(const char* file, struct ngtcp2_transport_params* params)
+{
+       FILE* out;
+       out = fopen(file, "w");
+       if(!out) {
+               perror(file);
+               return;
+       }
+       fprintf(out, "initial_max_streams_bidi=%u\n",
+               (unsigned)params->initial_max_streams_bidi);
+       fprintf(out, "initial_max_streams_uni=%u\n",
+               (unsigned)params->initial_max_streams_uni);
+       fprintf(out, "initial_max_stream_data_bidi_local=%u\n",
+               (unsigned)params->initial_max_stream_data_bidi_local);
+       fprintf(out, "initial_max_stream_data_bidi_remote=%u\n",
+               (unsigned)params->initial_max_stream_data_bidi_remote);
+       fprintf(out, "initial_max_stream_data_uni=%u\n",
+               (unsigned)params->initial_max_stream_data_uni);
+       fprintf(out, "initial_max_data=%u\n",
+               (unsigned)params->initial_max_data);
+       fprintf(out, "active_connection_id_limit=%u\n",
+               (unsigned)params->active_connection_id_limit);
+       fprintf(out, "max_datagram_frame_size=%u\n",
+               (unsigned)params->max_datagram_frame_size);
+       if(ferror(out)) { /* one check covers all the fprintf calls above */
+               verbose(1, "There was an error writing %s: %s",
+                       file, strerror(errno));
+               fclose(out);
+               return;
+       }
+       fclose(out);
+}
+#endif /* HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS */
+
+/** Fetch and write the transport file.
+ * With ngtcp2_conn_encode_0rtt_transport_params available, the encoded
+ * parameters of the connection are written as a binary blob to
+ * data->transport_file. Otherwise the remote transport parameters are
+ * fetched from the connection and written with transport_file_write.
+ * Failures are logged and the routine returns.
+ * @param data: the doq client data, with the connection and the
+ *     transport_file name.
+ */
+static void
+early_data_write_transport(struct doq_client_data* data)
+{
+#ifdef HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS
+       FILE* out;
+       uint8_t buf[1024];
+       ngtcp2_ssize len = ngtcp2_conn_encode_0rtt_transport_params(data->conn,
+               buf, sizeof(buf));
+       if(len < 0) { /* negative is an error code from ngtcp2 */
+               log_err("ngtcp2_conn_encode_0rtt_transport_params failed: %s",
+                       ngtcp2_strerror(len));
+               return;
+       }
+       out = fopen(data->transport_file, "w");
+       if(!out) {
+               perror(data->transport_file);
+               return;
+       }
+       if(fwrite(buf, 1, len, out) != (size_t)len) {
+               log_err("fwrite %s failed: %s", data->transport_file,
+                       strerror(errno));
+       }
+       if(ferror(out)) {
+               verbose(1, "There was an error writing %s: %s",
+                       data->transport_file, strerror(errno));
+       }
+       fclose(out);
+#else
+       struct ngtcp2_transport_params params;
+       memset(&params, 0, sizeof(params));
+       ngtcp2_conn_get_remote_transport_params(data->conn, &params);
+       transport_file_write(data->transport_file, &params);
+#endif
+}
+
+#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT
+/** application rx key callback, this is where the rx key is set,
+ * and streams can be opened, like http3 unidirectional streams, like
+ * the http3 control and http3 qpack encode and decoder streams.
+ * This implementation logs the data and stream limits of the connection
+ * at verbosity 1 and, if a transport file is configured, writes the
+ * transport parameters to it.
+ * @param data: the doq client data.
+ * @return 1, always.
+ */
+static int
+application_rx_key_cb(struct doq_client_data* data)
+{
+       verbose(1, "application_rx_key_cb callback");
+       verbose(1, "ngtcp2_conn_get_max_data_left is %d",
+               (int)ngtcp2_conn_get_max_data_left(data->conn));
+#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI
+       verbose(1, "ngtcp2_conn_get_max_local_streams_uni is %d",
+               (int)ngtcp2_conn_get_max_local_streams_uni(data->conn));
+#endif
+       verbose(1, "ngtcp2_conn_get_streams_uni_left is %d",
+               (int)ngtcp2_conn_get_streams_uni_left(data->conn));
+       verbose(1, "ngtcp2_conn_get_streams_bidi_left is %d",
+               (int)ngtcp2_conn_get_streams_bidi_left(data->conn));
+       if(data->transport_file) {
+               early_data_write_transport(data);
+       }
+       return 1;
+}
+
+/** quic_method set_encryption_secrets function.
+ * Converts the OpenSSL encryption level to the ngtcp2 level. Then the
+ * rx key derived from read_secret and the tx key derived from
+ * write_secret are installed on the connection, for each of the secrets
+ * that is not NULL. After the rx key for the application level has been
+ * installed, application_rx_key_cb is called.
+ * NOTE(review): the level is compared with
+ * NGTCP2_CRYPTO_LEVEL_APPLICATION also when HAVE_NGTCP2_ENCRYPTION_LEVEL
+ * selects the ngtcp2_encryption_level type - confirm that the constant
+ * exists in that configuration.
+ * @param ssl: the TLS object, the doq client data is fetched from it.
+ * @param ossl_level: the OpenSSL encryption level of the secrets.
+ * @param read_secret: secret for the receive key, or NULL.
+ * @param write_secret: secret for the transmit key, or NULL.
+ * @param secret_len: length of the secrets.
+ * @return 1 on success, 0 on failure.
+ */
+static int
+set_encryption_secrets(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level,
+       const uint8_t *read_secret, const uint8_t *write_secret,
+       size_t secret_len)
+{
+       struct doq_client_data* data = get_app_data(ssl);
+#ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL
+       ngtcp2_encryption_level
+#else
+       ngtcp2_crypto_level
+#endif
+               level =
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL
+               ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level);
+#else
+               ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level);
+#endif
+
+       if(read_secret) {
+               if(ngtcp2_crypto_derive_and_install_rx_key(data->conn, NULL,
+                       NULL, NULL, level, read_secret, secret_len) != 0) {
+                       log_err("ngtcp2_crypto_derive_and_install_rx_key failed");
+                       return 0;
+               }
+               if(level == NGTCP2_CRYPTO_LEVEL_APPLICATION) {
+                       if(!application_rx_key_cb(data))
+                               return 0;
+               }
+       }
+
+       if(write_secret) {
+               if(ngtcp2_crypto_derive_and_install_tx_key(data->conn, NULL,
+                       NULL, NULL, level, write_secret, secret_len) != 0) {
+                       log_err("ngtcp2_crypto_derive_and_install_tx_key failed");
+                       return 0;
+               }
+       }
+       return 1;
+}
+
+/** quic_method add_handshake_data function.
+ * Submits the handshake bytes to the ngtcp2 connection as crypto data at
+ * the ngtcp2 level that corresponds to the OpenSSL encryption level.
+ * On failure the error is logged and set as the tls error on the
+ * connection.
+ * @param ssl: the TLS object, the doq client data is fetched from it.
+ * @param ossl_level: the OpenSSL encryption level of the data.
+ * @param data: the handshake bytes.
+ * @param len: length of data.
+ * @return 1 on success, 0 on failure.
+ */
+static int
+add_handshake_data(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level,
+       const uint8_t *data, size_t len)
+{
+       struct doq_client_data* doqdata = get_app_data(ssl);
+#ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL
+       ngtcp2_encryption_level
+#else
+       ngtcp2_crypto_level
+#endif
+               level =
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL
+               ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level);
+#else
+               ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level);
+#endif
+       int rv;
+
+       rv = ngtcp2_conn_submit_crypto_data(doqdata->conn, level, data, len);
+       if(rv != 0) {
+               log_err("ngtcp2_conn_submit_crypto_data failed: %s",
+                       ngtcp2_strerror(rv));
+               ngtcp2_conn_set_tls_error(doqdata->conn, rv);
+               return 0;
+       }
+       return 1;
+}
+
+/** quic_method flush_flight function.
+ * Nothing is done here, it only reports success.
+ * @param ssl: the TLS object, unused.
+ * @return 1, always.
+ */
+static int
+flush_flight(SSL* ATTR_UNUSED(ssl))
+{
+       return 1;
+}
+
+/** quic_method send_alert function.
+ * Stores the alert value in the tls_alert member of the doq client
+ * data, where on_read picks it up when ngtcp2_conn_read_pkt fails with
+ * a crypto error.
+ * @param ssl: the TLS object, the doq client data is fetched from it.
+ * @param level: the encryption level, unused.
+ * @param alert: the alert value.
+ * @return 1, always.
+ */
+static int
+send_alert(SSL *ssl, enum ssl_encryption_level_t ATTR_UNUSED(level),
+       uint8_t alert)
+{
+       struct doq_client_data* data = get_app_data(ssl);
+       data->tls_alert = alert;
+       return 1;
+}
+#endif /* HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT */
+
+/** new session callback. We can write it to file for resumption later.
+ * The session is written in PEM format to data->session_file.
+ * @param ssl: the TLS object, the doq client data is fetched from it.
+ * @param session: the new TLS session.
+ * @return 0, always, also when the session has been written. That
+ *     tells OpenSSL that this callback did not keep a reference to
+ *     the session.
+ */
+static int
+new_session_cb(SSL* ssl, SSL_SESSION* session)
+{
+       struct doq_client_data* data = get_app_data(ssl);
+       BIO *f;
+       log_assert(data->session_file);
+       verbose(1, "new session cb: the ssl session max_early_data_size is %u",
+               (unsigned)SSL_SESSION_get_max_early_data(session));
+       f = BIO_new_file(data->session_file, "w");
+       if(!f) {
+               log_err("Could not open %s: %s", data->session_file,
+                       strerror(errno));
+               return 0;
+       }
+       /* only report the session as written if the write succeeded */
+       if(!PEM_write_bio_SSL_SESSION(f, session)) {
+               log_crypto_err("Could not PEM_write_bio_SSL_SESSION");
+               BIO_free(f);
+               return 0;
+       }
+       BIO_free(f);
+       verbose(1, "written tls session to %s", data->session_file);
+       return 0;
+}
+
+/** Setup the TLS context.
+ * Creates a client TLS context that is restricted to TLS 1.3, with the
+ * default verify paths, and configures it for QUIC: either with
+ * ngtcp2_crypto_quictls_configure_client_context, or by installing the
+ * quic_method callbacks from this file.
+ * @return the new context. The program exits on failure.
+ */
+static SSL_CTX*
+ctx_client_setup(void)
+{
+       SSL_CTX* ctx = SSL_CTX_new(TLS_client_method());
+       if(!ctx) {
+               log_crypto_err("Could not SSL_CTX_new");
+               exit(1);
+       }
+       SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION);
+       SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION);
+       SSL_CTX_set_default_verify_paths(ctx);
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT
+       if(ngtcp2_crypto_quictls_configure_client_context(ctx) != 0) {
+               log_err("ngtcp2_crypto_quictls_configure_client_context failed");
+               exit(1);
+       }
+#else
+       memset(&quic_method, 0, sizeof(quic_method));
+       quic_method.set_encryption_secrets = &set_encryption_secrets;
+       quic_method.add_handshake_data = &add_handshake_data;
+       quic_method.flush_flight = &flush_flight;
+       quic_method.send_alert = &send_alert;
+       SSL_CTX_set_quic_method(ctx, &quic_method);
+#endif
+       return ctx;
+}
+
+
+/** Setup the TLS object.
+ * Creates the TLS object from data->ctx, attaches the doq client data
+ * as its app data, puts it in connect state on data->fd, selects the
+ * QUIC transport parameter codepoint based on data->quic_version, and
+ * sets the ALPN protocol "doq" and the SNI host name "localhost".
+ * @param data: the doq client data.
+ * @return the new TLS object. The program exits on failure.
+ */
+static SSL*
+ssl_client_setup(struct doq_client_data* data)
+{
+       SSL* ssl = SSL_new(data->ctx);
+       if(!ssl) {
+               log_crypto_err("Could not SSL_new");
+               exit(1);
+       }
+       set_app_data(ssl, data);
+       SSL_set_connect_state(ssl);
+       if(!SSL_set_fd(ssl, data->fd)) {
+               log_crypto_err("Could not SSL_set_fd");
+               exit(1);
+       }
+       if((data->quic_version & 0xff000000) == 0xff000000) {
+               SSL_set_quic_use_legacy_codepoint(ssl, 1);
+       } else {
+               SSL_set_quic_use_legacy_codepoint(ssl, 0);
+       }
+       SSL_set_alpn_protos(ssl, (const unsigned char *)"\x03""doq", 4); /* length byte 3, then "doq" */
+       /* send the SNI host name */
+       SSL_set_tlsext_host_name(ssl, "localhost");
+       return ssl;
+}
+
+/** Get packet ecn information.
+ * Looks through the ancillary data of the received message for the
+ * IPV6_TCLASS item (for AF_INET6) or the IP_TOS item (otherwise) and
+ * returns its value.
+ * @param msg: the received message header with ancillary data.
+ * @param family: address family of the packet, AF_INET6 selects the
+ *     IPv6 traffic class item.
+ * @return the traffic class or TOS value, or 0 if the item is not
+ *     present.
+ */
+static uint32_t
+msghdr_get_ecn(struct msghdr* msg, int family)
+{
+#ifndef S_SPLINT_S
+       struct cmsghdr* cmsg;
+       if(family == AF_INET6) {
+               for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
+                       cmsg = CMSG_NXTHDR(msg, cmsg)) {
+                       if(cmsg->cmsg_level == IPPROTO_IPV6 &&
+                               cmsg->cmsg_type == IPV6_TCLASS &&
+                               cmsg->cmsg_len != 0) {
+                               uint8_t* ecn;
+                               /* The traffic class is delivered as an
+                                * int. Copy it out as an int, so that
+                                * the value is also correct on big endian
+                                * hosts, where the first byte is not the
+                                * low byte. */
+                               if(cmsg->cmsg_len >= CMSG_LEN(sizeof(int))) {
+                                       int tclass = 0;
+                                       memcpy(&tclass, CMSG_DATA(cmsg),
+                                               sizeof(tclass));
+                                       return (uint32_t)(tclass & 0xff);
+                               }
+                               /* shorter item, read the single byte */
+                               ecn = (uint8_t*)CMSG_DATA(cmsg);
+                               return *ecn;
+                       }
+               }
+               return 0;
+       }
+       for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
+               cmsg = CMSG_NXTHDR(msg, cmsg)) {
+               if(cmsg->cmsg_level == IPPROTO_IP &&
+                       cmsg->cmsg_type == IP_TOS &&
+                       cmsg->cmsg_len != 0) {
+                       uint8_t* ecn = (uint8_t*)CMSG_DATA(cmsg);
+                       return *ecn;
+               }
+       }
+       return 0;
+#endif /* S_SPLINT_S */
+}
+
+/** Set the ecn on the transmission.
+ * The value is set as a socket option on the fd: IPV6_TCLASS for
+ * AF_INET6 and IP_TOS for other families. A failure is logged.
+ * @param fd: the socket.
+ * @param family: the address family of the destination.
+ * @param ecn: the value to set.
+ */
+static void
+set_ecn(int fd, int family, uint32_t ecn)
+{
+       unsigned int tos = ecn;
+       if(family != AF_INET6) {
+               /* IPv4, the value goes in the TOS field */
+               if(setsockopt(fd, IPPROTO_IP, IP_TOS, &tos,
+                       (socklen_t)sizeof(tos)) == -1) {
+                       log_err("setsockopt(.. IP_TOS ..): %s",
+                               strerror(errno));
+               }
+               return;
+       }
+       /* IPv6, the value goes in the traffic class field */
+       if(setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, &tos,
+               (socklen_t)sizeof(tos)) == -1) {
+               log_err("setsockopt(.. IPV6_TCLASS ..): %s",
+                       strerror(errno));
+       }
+}
+
+/** Send a packet.
+ * The packet is sent to data->dest_addr with a nonblocking sendmsg,
+ * after the ecn value has been set on the socket. An interrupted call
+ * is retried. If the socket is blocked (EAGAIN), the packet is stored
+ * in data->blocked_pkt for later, unless it is the blocked packet
+ * itself that is being sent again. For other errors the error is logged
+ * and an application error is set for the connection close.
+ * @param data: the doq client data.
+ * @param ecn: the ecn value for the transmission.
+ * @param buf: the packet bytes.
+ * @param buf_len: length of buf.
+ * @param is_blocked_pkt: nonzero if buf is the previously blocked
+ *     packet, then it is not copied to the blocked packet buffer again.
+ * @param send_is_blocked: if not NULL, set to 1 when the send blocked.
+ * @return 1 if the packet was sent, 0 if it blocked or failed.
+ */
+static int
+doq_client_send_pkt(struct doq_client_data* data, uint32_t ecn, uint8_t* buf,
+       size_t buf_len, int is_blocked_pkt, int* send_is_blocked)
+{
+       struct msghdr msg;
+       struct iovec iov[1];
+       ssize_t ret;
+       iov[0].iov_base = buf;
+       iov[0].iov_len = buf_len;
+       memset(&msg, 0, sizeof(msg));
+       msg.msg_name = (void*)&data->dest_addr;
+       msg.msg_namelen = data->dest_addr_len;
+       msg.msg_iov = iov;
+       msg.msg_iovlen = 1;
+       set_ecn(data->fd, data->dest_addr.ss_family, ecn);
+
+       for(;;) { /* retry while interrupted */
+               ret = sendmsg(data->fd, &msg, MSG_DONTWAIT);
+               if(ret == -1 && errno == EINTR)
+                       continue;
+               break;
+       }
+       if(ret == -1) {
+               if(errno == EAGAIN) {
+                       if(buf_len >
+                               sldns_buffer_capacity(data->blocked_pkt))
+                               return 0; /* Cannot store it, but the buffers
+                               are equal length and large enough, so this
+                               should not happen. */
+                       data->have_blocked_pkt = 1;
+                       if(send_is_blocked)
+                               *send_is_blocked = 1;
+                       /* If we already send the previously blocked packet,
+                        * no need to copy it, otherwise store the packet for
+                        * later. */
+                       if(!is_blocked_pkt) {
+                               data->blocked_pkt_pi.ecn = ecn;
+                               sldns_buffer_clear(data->blocked_pkt);
+                               sldns_buffer_write(data->blocked_pkt, buf,
+                                       buf_len);
+                               sldns_buffer_flip(data->blocked_pkt);
+                       }
+                       return 0;
+               }
+               log_err("doq sendmsg: %s", strerror(errno));
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+               ngtcp2_ccerr_set_application_error(&data->ccerr, -1, NULL, 0);
+#else
+               ngtcp2_connection_close_error_set_application_error(&data->last_error, -1, NULL, 0);
+#endif
+               return 0;
+       }
+       return 1;
+}
+
+/** Change the write interest of the event on the fd, for when we have
+ * data or when congested. The event is removed, the write bit is
+ * changed, and the event is added again.
+ * @param data: the doq client data with the event.
+ * @param do_write: nonzero to listen for write events, zero to stop
+ *     listening for write events.
+ */
+static void
+event_change_write(struct doq_client_data* data, int do_write)
+{
+       ub_event_del(data->ev);
+       if(!do_write)
+               ub_event_del_bits(data->ev, UB_EV_WRITE);
+       else    ub_event_add_bits(data->ev, UB_EV_WRITE);
+       if(ub_event_add(data->ev, NULL) == 0)
+               return;
+       fatal_exit("could not ub_event_add");
+}
+
+/** Write the connection close, with possible error.
+ * Nothing is done if there is no connection, or if the connection is
+ * already in the closing or draining period, or if the stored close
+ * error is the idle close. Otherwise a connection close packet is
+ * created from the stored close error and sent. A blocked packet that
+ * was waiting to be sent is dropped.
+ * @param data: the doq client data.
+ */
+static void
+write_conn_close(struct doq_client_data* data)
+{
+       struct ngtcp2_path_storage ps;
+       struct ngtcp2_pkt_info pi;
+       ngtcp2_ssize ret;
+       if(!data->conn ||
+#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
+               ngtcp2_conn_in_closing_period(data->conn) ||
+#else
+               ngtcp2_conn_is_in_closing_period(data->conn) ||
+#endif
+#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
+               ngtcp2_conn_in_draining_period(data->conn)
+#else
+               ngtcp2_conn_is_in_draining_period(data->conn)
+#endif
+               )
+               return;
+       /* Drop blocked packet if there is one, the connection is being
+        * closed. And thus no further data traffic. */
+       data->have_blocked_pkt = 0;
+       if(
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+               data->ccerr.type == NGTCP2_CCERR_TYPE_IDLE_CLOSE
+#else
+               data->last_error.type ==
+               NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE
+#endif
+               ) {
+               /* do not call ngtcp2_conn_write_connection_close on the
+                * connection because the ngtcp2_conn_handle_expiry call
+                * has returned NGTCP2_ERR_IDLE_CLOSE. But continue to close
+                * the connection. */
+               return;
+       }
+       verbose(1, "write connection close");
+       ngtcp2_path_storage_zero(&ps);
+       sldns_buffer_clear(data->pkt_buf);
+       ret = ngtcp2_conn_write_connection_close(
+               data->conn, &ps.path, &pi, sldns_buffer_begin(data->pkt_buf),
+               sldns_buffer_remaining(data->pkt_buf),
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+               &data->ccerr
+#else
+               &data->last_error
+#endif
+               , get_timestamp_nanosec());
+       if(ret < 0) {
+               log_err("ngtcp2_conn_write_connection_close failed: %s",
+                       ngtcp2_strerror(ret));
+               return;
+       }
+       verbose(1, "write connection close packet length %d", (int)ret);
+       if(ret == 0) /* no packet to send */
+               return;
+       doq_client_send_pkt(data, pi.ecn, sldns_buffer_begin(data->pkt_buf),
+               ret, 0, NULL);
+}
+
+/** Disconnect, we are done.
+ * Writes the connection close and asks the event base to exit its loop.
+ * @param data: the doq client data.
+ */
+static void
+disconnect(struct doq_client_data* data)
+{
+       verbose(1, "disconnect");
+       write_conn_close(data);
+       ub_event_base_loopexit(data->base);
+}
+
+/** The expire timer callback.
+ * Lets ngtcp2 handle the expiry for the connection. If that fails, the
+ * error is stored as the close error and the client disconnects.
+ * Otherwise the timer is set again and pending data is written.
+ * @param fd: unused.
+ * @param bits: unused.
+ * @param arg: the doq client data.
+ */
+void doq_client_timer_cb(int ATTR_UNUSED(fd),
+       short ATTR_UNUSED(bits), void* arg)
+{
+       struct doq_client_data* data = (struct doq_client_data*)arg;
+       ngtcp2_tstamp now = get_timestamp_nanosec();
+       int rv;
+
+       verbose(1, "doq expire_timer");
+       data->expire_timer_added = 0; /* the timer has fired */
+       rv = ngtcp2_conn_handle_expiry(data->conn, now);
+       if(rv != 0) {
+               log_err("ngtcp2_conn_handle_expiry failed: %s",
+                       ngtcp2_strerror(rv));
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+               ngtcp2_ccerr_set_liberr(&data->ccerr, rv, NULL, 0);
+#else
+               ngtcp2_connection_close_error_set_transport_error_liberr(
+                       &data->last_error, rv, NULL, 0);
+#endif
+               disconnect(data);
+               return;
+       }
+       update_timer(data);
+       on_write(data);
+}
+
+/** Update the timers.
+ * The expire timer is set to fire at the expiry time that the ngtcp2
+ * connection reports. If that time has already passed the timer is
+ * added with a zero timeout. A timer that was added before is removed
+ * first.
+ * @param data: the doq client data with the connection and the timer.
+ */
+static void
+update_timer(struct doq_client_data* data)
+{
+       ngtcp2_tstamp expiry = ngtcp2_conn_get_expiry(data->conn);
+       ngtcp2_tstamp now = get_timestamp_nanosec();
+       /* time until the expiry, zero if it has already expired */
+       ngtcp2_tstamp delay = (expiry > now)?(expiry - now):0;
+       struct timeval tv;
+
+       /* remove the timer if it is set */
+       if(data->expire_timer_added) {
+               ub_timer_del(data->expire_timer);
+               data->expire_timer_added = 0;
+       }
+
+       /* convert the delay to seconds and microseconds */
+       memset(&tv, 0, sizeof(tv));
+       tv.tv_sec = delay / NGTCP2_SECONDS;
+       tv.tv_usec = (delay / NGTCP2_MICROSECONDS)%1000000;
+       verbose(1, "update_timer in %d.%6.6d secs", (int)tv.tv_sec,
+               (int)tv.tv_usec);
+       if(ub_timer_add(data->expire_timer, data->base,
+               &doq_client_timer_cb, data, &tv) == 0) {
+               data->expire_timer_added = 1;
+               return;
+       }
+       log_err("timer_add failed: could not add expire timer");
+}
+
+/** perform read operations on fd */
+static void
+on_read(struct doq_client_data* data)
+{
+       struct sockaddr_storage addr;
+       struct iovec iov[1];
+       struct msghdr msg;
+       union {
+               struct cmsghdr hdr;
+               char buf[256];
+       } ancil;
+       int i;
+       ssize_t rcv;
+       ngtcp2_pkt_info pi;
+       int rv;
+       struct ngtcp2_path path;
+
+       for(i=0; i<10; i++) {
+               msg.msg_name = &addr;
+               msg.msg_namelen = (socklen_t)sizeof(addr);
+               iov[0].iov_base = sldns_buffer_begin(data->pkt_buf);
+               iov[0].iov_len = sldns_buffer_remaining(data->pkt_buf);
+               msg.msg_iov = iov;
+               msg.msg_iovlen = 1;
+               msg.msg_control = ancil.buf;
+#ifndef S_SPLINT_S
+               msg.msg_controllen = sizeof(ancil.buf);
+#endif /* S_SPLINT_S */
+               msg.msg_flags = 0;
+
+               rcv = recvmsg(data->fd, &msg, MSG_DONTWAIT);
+               if(rcv == -1) {
+                       if(errno == EINTR || errno == EAGAIN)
+                               break;
+                       log_err_addr("doq recvmsg", strerror(errno),
+                               &data->dest_addr, sizeof(data->dest_addr_len));
+                       break;
+               }
+
+               pi.ecn = msghdr_get_ecn(&msg, addr.ss_family);
+               verbose(1, "recvmsg %d ecn=0x%x", (int)rcv, (int)pi.ecn);
+
+               memset(&path, 0, sizeof(path));
+               path.local.addr = (void*)&data->local_addr;
+               path.local.addrlen = data->local_addr_len;
+               path.remote.addr = (void*)msg.msg_name;
+               path.remote.addrlen = msg.msg_namelen;
+               rv = ngtcp2_conn_read_pkt(data->conn, &path, &pi,
+                       iov[0].iov_base, rcv, get_timestamp_nanosec());
+               if(rv != 0) {
+                       log_err("ngtcp2_conn_read_pkt failed: %s",
+                               ngtcp2_strerror(rv));
+                       if(
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+                               data->ccerr.error_code == 0
+#else
+                               data->last_error.error_code == 0
+#endif
+                               ) {
+                               if(rv == NGTCP2_ERR_CRYPTO) {
+                                       /* in picotls the tls alert may need
+                                        * to be copied, but this is with
+                                        * openssl. And we have the value
+                                        * data.tls_alert. */
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+                                       ngtcp2_ccerr_set_tls_alert(
+                                               &data->ccerr, data->tls_alert,
+                                               NULL, 0);
+#else
+                                       ngtcp2_connection_close_error_set_transport_error_tls_alert(
+                                               &data->last_error,
+                                               data->tls_alert, NULL, 0);
+#endif
+                               } else {
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+                                       ngtcp2_ccerr_set_liberr(&data->ccerr,
+                                               rv, NULL, 0);
+#else
+                                       ngtcp2_connection_close_error_set_transport_error_liberr(
+                                               &data->last_error, rv, NULL,
+                                               0);
+#endif
+                               }
+                       }
+                       disconnect(data);
+                       return;
+               }
+       }
+
+       update_timer(data);
+}
+
+/** the write of this query has completed, it has spooled to packets,
+ * set it to have the write done and move it to the list of receive streams.
+ * At verbosity above 0 the query is logged.
+ * @param data: the doq client data with the send and receive lists.
+ * @param str: the stream for the query; it is moved from
+ *     data->query_list_send to data->query_list_receive.
+ */
+static void
+query_write_is_done(struct doq_client_data* data,
+       struct doq_client_stream* str)
+{
+       if(verbosity > 0) {
+               char* logs = client_stream_string(str);
+               verbose(1, "query %s write is done", logs);
+               free(logs);
+       }
+       str->write_is_done = 1;
+       stream_list_move(str, data->query_list_send, data->query_list_receive);
+}
+
+/** write the data streams, if possible */
+static int
+write_streams(struct doq_client_data* data)
+{
+       ngtcp2_path_storage ps;
+       ngtcp2_tstamp ts = get_timestamp_nanosec();
+       struct doq_client_stream* str, *next;
+       uint32_t flags;
+       /* number of bytes that can be sent without packet pacing */
+       size_t send_quantum = ngtcp2_conn_get_send_quantum(data->conn);
+       /* Overhead is the stream overhead of adding a header onto the data,
+        * this make sure the number of bytes to send in data bytes plus
+        * the overhead overshoots the target quantum by a smaller margin,
+        * and then it stops sending more bytes. With zero it would overshoot
+        * more, an accurate number would not overshoot. It is based on the
+        * stream frame header size. */
+       size_t accumulated_send = 0, overhead_stream = 24, overhead_pkt = 60,
+               max_packet_size = 1200;
+       size_t num_packets = 0, max_packets = 65535;
+       ngtcp2_path_storage_zero(&ps);
+       str = data->query_list_send->first;
+
+       if(data->cc_algo != NGTCP2_CC_ALGO_BBR
+#ifdef NGTCP2_CC_ALGO_BBR_V2
+               && data->cc_algo != NGTCP2_CC_ALGO_BBR_V2
+#endif
+#ifdef NGTCP2_CC_ALGO_BBR2
+               && data->cc_algo != NGTCP2_CC_ALGO_BBR2
+#endif
+               ) {
+               /* If we do not have a packet pacing congestion control
+                * algorithm, limit the number of packets. */
+               max_packets = 10;
+       }
+
+       /* loop like this, because at the start, the send list is empty,
+        * and we want to send handshake packets. But when there is a
+        * send_list, loop through that. */
+       for(;;) {
+               int64_t stream_id;
+               ngtcp2_pkt_info pi;
+               ngtcp2_vec datav[2];
+               size_t datav_count = 0;
+               int fin;
+               ngtcp2_ssize ret;
+               ngtcp2_ssize ndatalen = 0;
+               int send_is_blocked = 0;
+
+               if(str) {
+                       /* pick up next in case this one is deleted */
+                       next = str->next;
+                       if(verbosity > 0) {
+                               char* logs = client_stream_string(str);
+                               verbose(1, "query %s write stream", logs);
+                               free(logs);
+                       }
+                       stream_id = str->stream_id;
+                       fin = 1;
+                       if(str->nwrite < 2) {
+                               str->data_tcplen = htons(str->data_len);
+                               datav[0].base = ((uint8_t*)&str->data_tcplen)+str->nwrite;
+                               datav[0].len = 2-str->nwrite;
+                               datav[1].base = str->data;
+                               datav[1].len = str->data_len;
+                               datav_count = 2;
+                       } else {
+                               datav[0].base = str->data + (str->nwrite-2);
+                               datav[0].len = str->data_len - (str->nwrite-2);
+                               datav_count = 1;
+                       }
+               } else {
+                       next = NULL;
+                       verbose(1, "write stream -1.");
+                       stream_id = -1;
+                       fin = 0;
+                       datav[0].base = NULL;
+                       datav[0].len = 0;
+                       datav_count = 1;
+               }
+
+               /* Does the first data entry fit into the send quantum? */
+               /* Check if the data size sent, with a max of one full packet,
+                * with added stream header and packet header is allowed
+                * within the send quantum number of bytes. If not, it does
+                * not fit, and wait. */
+               if(accumulated_send == 0 && ((datav_count == 1 &&
+                       (datav[0].len>max_packet_size?max_packet_size:
+                       datav[0].len)+overhead_stream+overhead_pkt >
+                       send_quantum) ||
+                       (datav_count == 2 &&
+                       (datav[0].len+datav[1].len>max_packet_size?
+                       max_packet_size:datav[0].len+datav[1].len)
+                       +overhead_stream+overhead_pkt > send_quantum))) {
+                       /* congestion limited */
+                       ngtcp2_conn_update_pkt_tx_time(data->conn, ts);
+                       event_change_write(data, 0);
+                       /* update the timer to wait until it is possible to
+                        * write again */
+                       update_timer(data);
+                       return 0;
+               }
+               flags = 0;
+               if(str && str->next != NULL) {
+                       /* Coalesce more data from more streams into this
+                        * packet, if possible */
+                       /* There is more than one data entry in this send
+                        * quantum, does the next one fit in the quantum? */
+                       size_t this_send, possible_next_send;
+                       if(datav_count == 1)
+                               this_send = datav[0].len;
+                       else    this_send = datav[0].len + datav[1].len;
+                       if(this_send > max_packet_size)
+                               this_send = max_packet_size;
+                       if(str->next->nwrite < 2)
+                               possible_next_send = (2-str->next->nwrite) +
+                                       str->next->data_len;
+                       else    possible_next_send = str->next->data_len -
+                                       (str->next->nwrite - 2);
+                       if(possible_next_send > max_packet_size)
+                               possible_next_send = max_packet_size;
+                       /* Check if the data lengths that writev returned
+                        * with stream headers added up so far, in
+                        * accumulated_send, with added the data length
+                        * of this send, with a max of one full packet, and
+                        * the data length of the next possible send, with
+                        * a max of one full packet, with a stream header for
+                        * this_send and a stream header for the next possible
+                        * send and a packet header, fit in the send quantum
+                        * number of bytes. If so, ask to add more content
+                        * to the packet with the more flag. */
+                       if(accumulated_send + this_send + possible_next_send
+                               +2*overhead_stream+ overhead_pkt < send_quantum)
+                               flags |= NGTCP2_WRITE_STREAM_FLAG_MORE;
+               }
+               if(fin) {
+                       /* This is the final part of data for this stream */
+                       flags |= NGTCP2_WRITE_STREAM_FLAG_FIN;
+               }
+               sldns_buffer_clear(data->pkt_buf);
+               ret = ngtcp2_conn_writev_stream(data->conn, &ps.path, &pi,
+                       sldns_buffer_begin(data->pkt_buf),
+                       sldns_buffer_remaining(data->pkt_buf), &ndatalen,
+                       flags, stream_id, datav, datav_count, ts);
+               if(ret < 0) {
+                       if(ret == NGTCP2_ERR_WRITE_MORE) {
+                               if(str) {
+                                       str->nwrite += ndatalen;
+                                       if(str->nwrite >= str->data_len+2)
+                                               query_write_is_done(data, str);
+                                       str = next;
+                                       accumulated_send += ndatalen + overhead_stream;
+                                       continue;
+                               }
+                       }
+                       log_err("ngtcp2_conn_writev_stream failed: %s",
+                               ngtcp2_strerror(ret));
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+                       ngtcp2_ccerr_set_liberr(&data->ccerr, ret, NULL, 0);
+#else
+                       ngtcp2_connection_close_error_set_transport_error_liberr(
+                               &data->last_error, ret, NULL, 0);
+#endif
+                       disconnect(data);
+                       return 0;
+               }
+               verbose(1, "writev_stream pkt size %d ndatawritten %d",
+                       (int)ret, (int)ndatalen);
+               if(ndatalen >= 0 && str) {
+                       /* add the new write offset */
+                       str->nwrite += ndatalen;
+                       if(str->nwrite >= str->data_len+2)
+                               query_write_is_done(data, str);
+               }
+               if(ret == 0) {
+                       /* congestion limited */
+                       ngtcp2_conn_update_pkt_tx_time(data->conn, ts);
+                       event_change_write(data, 0);
+                       /* update the timer to wait until it is possible to
+                        * write again */
+                       update_timer(data);
+                       return 0;
+               }
+               if(!doq_client_send_pkt(data, pi.ecn,
+                       sldns_buffer_begin(data->pkt_buf), ret, 0,
+                       &send_is_blocked)) {
+                       if(send_is_blocked) {
+                               /* Blocked packet, wait until it is possible
+                                * to write again and also set a timer. */
+                               event_change_write(data, 1);
+                               update_timer(data);
+                               return 0;
+                       }
+                       /* Packet could not be sent. Like lost and timeout. */
+                       ngtcp2_conn_update_pkt_tx_time(data->conn, ts);
+                       event_change_write(data, 0);
+                       update_timer(data);
+                       return 0;
+               }
+               /* continue */
+               if((size_t)ret >= send_quantum)
+                       break;
+               send_quantum -= ret;
+               accumulated_send = 0;
+               str = next;
+               if(str == NULL)
+                       break;
+               if(++num_packets == max_packets)
+                       break;
+       }
+       ngtcp2_conn_update_pkt_tx_time(data->conn, ts);
+       event_change_write(data, 1);
+       return 1;
+}
+
+/** Retry the packet that an earlier send attempt could not deliver.
+ * Returns 1 when the packet went out and writing can continue. Returns 0
+ * when the caller has to stop: the send blocked again, or the packet
+ * could not be sent at all and is dropped. */
+static int
+send_blocked_pkt(struct doq_client_data* data)
+{
+       int blocked_again = 0;
+       ngtcp2_tstamp now = get_timestamp_nanosec();
+       int sent = doq_client_send_pkt(data, data->blocked_pkt_pi.ecn,
+               sldns_buffer_begin(data->pkt_buf),
+               sldns_buffer_limit(data->pkt_buf), 1, &blocked_again);
+
+       if(sent) {
+               /* The blocked packet is out, the holding buffer is no
+                * longer needed. */
+               data->have_blocked_pkt = 0;
+               ngtcp2_conn_update_pkt_tx_time(data->conn, now);
+               return 1;
+       }
+       if(blocked_again) {
+               /* Still not writable, keep the packet and retry when
+                * the write event fires. */
+               event_change_write(data, 1);
+       } else {
+               /* The packet could not be sent. Treat it like a lost
+                * packet and rely on the timeout. */
+               data->have_blocked_pkt = 0;
+               ngtcp2_conn_update_pkt_tx_time(data->conn, now);
+               event_change_write(data, 0);
+       }
+       /* in both failure cases the timer has to run while waiting */
+       update_timer(data);
+       return 0;
+}
+
+/** Write what can be written on the fd: first a packet left over from
+ * a blocked send, then stream data, unless the connection is closing. */
+static void
+on_write(struct doq_client_data* data)
+{
+       int closing;
+       if(data->have_blocked_pkt && !send_blocked_pkt(data))
+               return;
+       /* the name of the closing period test differs per ngtcp2 version */
+#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
+       closing = ngtcp2_conn_in_closing_period(data->conn);
+#else
+       closing = ngtcp2_conn_is_in_closing_period(data->conn);
+#endif
+       if(closing)
+               return;
+       if(write_streams(data))
+               update_timer(data);
+}
+
+/** Event callback for the client socket. Logs which event bits were
+ * raised, reads when readable, and then always attempts a write. */
+void
+doq_client_event_cb(int ATTR_UNUSED(fd), short bits, void* arg)
+{
+       struct doq_client_data* data = (struct doq_client_data*)arg;
+       int readable = ((bits&UB_EV_READ) != 0);
+       int writable = ((bits&UB_EV_WRITE) != 0);
+       int both = ((bits&(UB_EV_READ|UB_EV_WRITE)) ==
+               (UB_EV_READ|UB_EV_WRITE));
+
+       verbose(1, "doq_client_event_cb %s%s%s",
+               (readable?"EV_READ":""),
+               (both?" ":""),
+               (writable?"EV_WRITE":""));
+       if(readable) {
+               on_read(data);
+       }
+       /* Write regardless of the event bits. The read may have produced
+        * something to send, or content is waiting that can be written. */
+       on_write(data);
+}
+
+/** Load the stored TLS session from the session file and install it on
+ * the SSL object, so that the handshake can resume and send early data.
+ * Returns 1 if the session is set and allows early data, 0 otherwise. */
+static int
+early_data_setup_session(struct doq_client_data* data)
+{
+       int ok = 0;
+       SSL_SESSION* sess;
+       BIO* bio = BIO_new_file(data->session_file, "r");
+       if(!bio) {
+               if(errno != ENOENT) {
+                       log_err("Could not read %s: %s", data->session_file,
+                               strerror(errno));
+               } else {
+                       /* no stored session is not an error */
+                       verbose(1, "session file %s does not exist",
+                               data->session_file);
+               }
+               return 0;
+       }
+       sess = PEM_read_bio_SSL_SESSION(bio, NULL, 0, NULL);
+       if(!sess) {
+               log_crypto_err("Could not read session file with PEM_read_bio_SSL_SESSION");
+               BIO_free(bio);
+               return 0;
+       }
+       BIO_free(bio);
+
+       if(!SSL_set_session(data->ssl, sess)) {
+               log_crypto_err("Could not SSL_set_session");
+       } else if(SSL_SESSION_get_max_early_data(sess) == 0) {
+               log_err("TLS session early data is 0");
+       } else {
+               SSL_set_quic_early_data_enabled(data->ssl, 1);
+               ok = 1;
+       }
+       /* the local reference is released on every path */
+       SSL_SESSION_free(sess);
+       return ok;
+}
+
+#ifndef HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS
+/** see if the first keylen bytes of the line are exactly the given key */
+static int
+transport_key_is(const char* line, size_t keylen, const char* key)
+{
+       return strlen(key) == keylen && strncmp(line, key, keylen) == 0;
+}
+
+/** Parse one "name=number" line from the transport file into params.
+ * The number is read with strtoull, because atoi overflows for values
+ * that do not fit an int and turns a non-numeric value into 0.
+ * @param params: the matching member is set to the parsed number.
+ * @param line: text line, content after the number is ignored.
+ * @return 1 if the line was stored, 0 if the name is unknown or no
+ *     valid number follows the '='. */
+static int
+transport_parse_line(struct ngtcp2_transport_params* params, char* line)
+{
+       unsigned long long num;
+       size_t keylen;
+       char* end = NULL;
+       char* eq = strchr(line, '=');
+       if(!eq)
+               return 0;
+       keylen = (size_t)(eq - line);
+
+       errno = 0;
+       num = strtoull(eq+1, &end, 10);
+       if(end == eq+1 || errno == ERANGE)
+               return 0; /* no digits, or the value is out of range */
+
+       if(transport_key_is(line, keylen, "initial_max_streams_bidi"))
+               params->initial_max_streams_bidi = num;
+       else if(transport_key_is(line, keylen, "initial_max_streams_uni"))
+               params->initial_max_streams_uni = num;
+       else if(transport_key_is(line, keylen,
+               "initial_max_stream_data_bidi_local"))
+               params->initial_max_stream_data_bidi_local = num;
+       else if(transport_key_is(line, keylen,
+               "initial_max_stream_data_bidi_remote"))
+               params->initial_max_stream_data_bidi_remote = num;
+       else if(transport_key_is(line, keylen, "initial_max_stream_data_uni"))
+               params->initial_max_stream_data_uni = num;
+       else if(transport_key_is(line, keylen, "initial_max_data"))
+               params->initial_max_data = num;
+       else if(transport_key_is(line, keylen, "active_connection_id_limit"))
+               params->active_connection_id_limit = num;
+       else if(transport_key_is(line, keylen, "max_datagram_frame_size"))
+               params->max_datagram_frame_size = num;
+       else    return 0; /* unknown name */
+       return 1;
+}
+#endif /* HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS */
+
+/** Read the transport file and give the stored transport parameters to
+ * ngtcp2, for use with early data.
+ * With HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS the file content is
+ * passed to ngtcp2 for decoding, otherwise it is parsed here as
+ * "name=number" text lines.
+ * @param data: client data, the transport_file and conn are used.
+ * @return 1 on success. 0 if the file does not exist, cannot be read or
+ *     cannot be parsed; the reason is printed. */
+static int
+early_data_setup_transport(struct doq_client_data* data)
+{
+#ifdef HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS
+       FILE* in;
+       uint8_t buf[1024];
+       size_t len;
+       int rv;
+       in = fopen(data->transport_file, "r");
+       if(!in) {
+               if(errno == ENOENT) {
+                       verbose(1, "transport file %s does not exist",
+                               data->transport_file);
+                       return 0;
+               }
+               perror(data->transport_file);
+               return 0;
+       }
+       len = fread(buf, 1, sizeof(buf), in);
+       if(ferror(in)) {
+               log_err("%s: read failed: %s", data->transport_file,
+                       strerror(errno));
+               fclose(in);
+               return 0;
+       }
+       fclose(in);
+       rv = ngtcp2_conn_decode_and_set_0rtt_transport_params(data->conn,
+               buf, len);
+       if(rv != 0) {
+               log_err("ngtcp2_conn_decode_and_set_0rtt_transport_params failed: %s",
+                       ngtcp2_strerror(rv));
+               return 0;
+       }
+       return 1;
+#else
+       FILE* in;
+       char buf[1024];
+       int num_lines = 0;
+       struct ngtcp2_transport_params params;
+       memset(&params, 0, sizeof(params));
+       in = fopen(data->transport_file, "r");
+       if(!in) {
+               if(errno == ENOENT) {
+                       verbose(1, "transport file %s does not exist",
+                               data->transport_file);
+                       return 0;
+               }
+               perror(data->transport_file);
+               return 0;
+       }
+       /* Loop on the fgets result. feof only turns true after a read
+        * has hit the end of the file, so testing it before the read
+        * reports the normal end of a file that ends in a newline as a
+        * read failure. */
+       while(fgets(buf, sizeof(buf), in)) {
+               if(!transport_parse_line(&params, buf)) {
+                       log_err("%s: could not parse line '%s'",
+                               data->transport_file, buf);
+                       fclose(in);
+                       return 0;
+               }
+               num_lines++;
+       }
+       /* fgets returned NULL: tell a read error from end of file */
+       if(ferror(in)) {
+               log_err("%s: read failed: %s", data->transport_file,
+                       strerror(errno));
+               fclose(in);
+               return 0;
+       }
+       fclose(in);
+       if(num_lines == 0) {
+               /* an empty file has no parameters to resume with */
+               log_err("%s: no transport parameters found",
+                       data->transport_file);
+               return 0;
+       }
+       ngtcp2_conn_set_early_remote_transport_params(data->conn, &params);
+       return 1;
+#endif
+}
+
+/** Prepare for early data: load the TLS session first, then the
+ * transport parameters. When either step fails, early data is turned
+ * off for this run. */
+static void
+early_data_setup(struct doq_client_data* data)
+{
+       const char* failure = NULL;
+       if(!early_data_setup_session(data))
+               failure = "TLS session resumption failed, early data is disabled";
+       else if(!early_data_setup_transport(data))
+               failure = "Transport parameters set failed, early data is disabled";
+
+       if(failure == NULL)
+               return;
+       verbose(1, "%s", failure);
+       data->early_data_enabled = 0;
+}
+
+/** start the early data transmission
+ * Called from run() before the event loop is dispatched, when early data
+ * is enabled. */
+static void
+early_data_start(struct doq_client_data* data)
+{
+       query_streams_start(data); /* presumably opens the streams for the queued queries; defined outside this part of the file */
+       on_write(data); /* write right away, without waiting for a socket event */
+}
+
+/** create doq_client_data
+ * Allocates the client state, opens the UDP socket, sets up the ngtcp2
+ * connection and the TLS objects, and registers the socket event.
+ * The failures that are checked here end the program with fatal_exit.
+ * @param svr: server address string. The pointer is stored, not copied.
+ * @param port: server port number.
+ * @param base: event base that the socket event and timer are made on.
+ * @param transport_file: transport parameter file name, or NULL. The
+ *     pointer is stored, not copied.
+ * @param session_file: TLS session file name, or NULL. The pointer is
+ *     stored, not copied.
+ * @param quiet: stored in the structure.
+ * @return the new structure, free it with delete_doq_client_data.
+ */
+static struct doq_client_data*
+create_doq_client_data(const char* svr, int port, struct ub_event_base* base,
+       const char* transport_file, const char* session_file, int quiet)
+{
+       struct doq_client_data* data;
+       data = calloc(1, sizeof(*data));
+       if(!data) fatal_exit("calloc failed: out of memory");
+       data->base = base;
+       data->rnd = ub_initstate(NULL);
+       if(!data->rnd) fatal_exit("ub_initstate failed: out of memory");
+       data->svr = svr;
+       get_dest_addr(data, svr, port);
+       data->port = port;
+       data->quiet = quiet;
+       data->pkt_buf = sldns_buffer_new(65552);
+       if(!data->pkt_buf)
+               fatal_exit("sldns_buffer_new failed: out of memory");
+       data->blocked_pkt = sldns_buffer_new(65552);
+       if(!data->blocked_pkt)
+               fatal_exit("sldns_buffer_new failed: out of memory");
+       data->fd = open_svr_udp(data);
+       get_local_addr(data);
+       data->conn = conn_client_setup(data);
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+       ngtcp2_ccerr_default(&data->ccerr);
+#else
+       ngtcp2_connection_close_error_default(&data->last_error);
+#endif
+       data->transport_file = transport_file;
+       data->session_file = session_file;
+       if(data->transport_file && data->session_file)
+               data->early_data_enabled = 1; /* early data needs both files */
+
+       generate_static_secret(data, 32);
+       data->ctx = ctx_client_setup();
+       if(data->session_file) {
+               /* only with a session file is the new session callback
+                * installed on the context */
+               SSL_CTX_set_session_cache_mode(data->ctx,
+                       SSL_SESS_CACHE_CLIENT |
+                       SSL_SESS_CACHE_NO_INTERNAL_STORE);
+               SSL_CTX_sess_set_new_cb(data->ctx, new_session_cb);
+       }
+       data->ssl = ssl_client_setup(data);
+       ngtcp2_conn_set_tls_native_handle(data->conn, data->ssl);
+       if(data->early_data_enabled)
+               early_data_setup(data); /* may clear early_data_enabled again */
+
+       data->ev = ub_event_new(base, data->fd, UB_EV_READ | UB_EV_WRITE |
+               UB_EV_PERSIST, doq_client_event_cb, data);
+       if(!data->ev) {
+               fatal_exit("could not ub_event_new");
+       }
+       if(ub_event_add(data->ev, NULL) != 0) {
+               fatal_exit("could not ub_event_add");
+       }
+       data->expire_timer = ub_event_new(data->base, -1,
+               UB_EV_TIMEOUT, &doq_client_timer_cb, data); /* created here, not added to the base in this function */
+       if(!data->expire_timer)
+               fatal_exit("could not ub_event_new");
+       data->query_list_start = stream_list_create();
+       data->query_list_send = stream_list_create();
+       data->query_list_receive = stream_list_create();
+       data->query_list_stop = stream_list_create();
+       return data;
+}
+
+/** delete doq_client_data
+ * Releases what the structure holds: the connection and its path
+ * addresses, the TLS objects, the buffers, the socket, the query lists,
+ * the random state, the events, and the structure itself.
+ * @param data: the client data, NULL is accepted and ignored.
+ */
+static void
+delete_doq_client_data(struct doq_client_data* data)
+{
+       if(!data)
+               return;
+#if defined(NGTCP2_USE_GENERIC_SOCKADDR) || defined(NGTCP2_USE_GENERIC_IPV6_SOCKADDR)
+       /* The path addresses are freed here, before the connection that
+        * the path is fetched from is deleted. Presumably they were
+        * allocated when the connection was set up; that code is not in
+        * this part of the file. */
+       if(data->conn && data->dest_addr_len != 0) {
+               if(addr_is_ip6(&data->dest_addr, data->dest_addr_len)) {
+#  if defined(NGTCP2_USE_GENERIC_SOCKADDR) || defined(NGTCP2_USE_GENERIC_IPV6_SOCKADDR) /* same test as the enclosing #if */
+                       const struct ngtcp2_path* path6 = ngtcp2_conn_get_path(data->conn);
+                       free(path6->local.addr);
+                       free(path6->remote.addr);
+#  endif
+               } else {
+#  if defined(NGTCP2_USE_GENERIC_SOCKADDR) /* for non-IPv6 only freed with this define */
+                       const struct ngtcp2_path* path = ngtcp2_conn_get_path(data->conn);
+                       free(path->local.addr);
+                       free(path->remote.addr);
+#  endif
+               }
+       }
+#endif
+       ngtcp2_conn_del(data->conn);
+       SSL_free(data->ssl);
+       sldns_buffer_free(data->pkt_buf);
+       sldns_buffer_free(data->blocked_pkt);
+       if(data->fd != -1)
+               sock_close(data->fd);
+       SSL_CTX_free(data->ctx);
+       stream_list_free(data->query_list_start);
+       stream_list_free(data->query_list_send);
+       stream_list_free(data->query_list_receive);
+       stream_list_free(data->query_list_stop);
+       ub_randfree(data->rnd);
+       if(data->ev) {
+               ub_event_del(data->ev); /* unregister before the free */
+               ub_event_free(data->ev);
+       }
+       if(data->expire_timer_added)
+               ub_timer_del(data->expire_timer); /* only when the timer is marked as added */
+       ub_event_free(data->expire_timer);
+       free(data->static_secret_data);
+       free(data);
+}
+
+/** Create the event base on which the events and timers are registered.
+ * The time value in now is zeroed first. Exits on failure. With
+ * verbosity enabled the event system in use is logged. */
+static struct ub_event_base*
+create_event_base(time_t* secs, struct timeval* now)
+{
+       const char* evnm = "event";
+       const char* evsys = "";
+       const char* evmethod = "";
+       struct ub_event_base* base;
+
+       memset(now, 0, sizeof(*now));
+       base = ub_default_event_base(1, secs, now);
+       if(base == NULL) {
+               fatal_exit("could not create ub_event base");
+       }
+
+       ub_get_event_sys(base, &evnm, &evsys, &evmethod);
+       if(verbosity != 0) {
+               log_info("%s %s uses %s method", evnm, evsys, evmethod);
+       }
+       return base;
+}
+
+/** Make a client stream for the query in buf and append it to the list
+ * of queries that have to be started. Exits if the stream cannot be
+ * created. */
+static void
+client_enter_query_buf(struct doq_client_data* data, struct sldns_buffer* buf)
+{
+       struct doq_client_stream* stream = client_stream_create(buf);
+       if(stream == NULL) {
+               fatal_exit("client_stream_create failed: out of memory");
+       }
+       stream_list_append(data->query_list_start, stream);
+}
+
+/** Turn the command line arguments into queries and enter them in the
+ * query list. The arguments are consumed three at a time and passed to
+ * make_query. With verbosity, every query is printed. */
+static void
+client_enter_queries(struct doq_client_data* data, char** qs, int count)
+{
+       int pos = 0;
+       while(pos < count) {
+               struct sldns_buffer* query = make_query(qs[pos], qs[pos+1],
+                       qs[pos+2]);
+               if(verbosity > 0) {
+                       char* text;
+                       log_buf(1, "send query", query);
+                       text = sldns_wire2str_pkt(sldns_buffer_begin(query),
+                               sldns_buffer_limit(query));
+                       if(text) {
+                               verbose(1, "send query:\n%s", text);
+                       } else {
+                               verbose(1, "could not sldns_wire2str_pkt");
+                       }
+                       free(text);
+               }
+               client_enter_query_buf(data, query);
+               /* the buffer is freed here, right after it is entered */
+               sldns_buffer_free(query);
+               pos += 3;
+       }
+}
+
+/** Run the doqclient queries: set up the event base and client state,
+ * enter the queries, dispatch events until the loop ends, and clean up. */
+static void run(const char* svr, int port, char** qs, int count,
+       const char* transport_file, const char* session_file, int quiet)
+{
+       struct timeval now;
+       time_t secs = 0;
+       struct ub_event_base* base = create_event_base(&secs, &now);
+       struct doq_client_data* data = create_doq_client_data(svr, port,
+               base, transport_file, session_file, quiet);
+
+       /* the queries are entered before anything is sent */
+       client_enter_queries(data, qs, count);
+       if(data->early_data_enabled) {
+               early_data_start(data);
+       }
+
+       /* returns when the event loop is done */
+       ub_event_base_dispatch(base);
+
+       /* the client data goes first, the base it uses last */
+       delete_doq_client_data(data);
+       ub_event_base_free(base);
+}
+#endif /* HAVE_NGTCP2 */
+
+#ifdef HAVE_NGTCP2
+/** getopt global, in case header files fail to declare it. */
+extern int optind;
+/** getopt global, in case header files fail to declare it. */
+extern char* optarg;
+/** Main routine for doqclient: parse the options, check the query
+ * arguments and run the queries.
+ * Options: -p port, -q quiet, -s server, -v verbose, -x transport file,
+ * -y session file, -h usage. The remaining arguments are taken three at
+ * a time as qname, qtype and qclass.
+ * @param argc: argument count. It is used, so it is not ATTR_UNUSED.
+ * @param argv: argument strings.
+ * @return 0 after the run, 1 on an argument or startup error.
+ */
+int main(int argc, char** argv)
+{
+       int c;
+       int port = UNBOUND_DNS_OVER_QUIC_PORT, quiet = 0;
+       const char* svr = "127.0.0.1", *transport_file = NULL,
+               *session_file = NULL;
+       char* portend = NULL;
+       long portnum;
+#ifdef USE_WINSOCK
+       WSADATA wsa_data;
+       if(WSAStartup(MAKEWORD(2,2), &wsa_data) != 0) {
+               printf("WSAStartup failed\n");
+               return 1;
+       }
+#endif
+       checklock_set_output_name("ublocktrace-doqclient");
+       checklock_start();
+       log_init(0, 0, 0);
+       log_ident_set("doqclient");
+
+       while((c=getopt(argc, argv, "hp:qs:vx:y:")) != -1) {
+               switch(c) {
+                       case 'p':
+                               /* strtol and not atoi, so that trailing
+                                * garbage and values outside of the port
+                                * range are refused instead of silently
+                                * accepted. An overflow in strtol gives
+                                * LONG_MIN or LONG_MAX, and those fail the
+                                * range check. */
+                               portnum = strtol(optarg, &portend, 10);
+                               if(portend == optarg || *portend != 0) {
+                                       printf("error parsing port, "
+                                           "number expected: %s\n", optarg);
+                                       return 1;
+                               }
+                               if(portnum < 0 || portnum > 65535) {
+                                       printf("error parsing port, "
+                                           "out of range 0-65535: %s\n",
+                                           optarg);
+                                       return 1;
+                               }
+                               port = (int)portnum;
+                               break;
+                       case 'q':
+                               quiet++;
+                               break;
+                       case 's':
+                               svr = optarg;
+                               break;
+                       case 'v':
+                               verbosity++;
+                               break;
+                       case 'x':
+                               transport_file = optarg;
+                               break;
+                       case 'y':
+                               session_file = optarg;
+                               break;
+                       case 'h':
+                       case '?':
+                       default:
+                               /* usage is defined earlier in the file,
+                                * presumably it does not return */
+                               usage(argv);
+               }
+       }
+
+       argc -= optind;
+       argv += optind;
+
+       /* every query takes three arguments */
+       if(argc%3!=0) {
+               printf("Invalid input. Specify qname, qtype, and qclass.\n");
+               return 1;
+       }
+       if(port == 53) {
+               printf("Error: port number 53 not for DNS over QUIC. Port number 53 is not allowed to be used with DNS over QUIC. It is used for DNS datagrams.\n");
+               return 1;
+       }
+
+       run(svr, port, argv, argc, transport_file, session_file, quiet);
+
+       checklock_stop();
+#ifdef USE_WINSOCK
+       WSACleanup();
+#endif
+       return 0;
+}
+#else /* HAVE_NGTCP2 */
+int main(int ATTR_UNUSED(argc), char** ATTR_UNUSED(argv))
+{
+       printf("Compiled without ngtcp2 for QUIC, cannot run doqclient.\n");
+       return 1; /* the build without HAVE_NGTCP2 always exits with failure */
+}
+#endif /* HAVE_NGTCP2 */
+
+/***--- definitions to make fptr_wlist work. ---***/
+/* These are callbacks, similar to smallapp callbacks, except the debug
+ * tool callbacks are not in it */
+struct tube;
+struct query_info;
+#include "util/data/packed_rrset.h"
+#include "daemon/worker.h"
+#include "daemon/remote.h"
+#include "util/fptr_wlist.h"
+#include "libunbound/context.h"
+
+void worker_handle_control_cmd(struct tube* ATTR_UNUSED(tube),
+       uint8_t* ATTR_UNUSED(buffer), size_t ATTR_UNUSED(len),
+       int ATTR_UNUSED(error), void* ATTR_UNUSED(arg))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+}
+
+int worker_handle_request(struct comm_point* ATTR_UNUSED(c), 
+       void* ATTR_UNUSED(arg), int ATTR_UNUSED(error),
+        struct comm_reply* ATTR_UNUSED(repinfo))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+       return 0;
+}
+
+int worker_handle_service_reply(struct comm_point* ATTR_UNUSED(c), 
+       void* ATTR_UNUSED(arg), int ATTR_UNUSED(error),
+        struct comm_reply* ATTR_UNUSED(reply_info))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+       return 0;
+}
+
+int remote_accept_callback(struct comm_point* ATTR_UNUSED(c), 
+       void* ATTR_UNUSED(arg), int ATTR_UNUSED(error),
+        struct comm_reply* ATTR_UNUSED(repinfo))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+       return 0;
+}
+
+int remote_control_callback(struct comm_point* ATTR_UNUSED(c), 
+       void* ATTR_UNUSED(arg), int ATTR_UNUSED(error),
+        struct comm_reply* ATTR_UNUSED(repinfo))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+       return 0;
+}
+
+void worker_sighandler(int ATTR_UNUSED(sig), void* ATTR_UNUSED(arg))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+}
+
+struct outbound_entry* worker_send_query(
+       struct query_info* ATTR_UNUSED(qinfo), uint16_t ATTR_UNUSED(flags),
+       int ATTR_UNUSED(dnssec), int ATTR_UNUSED(want_dnssec),
+       int ATTR_UNUSED(nocaps), int ATTR_UNUSED(check_ratelimit),
+       struct sockaddr_storage* ATTR_UNUSED(addr),
+       socklen_t ATTR_UNUSED(addrlen), uint8_t* ATTR_UNUSED(zone),
+       size_t ATTR_UNUSED(zonelen), int ATTR_UNUSED(tcp_upstream),
+       int ATTR_UNUSED(ssl_upstream), char* ATTR_UNUSED(tls_auth_name),
+       struct module_qstate* ATTR_UNUSED(q), int* ATTR_UNUSED(was_ratelimited))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+       return 0;
+}
+
+#ifdef UB_ON_WINDOWS
+void
+worker_win_stop_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), void* 
+       ATTR_UNUSED(arg)) {
+       log_assert(0); /* placeholder for fptr_wlist, only built with UB_ON_WINDOWS */
+}
+
+void
+wsvc_cron_cb(void* ATTR_UNUSED(arg))
+{
+       log_assert(0); /* placeholder for fptr_wlist, only built with UB_ON_WINDOWS */
+}
+#endif /* UB_ON_WINDOWS */
+
+void 
+worker_alloc_cleanup(void* ATTR_UNUSED(arg))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+}
+
+struct outbound_entry* libworker_send_query(
+       struct query_info* ATTR_UNUSED(qinfo), uint16_t ATTR_UNUSED(flags),
+       int ATTR_UNUSED(dnssec), int ATTR_UNUSED(want_dnssec),
+       int ATTR_UNUSED(nocaps), int ATTR_UNUSED(check_ratelimit),
+       struct sockaddr_storage* ATTR_UNUSED(addr),
+       socklen_t ATTR_UNUSED(addrlen), uint8_t* ATTR_UNUSED(zone),
+       size_t ATTR_UNUSED(zonelen), int ATTR_UNUSED(tcp_upstream),
+       int ATTR_UNUSED(ssl_upstream), char* ATTR_UNUSED(tls_auth_name),
+       struct module_qstate* ATTR_UNUSED(q), int* ATTR_UNUSED(was_ratelimited))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+       return 0;
+}
+
+int libworker_handle_service_reply(struct comm_point* ATTR_UNUSED(c), 
+       void* ATTR_UNUSED(arg), int ATTR_UNUSED(error),
+        struct comm_reply* ATTR_UNUSED(reply_info))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+       return 0;
+}
+
+void libworker_handle_control_cmd(struct tube* ATTR_UNUSED(tube),
+        uint8_t* ATTR_UNUSED(buffer), size_t ATTR_UNUSED(len),
+        int ATTR_UNUSED(error), void* ATTR_UNUSED(arg))
+{
+        log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+}
+
+void libworker_fg_done_cb(void* ATTR_UNUSED(arg), int ATTR_UNUSED(rcode), 
+       struct sldns_buffer* ATTR_UNUSED(buf), enum sec_status ATTR_UNUSED(s),
+       char* ATTR_UNUSED(why_bogus), int ATTR_UNUSED(was_ratelimited))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+}
+
+void libworker_bg_done_cb(void* ATTR_UNUSED(arg), int ATTR_UNUSED(rcode), 
+       struct sldns_buffer* ATTR_UNUSED(buf), enum sec_status ATTR_UNUSED(s),
+       char* ATTR_UNUSED(why_bogus), int ATTR_UNUSED(was_ratelimited))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+}
+
+void libworker_event_done_cb(void* ATTR_UNUSED(arg), int ATTR_UNUSED(rcode), 
+       struct sldns_buffer* ATTR_UNUSED(buf), enum sec_status ATTR_UNUSED(s),
+       char* ATTR_UNUSED(why_bogus), int ATTR_UNUSED(was_ratelimited))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+}
+
+int context_query_cmp(const void* ATTR_UNUSED(a), const void* ATTR_UNUSED(b))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+       return 0;
+}
+
+void worker_stat_timer_cb(void* ATTR_UNUSED(arg))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+}
+
+void worker_probe_timer_cb(void* ATTR_UNUSED(arg))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+}
+
+void worker_start_accept(void* ATTR_UNUSED(arg))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+}
+
+void worker_stop_accept(void* ATTR_UNUSED(arg))
+{
+       log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+}
+
+/** identity of a lock, as used by the lock-verify application */
+struct order_id {
+        /** id of the thread that created the lock */
+        int thr;
+        /** creation instance number */
+        int instance;
+};
+
+/** Compare two order_id values, by thread id first and instance number
+ * second. Returns -1, 0 or 1. */
+int order_lock_cmp(const void* e1, const void* e2)
+{
+        const struct order_id* left = (const struct order_id*)e1;
+        const struct order_id* right = (const struct order_id*)e2;
+        if(left->thr != right->thr)
+                return (left->thr < right->thr) ? -1 : 1;
+        if(left->instance != right->instance)
+                return (left->instance < right->instance) ? -1 : 1;
+        return 0;
+}
+
+/** compare two keys as zero-terminated strings, with strcmp */
+int
+codeline_cmp(const void* a, const void* b)
+{
+        const char* lhs = (const char*)a;
+        const char* rhs = (const char*)b;
+        return strcmp(lhs, rhs);
+}
+
+int replay_var_compare(const void* ATTR_UNUSED(a), const void* ATTR_UNUSED(b))
+{
+        log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+        return 0;
+}
+
+void remote_get_opt_ssl(char* ATTR_UNUSED(str), void* ATTR_UNUSED(arg))
+{
+        log_assert(0); /* placeholder for fptr_wlist; not meant to be called */
+}
index a517fa5f373ef175e5eda7e773dc3097a2df1464..2f60b1381e11a1624e3d2196997e64d09a7decfb 100644 (file)
@@ -939,6 +939,11 @@ listen_create(struct comm_base* base, struct listen_port* ATTR_UNUSED(ports),
        int ATTR_UNUSED(http_notls),
        struct tcl_list* ATTR_UNUSED(tcp_conn_limit),
        void* ATTR_UNUSED(sslctx), struct dt_env* ATTR_UNUSED(dtenv),
+       struct doq_table* ATTR_UNUSED(table),
+       struct ub_randstate* ATTR_UNUSED(rnd),
+       const char* ATTR_UNUSED(ssl_service_key),
+       const char* ATTR_UNUSED(ssl_service_pem),
+       struct config_file* ATTR_UNUSED(cfg),
        comm_point_callback_type* cb, void *cb_arg)
 {
        struct replay_runtime* runtime = (struct replay_runtime*)base;
index 70feb79727beaf3554148978fe1bfc63a5b1487d..442e23434eb4ffab337c5825789430cd84469588 100644 (file)
@@ -600,3 +600,52 @@ void listen_desetup_locks(void)
 {
        /* nothing */
 }
+
+#ifdef HAVE_NGTCP2
+void comm_point_doq_callback(int ATTR_UNUSED(fd), short ATTR_UNUSED(event),
+       void* ATTR_UNUSED(arg))
+{
+       /* nothing: stub with an empty body, all arguments are ignored */
+}
+
+int doq_conn_cmp(const void* ATTR_UNUSED(key1), const void* ATTR_UNUSED(key2))
+{
+       return 0; /* stub: the keys are not inspected, always equal */
+}
+
+int doq_conid_cmp(const void* ATTR_UNUSED(key1), const void* ATTR_UNUSED(key2))
+{
+       return 0; /* stub: the keys are not inspected, always equal */
+}
+
+int doq_timer_cmp(const void* ATTR_UNUSED(key1), const void* ATTR_UNUSED(key2))
+{
+       return 0; /* stub: the keys are not inspected, always equal */
+}
+
+int doq_stream_cmp(const void* ATTR_UNUSED(key1), const void* ATTR_UNUSED(key2))
+{
+       return 0; /* stub: the keys are not inspected, always equal */
+}
+
+struct doq_table* doq_table_create(struct config_file* ATTR_UNUSED(cfg),
+       struct ub_randstate* ATTR_UNUSED(rnd))
+{
+       return calloc(1, sizeof(struct doq_table)); /* stub: zeroed table, cfg and rnd are ignored; NULL if calloc fails */
+}
+
+void doq_table_delete(struct doq_table* table)
+{
+       free(table); /* stub: only the structure itself is released */
+}
+
+void doq_timer_cb(void* ATTR_UNUSED(arg))
+{
+       /* nothing: stub with an empty body, the argument is ignored */
+}
+
+size_t doq_table_quic_size_get(struct doq_table* ATTR_UNUSED(table))
+{
+       return 0; /* stub: the table is not inspected, size is always 0 */
+}
+#endif
diff --git a/testcode/unitdoq.c b/testcode/unitdoq.c
new file mode 100644 (file)
index 0000000..2b91609
--- /dev/null
@@ -0,0 +1,84 @@
+/*
+ * testcode/unitdoq.c - unit test for doq routines.
+ *
+ * Copyright (c) 2022, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/**
+ * \file
+ * Calls doq related unit tests. Exits with code 1 on a failure.
+ */
+
+#include "config.h"
+
+#ifdef HAVE_NGTCP2
+
+#include "util/netevent.h"
+#include "services/listen_dnsport.h"
+#include "testcode/unitmain.h"
+
+/** Print an estimate of the memory use of one doq connection.
+ * The estimate is the size of the doq_conn structure with its DCID key,
+ * three doq_conid entries and one doq_stream that holds a query and an
+ * answer. The ngtcp2_conn, the SSL record and the close packet are
+ * counted as 0. This only prints a number, it checks nothing. */
+static void
+doq_size_conn_check(void)
+{
+       /* Printout the size of one doq connection, in memory usage.
+        * A connection with a couple cids, of type doq_conid, and
+        * it has one stream, and that has a query and an answer. */
+       size_t answer_size = 233; /* size of www.nlnetlabs.nl minimal answer
+               with dnssec and one A record. The unsigned answer is 176 with
+               additional data, 61 bytes minimal response one A record. */
+       size_t query_size = 45; /* size of query for www.nlnetlabs.nl, with
+               an EDNS record with DO flag. */
+       size_t conn_size = sizeof(struct doq_conn);
+       size_t conid_size = sizeof(struct doq_conid);
+       size_t stream_size = sizeof(struct doq_stream);
+
+       conn_size += 16; /* DCID len in the conn key */
+       conn_size += 0; /* the size of the ngtcp2_conn */
+       conn_size += 0; /* the size of the SSL record */
+       conn_size += 0; /* size of the close pkt,
+               but we do not count it here. Only if the conn gets closed. */
+       conid_size += 16; /* the dcid of the conn key */
+       conid_size += 16; /* the cid */
+       stream_size += query_size; /* size of in buffer */
+       stream_size += answer_size; /* size of out buffer */
+       /* three connection ids and one stream per connection */
+       printf("doq connection size %u bytes\n", (unsigned)(conn_size +
+               conid_size*3 + stream_size));
+}
+
+void doq_test(void)
+{
+       unit_show_feature("doq"); /* announce the feature under test */
+       doq_size_conn_check(); /* prints the size estimate, asserts nothing */
+}
+#endif /* HAVE_NGTCP2 */
index 9129d722be0ef35dea18f2848b0b61f2a01f17ba..653d3efbe9040745d54bc000b87943755e244156 100644 (file)
@@ -1432,6 +1432,9 @@ main(int argc, char* argv[])
 #ifdef CLIENT_SUBNET
        ecs_test();
 #endif /* CLIENT_SUBNET */
+#ifdef HAVE_NGTCP2
+       doq_test();
+#endif /* HAVE_NGTCP2 */
        if(log_get_lock()) {
                lock_basic_destroy((lock_basic_type*)log_get_lock());
        }
index adcd74f77b523a96b3a8be878bc3dbd67ff031a0..99d5240d22173aeb2728e90651ba681cec045b0e 100644 (file)
@@ -84,5 +84,7 @@ void authzone_test(void);
 void zonemd_test(void);
 /** unit test for tcp_reuse functions */
 void tcpreuse_test(void);
+/** unit test for doq functions */
+void doq_test(void);
 
 #endif /* TESTCODE_UNITMAIN_H */
diff --git a/testdata/doq_downstream.tdir/doq_downstream.conf b/testdata/doq_downstream.tdir/doq_downstream.conf
new file mode 100644 (file)
index 0000000..babd350
--- /dev/null
@@ -0,0 +1,21 @@
+server:
+       verbosity: 2
+       # num-threads: 1
+       interface: 127.0.0.1@@PORT@
+       quic-port: @PORT@
+       tls-service-key: "unbound_server.key"
+       tls-service-pem: "unbound_server.pem"
+       use-syslog: no
+       directory: .
+       pidfile: "unbound.pid"
+       chroot: ""
+       username: ""
+       do-not-query-localhost: no
+
+       local-zone: "example.net" static
+       local-data: "www.example.net. IN A 1.2.3.4"
+       local-zone: "drop.net" deny
+
+forward-zone:
+       name: "."
+       forward-addr: "127.0.0.1@@TOPORT@"
diff --git a/testdata/doq_downstream.tdir/doq_downstream.dsc b/testdata/doq_downstream.tdir/doq_downstream.dsc
new file mode 100644 (file)
index 0000000..1e0b19d
--- /dev/null
@@ -0,0 +1,16 @@
+BaseName: doq_downstream
+Version: 1.0
+Description: Test DNS-over-QUIC query processing
+CreationDate: Mon Aug 01 16:00:00 CEST 2022
+Maintainer:
+Category: 
+Component:
+CmdDepends: 
+Depends: 
+Help:
+Pre: doq_downstream.pre
+Post: doq_downstream.post
+Test: doq_downstream.test
+AuxFiles: 
+Passed:
+Failure:
diff --git a/testdata/doq_downstream.tdir/doq_downstream.post b/testdata/doq_downstream.tdir/doq_downstream.post
new file mode 100644 (file)
index 0000000..f1a31be
--- /dev/null
@@ -0,0 +1,13 @@
+# #-- doq_downstream.post --#
+# source the master var file when it's there
+[ -f ../.tpkg.var.master ] && source ../.tpkg.var.master
+# source the test var file when it's there
+[ -f .tpkg.var.test ] && source .tpkg.var.test
+#
+# do your teardown here
+PRE="../.."
+. ../common.sh
+kill_pid $FWD_PID
+if test -f unbound.pid; then
+       kill_pid $UNBOUND_PID
+fi
diff --git a/testdata/doq_downstream.tdir/doq_downstream.pre b/testdata/doq_downstream.tdir/doq_downstream.pre
new file mode 100644 (file)
index 0000000..f748cc1
--- /dev/null
@@ -0,0 +1,44 @@
+# #-- doq_downstream.pre--#
+# source the master var file when it's there
+[ -f ../.tpkg.var.master ] && source ../.tpkg.var.master
+# use .tpkg.var.test for in test variable passing
+[ -f .tpkg.var.test ] && source .tpkg.var.test
+
+PRE="../.."
+. ../common.sh
+if grep "define HAVE_NGTCP2 1" $PRE/config.h; then echo test enabled; else skip_test "test skipped"; fi  # only run when config.h defines HAVE_NGTCP2
+
+if test -f $PRE/unbound_do_valgrind_in_test; then  # marker file that asks for a valgrind run
+       do_valgrind=yes
+else
+       do_valgrind=no
+fi
+VALGRIND_FLAGS="--leak-check=full --show-leak-kinds=all"
+
+get_random_port 2
+UNBOUND_PORT=$RND_PORT  # port for the unbound server under test
+FWD_PORT=$(($RND_PORT + 1))  # the next port is for the forwarder
+echo "UNBOUND_PORT=$UNBOUND_PORT" >> .tpkg.var.test
+echo "FWD_PORT=$FWD_PORT" >> .tpkg.var.test
+
+# start forwarder: ldns-testns on FWD_PORT with the replies from
+# doq_downstream.testns, output goes to fwd.log
+get_ldns_testns
+$LDNS_TESTNS -p $FWD_PORT doq_downstream.testns >fwd.log 2>&1 &
+FWD_PID=$!
+echo "FWD_PID=$FWD_PID" >> .tpkg.var.test
+
+# make config file: fill in the chosen ports for @PORT@ and @TOPORT@
+sed -e 's/@PORT\@/'$UNBOUND_PORT'/' -e 's/@TOPORT\@/'$FWD_PORT'/' < doq_downstream.conf > ub.conf
+# start unbound in the background, under valgrind if that was asked for;
+# output goes to unbound.log
+if test $do_valgrind = "yes"; then
+valgrind $VALGRIND_FLAGS $PRE/unbound -vvvv -d -c ub.conf >unbound.log 2>&1 &
+else
+$PRE/unbound -vvvv -d -c ub.conf >unbound.log 2>&1 &
+fi
+UNBOUND_PID=$!
+echo "UNBOUND_PID=$UNBOUND_PID" >> .tpkg.var.test
+
+cat .tpkg.var.test
+wait_ldns_testns_up fwd.log
+wait_unbound_up unbound.log
+
diff --git a/testdata/doq_downstream.tdir/doq_downstream.test b/testdata/doq_downstream.tdir/doq_downstream.test
new file mode 100644 (file)
index 0000000..a302e8d
--- /dev/null
@@ -0,0 +1,109 @@
+# #-- doq_downstream.test --#
+# source the master var file when it's there
+[ -f ../.tpkg.var.master ] && source ../.tpkg.var.master
+# use .tpkg.var.test for in test variable passing
+[ -f .tpkg.var.test ] && source .tpkg.var.test
+
+PRE="../.."
+. ../common.sh
+get_make
+(cd $PRE; $MAKE doqclient)
+
+# test query from local-data, immediate like from cache
+echo "> query www.example.net."
+$PRE/doqclient -s 127.0.0.1 -p $UNBOUND_PORT www.example.net. A IN >outfile 2>&1
+cat outfile
+if test "$?" -ne 0; then
+       echo "exit status not OK"
+       echo "> cat logfiles"
+       cat outfile
+       cat fwd.log
+       cat unbound.log
+       echo "Not OK"
+       exit 1
+fi
+if grep "www.example.net" outfile | grep "1.2.3.4"; then
+       echo "content OK"
+else
+       echo "result contents not OK"
+       echo "> cat logfiles"
+       cat outfile
+       cat fwd.log
+       cat unbound.log
+       echo "result contents not OK"
+       exit 1
+fi
+echo "OK"
+
+# test query that is resolved
+echo "> query www.example.com."
+$PRE/doqclient -s 127.0.0.1 -p $UNBOUND_PORT www.example.com. A IN >outfile 2>&1
+cat outfile
+if test "$?" -ne 0; then
+       echo "exit status not OK"
+       echo "> cat logfiles"
+       cat outfile
+       cat fwd.log
+       cat unbound.log
+       echo "Not OK"
+       exit 1
+fi
+if grep "www.example.com" outfile | grep "10.20.30.40"; then
+       echo "content OK"
+else
+       echo "result contents not OK"
+       echo "> cat logfiles"
+       cat outfile
+       cat fwd.log
+       cat unbound.log
+       echo "result contents not OK"
+       exit 1
+fi
+echo "OK"
+
+# Perform the lock verify tests, stop the server first.
+kill_pid $UNBOUND_PID
+cat unbound.log
+# Remove pidfile so that the post script does not try to stop the server,
+# it is already stopped.
+rm -f unbound.pid
+if test -f ublocktrace-doqclient.0; then
+       if $PRE/lock-verify ublocktrace-doqclient.* 2>&1; then
+               echo "lock-verify test ublocktrace-doqclient worked."
+       else
+               echo "lock-verify test ublocktrace-doqclient failed."
+               exit 1
+       fi
+fi
+if test -f ublocktrace.0; then
+       if $PRE/lock-verify ublocktrace.* 2>&1; then
+               echo "lock-verify test ublocktrace worked."
+       else
+               echo "lock-verify test ublocktrace failed."
+               exit 1
+       fi
+       if grep "lock error" unbound.log >/dev/null; then
+               echo "lock error"
+               exit 1
+       fi
+fi
+# check valgrind output
+if test -f $PRE/unbound_do_valgrind_in_test; then
+       if grep "All heap blocks were freed -- no leaks are possible" unbound.log; then
+               :  # clean
+       else
+               grep "^==" unbound.log
+               echo "Memory leaked"
+               grep "in use at exit" unbound.log
+               exit 1
+       fi
+       if grep "ERROR SUMMARY: 0 errors from 0 contexts" unbound.log; then
+               :  # clean
+       else
+               grep "^==" unbound.log
+               echo "Errors"
+               grep "ERROR SUMMARY" unbound.log
+               exit 1
+       fi
+fi
+exit 0
diff --git a/testdata/doq_downstream.tdir/doq_downstream.testns b/testdata/doq_downstream.tdir/doq_downstream.testns
new file mode 100644 (file)
index 0000000..2d0ea45
--- /dev/null
@@ -0,0 +1,13 @@
+; nameserver test file
+$ORIGIN example.com.
+$TTL 3600
+
+ENTRY_BEGIN
+MATCH opcode qtype qname
+REPLY QR AA NOERROR
+ADJUST copy_id
+SECTION QUESTION
+www    IN      A
+SECTION ANSWER
+www    IN      A       10.20.30.40
+ENTRY_END
diff --git a/testdata/doq_downstream.tdir/unbound_server.key b/testdata/doq_downstream.tdir/unbound_server.key
new file mode 100644 (file)
index 0000000..4256c42
--- /dev/null
@@ -0,0 +1,15 @@
+-----BEGIN RSA PRIVATE KEY-----
+MIICWwIBAAKBgQC3F7Jsv2u01pLL9rFnjsMU/IaCFUIz/624DcaE84Z4gjMl5kWA
+3axQcqul1wlwSrbKwrony+d9hH/+MX0tZwvl8w3OmhmOAiaQ+SHCsIuOjVwQjX0s
+RLB61Pz5+PAiVvnPa9JIYB5QrK6DVEsxIHj8MOc5JKORrnESsFDh6yeMeQIDAQAB
+AoGAAuWoGBprTOA8UGfl5LqYkaNxSWumsYXxLMFjC8WCsjN1NbtQDDr1uAwodSZS
+6ujzvX+ZTHnofs7y64XC8k34HTOCD2zlW7kijWbT8YjRYFU6o9F5zUGD9RCan0ds
+sVscT2psLSzfdsmFAcbmnGdxYkXk2PC1FHtaqExxehralGUCQQDcqrg9uQKXlhQi
+XAaPr8SiWvtRm2a9IMMZkRfUWZclPHq6fCWNuUaCD+cTat4wAuqeknAz33VEosw3
+fXGsok//AkEA1GjIHXrOcSlpfVJb6NeOBugjRtZ7ZDT5gbtnMS9ob0qntKV6saaL
+CNmJwuD9Q3XkU5j1+uHvYGP2NzcJd2CjhwJACV0hNlVMe9w9fHvFN4Gw6WbM9ViP
+0oS6YrJafYNTu5vGZXVxLoNnL4u3NYa6aPUmuZXjNwBLfJ8f5VboZPf6RwJAINd2
+oYA8bSi/A755MX4qmozH74r4Fx1Nuq5UHTm8RwDe/0Javx8F/j9MWpJY9lZDEF3l
+In5OebPa/NyInSmW/wJAZuP9aRn0nDBkHYri++1A7NykMiJ/nH0mDECbnk+wxx0S
+LwqIetBhxb8eQwMg45+iAH7CHAMQ8BQuF/nFE6eotg==
+-----END RSA PRIVATE KEY-----
diff --git a/testdata/doq_downstream.tdir/unbound_server.pem b/testdata/doq_downstream.tdir/unbound_server.pem
new file mode 100644 (file)
index 0000000..aeda3ff
--- /dev/null
@@ -0,0 +1,11 @@
+-----BEGIN CERTIFICATE-----
+MIIBmzCCAQQCCQDsNJ1UmphEFzANBgkqhkiG9w0BAQUFADASMRAwDgYDVQQDEwd1
+bmJvdW5kMB4XDTA4MDkxMTA5MDk0MFoXDTI4MDUyOTA5MDk0MFowEjEQMA4GA1UE
+AxMHdW5ib3VuZDCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAtxeybL9rtNaS
+y/axZ47DFPyGghVCM/+tuA3GhPOGeIIzJeZFgN2sUHKrpdcJcEq2ysK6J8vnfYR/
+/jF9LWcL5fMNzpoZjgImkPkhwrCLjo1cEI19LESwetT8+fjwIlb5z2vSSGAeUKyu
+g1RLMSB4/DDnOSSjka5xErBQ4esnjHkCAwEAATANBgkqhkiG9w0BAQUFAAOBgQAZ
+9N0lnLENs4JMvPS+mn8C5m9bkkFITd32IiLjf0zgYpIUbFXH6XaEr9GNZBUG8feG
+l/6WRXnbnVSblI5odQ4XxGZ9inYY6qtW30uv76HvoKp+QZ1c3460ddR8NauhcCHH
+Z7S+QbLXi+r2JAhpPozZCjBHlRD0ixzA1mKQTJhJZg==
+-----END CERTIFICATE-----
index 2eb81fcee33885f252aaa425465d44f68c6cb6c5..879764bd9ed65708e2405a1296ab409fc97ad8d0 100644 (file)
@@ -135,6 +135,8 @@ config_create(void)
        cfg->http_query_buffer_size = 4*1024*1024;
        cfg->http_response_buffer_size = 4*1024*1024;
        cfg->http_nodelay = 1;
+       cfg->quic_port = UNBOUND_DNS_OVER_QUIC_PORT;
+       cfg->quic_size = 8*1024*1024;
        cfg->use_syslog = 1;
        cfg->log_identity = NULL; /* changed later with argv[0] */
        cfg->log_time_ascii = 0;
@@ -604,6 +606,8 @@ int config_set_option(struct config_file* cfg, const char* opt,
        else S_MEMSIZE("http-response-buffer-size:", http_response_buffer_size)
        else S_YNO("http-nodelay:", http_nodelay)
        else S_YNO("http-notls-downstream:", http_notls_downstream)
+       else S_NUMBER_NONZERO("quic-port:", quic_port)
+       else S_MEMSIZE("quic-size:", quic_size)
        else S_YNO("interface-automatic:", if_automatic)
        else S_STR("interface-automatic-ports:", if_automatic_ports)
        else S_YNO("use-systemd:", use_systemd)
@@ -1154,6 +1158,8 @@ config_get_option(struct config_file* cfg, const char* opt,
        else O_MEM(opt, "http-response-buffer-size", http_response_buffer_size)
        else O_YNO(opt, "http-nodelay", http_nodelay)
        else O_YNO(opt, "http-notls-downstream", http_notls_downstream)
+       else O_DEC(opt, "quic-port", quic_port)
+       else O_MEM(opt, "quic-size", quic_size)
        else O_YNO(opt, "use-systemd", use_systemd)
        else O_YNO(opt, "do-daemonize", do_daemonize)
        else O_STR(opt, "chroot", chrootdir)
@@ -2821,3 +2827,15 @@ if_is_dnscrypt(const char* ifname, const char* port, int dnscrypt_port)
        return 0;
 #endif
 }
+
+/**
+ * See if interface is quic, its port number == the quic port number.
+ * The number after the '@' in ifname is used when there is one, otherwise
+ * the port string is used. Both are read with atoi.
+ * @return 1 if that port number equals quic_port, 0 if not.
+ */
+int
+if_is_quic(const char* ifname, const char* port, int quic_port)
+{
+       const char* at = strchr(ifname, '@');
+       const char* portstr = at ? at+1 : port;
+       return atoi(portstr) == quic_port;
+}
index fbb09aa62bdce4f96e502489585e2a08fcef1bc6..2969f8433963313c8ed811e3df1029acbe065789 100644 (file)
@@ -161,6 +161,11 @@ struct config_file {
        /** Disable TLS for http sockets downstream */
        int http_notls_downstream;
 
+       /** port on which to provide DNS over QUIC service */
+       int quic_port;
+       /** size of the quic data, max bytes */
+       size_t quic_size;
+
        /** outgoing port range number of ports (per thread) */
        int outgoing_num_ports;
        /** number of outgoing tcp buffers per (per thread) */
@@ -1406,6 +1411,10 @@ int if_is_pp2(const char* ifname, const char* port,
 
 /** see if interface is DNSCRYPT, its port number == the dnscrypt port number */
 int if_is_dnscrypt(const char* ifname, const char* port, int dnscrypt_port);
+
+/** see if interface is quic, its port number == the quic port number */
+int if_is_quic(const char* ifname, const char* port, int quic_port);
+
 #ifdef USE_LINUX_IP_LOCAL_PORT_RANGE
 #define LINUX_IP_LOCAL_PORT_RANGE_PATH "/proc/sys/net/ipv4/ip_local_port_range"
 #endif
index dbf2910de89bf9fe73107539269bb22202502281..4c0416f734df5102cce825cd37defb7f69715de8 100644 (file)
@@ -269,6 +269,8 @@ http-query-buffer-size{COLON}       { YDVAR(1, VAR_HTTP_QUERY_BUFFER_SIZE) }
 http-response-buffer-size{COLON} { YDVAR(1, VAR_HTTP_RESPONSE_BUFFER_SIZE) }
 http-nodelay{COLON}            { YDVAR(1, VAR_HTTP_NODELAY) }
 http-notls-downstream{COLON}   { YDVAR(1, VAR_HTTP_NOTLS_DOWNSTREAM) }
+quic-port{COLON}               { YDVAR(1, VAR_QUIC_PORT) }
+quic-size{COLON}               { YDVAR(1, VAR_QUIC_SIZE) }
 use-systemd{COLON}             { YDVAR(1, VAR_USE_SYSTEMD) }
 do-daemonize{COLON}            { YDVAR(1, VAR_DO_DAEMONIZE) }
 interface{COLON}               { YDVAR(1, VAR_INTERFACE) }
index 2ca16f81c454d3717972fb3654edb5faae98f8b7..9978e12f9572be4fa4ca7faedbbbfc2a9ee75313 100644 (file)
@@ -203,6 +203,7 @@ extern struct config_parser_state* cfg_parser;
 %token VAR_RPZ_SIGNAL_NXDOMAIN_RA VAR_INTERFACE_AUTOMATIC_PORTS VAR_EDE
 %token VAR_INTERFACE_ACTION VAR_INTERFACE_VIEW VAR_INTERFACE_TAG
 %token VAR_INTERFACE_TAG_ACTION VAR_INTERFACE_TAG_DATA
+%token VAR_QUIC_PORT VAR_QUIC_SIZE
 %token VAR_PROXY_PROTOCOL_PORT VAR_STATISTICS_INHIBIT_ZERO
 %token VAR_HARDEN_UNKNOWN_ADDITIONAL VAR_DISABLE_EDNS_DO VAR_CACHEDB_NO_STORE
 %token VAR_LOG_DESTADDR VAR_CACHEDB_CHECK_WHEN_SERVE_EXPIRED
@@ -342,6 +343,7 @@ content_server: server_num_threads | server_verbosity | server_port |
        server_edns_client_string_opcode | server_nsid |
        server_zonemd_permissive_mode | server_max_reuse_tcp_queries |
        server_tcp_reuse_timeout | server_tcp_auth_query_timeout |
+       server_quic_port | server_quic_size |
        server_interface_automatic_ports | server_ede |
        server_proxy_protocol_port | server_statistics_inhibit_zero |
        server_harden_unknown_additional | server_disable_edns_do |
@@ -1209,6 +1211,21 @@ server_http_notls_downstream: VAR_HTTP_NOTLS_DOWNSTREAM STRING_ARG
                else cfg_parser->cfg->http_notls_downstream = (strcmp($2, "yes")==0);
                free($2);
        };
+server_quic_port: VAR_QUIC_PORT STRING_ARG /* quic-port: <number>, stored in cfg->quic_port */
+       {
+               OUTYY(("P(server_quic_port:%s)\n", $2));
+               if(atoi($2) == 0) /* refuses 0 and text that atoi reads as 0 */
+                       yyerror("port number expected");
+               else cfg_parser->cfg->quic_port = atoi($2); /* no further range check is done here */
+               free($2);
+       };
+server_quic_size: VAR_QUIC_SIZE STRING_ARG /* quic-size: <memory size>, stored in cfg->quic_size */
+       {
+               OUTYY(("P(server_quic_size:%s)\n", $2));
+               if(!cfg_parse_memsize($2, &cfg_parser->cfg->quic_size)) /* parses straight into the config field */
+                       yyerror("memory size expected");
+               free($2);
+       };
 server_use_systemd: VAR_USE_SYSTEMD STRING_ARG
        {
                OUTYY(("P(server_use_systemd:%s)\n", $2));
index 705dc1bbe3c4eabd741f962c11bc5ec32c2e5063..e94ec5bbce850ec1debb7b242d2e429e747cbf96 100644 (file)
@@ -47,6 +47,7 @@
 #include "util/fptr_wlist.h"
 #include "util/mini_event.h"
 #include "services/outside_network.h"
+#include "services/listen_dnsport.h"
 #include "services/mesh.h"
 #include "services/localzone.h"
 #include "services/authzone.h"
@@ -132,6 +133,9 @@ fptr_whitelist_comm_timer(void (*fptr)(void*))
        else if(fptr == &worker_stat_timer_cb) return 1;
        else if(fptr == &worker_probe_timer_cb) return 1;
        else if(fptr == &validate_suspend_timer_cb) return 1;
+#ifdef HAVE_NGTCP2
+       else if(fptr == &doq_timer_cb) return 1;
+#endif
 #ifdef UB_ON_WINDOWS
        else if(fptr == &wsvc_cron_cb) return 1;
 #endif
@@ -181,6 +185,9 @@ fptr_whitelist_event(void (*fptr)(int, short, void *))
        else if(fptr == &tube_handle_signal) return 1;
        else if(fptr == &comm_base_handle_slow_accept) return 1;
        else if(fptr == &comm_point_http_handle_callback) return 1;
+#ifdef HAVE_NGTCP2
+       else if(fptr == &comm_point_doq_callback) return 1;
+#endif
 #ifdef USE_DNSTAP
        else if(fptr == &dtio_output_cb) return 1;
        else if(fptr == &dtio_cmd_cb) return 1;
@@ -190,6 +197,10 @@ fptr_whitelist_event(void (*fptr)(int, short, void *))
        else if(fptr == &dtio_tap_callback) return 1;
        else if(fptr == &dtio_mainfdcallback) return 1;
 #endif
+#ifdef HAVE_NGTCP2
+       else if(fptr == &doq_client_event_cb) return 1;
+       else if(fptr == &doq_client_timer_cb) return 1;
+#endif
 #ifdef UB_ON_WINDOWS
        else if(fptr == &worker_win_stop_cb) return 1;
 #endif
@@ -248,6 +259,12 @@ fptr_whitelist_rbtree_cmp(int (*fptr) (const void *, const void *))
        else if(fptr == &auth_zone_cmp) return 1;
        else if(fptr == &auth_data_cmp) return 1;
        else if(fptr == &auth_xfer_cmp) return 1;
+#ifdef HAVE_NGTCP2
+       else if(fptr == &doq_conn_cmp) return 1;
+       else if(fptr == &doq_conid_cmp) return 1;
+       else if(fptr == &doq_timer_cmp) return 1;
+       else if(fptr == &doq_stream_cmp) return 1;
+#endif
        return 0;
 }
 
index d86ee4923f73f1e401f93df4f56432b3787dffbf..eb698cb759aa05c9e3cc1c0e2af448a3e7826e5c 100644 (file)
@@ -88,6 +88,7 @@
 #define lock_get_mem(lock) (0) /* nothing */
 #define checklock_start() /* nop */
 #define checklock_stop() /* nop */
+#define checklock_set_output_name(name) /* nop */
 
 #ifdef HAVE_PTHREAD
 #include <pthread.h>
index 9d5131da96568bb6d56d61dc5e36adf3d8de8db6..55ea4e9aaaa96f90e6cd73faebc1fc1e29263723 100644 (file)
@@ -53,6 +53,7 @@
 #include "dnstap/dnstap.h"
 #include "dnscrypt/dnscrypt.h"
 #include "services/listen_dnsport.h"
+#include "util/random.h"
 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
 #endif
 #ifdef HAVE_OPENSSL_ERR_H
 #include <openssl/err.h>
 #endif
+
+#ifdef HAVE_NGTCP2
+#include <ngtcp2/ngtcp2.h>
+#include <ngtcp2/ngtcp2_crypto.h>
+#endif
+
 #ifdef HAVE_LINUX_NET_TSTAMP_H
 #include <linux/net_tstamp.h>
 #endif
+
 /* -------- Start of local definitions -------- */
 /** if CMSG_ALIGN is not defined on this platform, a workaround */
 #ifndef CMSG_ALIGN
@@ -1057,108 +1065,1876 @@ comm_point_udp_ancil_callback(int fd, short event, void* arg)
                        p_ancil("receive_udp on interface", &rep);
 #endif /* S_SPLINT_S */
 
-               if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
-                       &rep, 0)) {
-                       log_err("proxy_protocol: could not consume PROXYv2 header");
-                       return;
-               }
-               if(!rep.is_proxied) {
-                       rep.client_addrlen = rep.remote_addrlen;
-                       memmove(&rep.client_addr, &rep.remote_addr,
-                               rep.remote_addrlen);
-               }
+               if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
+                       &rep, 0)) {
+                       log_err("proxy_protocol: could not consume PROXYv2 header");
+                       return;
+               }
+               if(!rep.is_proxied) {
+                       rep.client_addrlen = rep.remote_addrlen;
+                       memmove(&rep.client_addr, &rep.remote_addr,
+                               rep.remote_addrlen);
+               }
+
+               fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
+               if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
+                       /* send back immediate reply */
+                       struct sldns_buffer *buffer;
+#ifdef USE_DNSCRYPT
+                       buffer = rep.c->dnscrypt_buffer;
+#else
+                       buffer = rep.c->buffer;
+#endif
+                       (void)comm_point_send_udp_msg_if(rep.c, buffer,
+                               (struct sockaddr*)&rep.remote_addr,
+                               rep.remote_addrlen, &rep);
+               }
+               if(!rep.c || rep.c->fd == -1) /* commpoint closed */
+                       break;
+       }
+}
+#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
+/**
+ * Event callback for a UDP comm point, reads datagrams and hands them to
+ * the callback of the comm point.
+ * Nothing is done unless the event has UB_EV_READ set. At most
+ * NUM_UDP_PER_SELECT datagrams are read per call. If the callback returns
+ * nonzero, a reply is sent back to the remote address straight away.
+ * @param fd: the socket to read from, expected to be the fd of the comm point.
+ * @param event: event bits.
+ * @param arg: the comm point, a struct comm_point*.
+ */
+void
+comm_point_udp_callback(int fd, short event, void* arg)
+{
+       struct comm_reply rep;
+       ssize_t rcv;
+       int i;
+       struct sldns_buffer *buffer;
+
+       rep.c = (struct comm_point*)arg;
+       log_assert(rep.c->type == comm_udp);
+
+       if(!(event&UB_EV_READ))
+               return;
+       log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); /* NOTE(review): rep.c was already dereferenced above, its NULL test here comes late */
+       ub_comm_base_now(rep.c->ev->base);
+       for(i=0; i<NUM_UDP_PER_SELECT; i++) {
+               sldns_buffer_clear(rep.c->buffer);
+               rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr);
+               log_assert(fd != -1);
+               log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
+               rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer),
+                       sldns_buffer_remaining(rep.c->buffer), MSG_DONTWAIT,
+                       (struct sockaddr*)&rep.remote_addr, &rep.remote_addrlen);
+               if(rcv == -1) { /* nothing was read: log if the error warrants it, and stop */
+#ifndef USE_WINSOCK
+                       if(errno != EAGAIN && errno != EINTR
+                               && udp_recv_needs_log(errno))
+                               log_err("recvfrom %d failed: %s",
+                                       fd, strerror(errno));
+#else
+                       if(WSAGetLastError() != WSAEINPROGRESS &&
+                               WSAGetLastError() != WSAECONNRESET &&
+                               WSAGetLastError()!= WSAEWOULDBLOCK &&
+                               udp_recv_needs_log(WSAGetLastError()))
+                               log_err("recvfrom failed: %s",
+                                       wsa_strerror(WSAGetLastError()));
+#endif
+                       return;
+               }
+               sldns_buffer_skip(rep.c->buffer, rcv);
+               sldns_buffer_flip(rep.c->buffer); /* buffer now holds the received bytes */
+               rep.srctype = 0;
+               rep.is_proxied = 0;
+
+               if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
+                       &rep, 0)) {
+                       log_err("proxy_protocol: could not consume PROXYv2 header");
+                       return;
+               }
+               if(!rep.is_proxied) { /* not proxied: the client address is the remote address */
+                       rep.client_addrlen = rep.remote_addrlen;
+                       memmove(&rep.client_addr, &rep.remote_addr,
+                               rep.remote_addrlen);
+               }
+
+               fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
+               if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
+                       /* send back immediate reply */
+#ifdef USE_DNSCRYPT
+                       buffer = rep.c->dnscrypt_buffer;
+#else
+                       buffer = rep.c->buffer;
+#endif
+                       (void)comm_point_send_udp_msg(rep.c, buffer,
+                               (struct sockaddr*)&rep.remote_addr,
+                               rep.remote_addrlen, 0);
+               }
+               if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for
+               another UDP port. Note rep.c cannot be reused with TCP fd. */
+                       break;
+       }
+}
+
+#ifdef HAVE_NGTCP2
+/**
+ * Initialise a doq packet address.
+ * The interface index is set to 0 and both address lengths are set to
+ * the full size of their address storage.
+ */
+void
+doq_pkt_addr_init(struct doq_pkt_addr* paddr)
+{
+       paddr->ifindex = 0;
+       paddr->localaddrlen = (socklen_t)sizeof(paddr->localaddr);
+       paddr->addrlen = (socklen_t)sizeof(paddr->addr);
+}
+
+/**
+ * Set the ecn on the transmission.
+ * The value is set with IPV6_TCLASS for AF_INET6 and with IP_TOS for
+ * every other family. A failing setsockopt is logged, nothing is returned.
+ */
+static void
+doq_set_ecn(int fd, int family, uint32_t ecn)
+{
+       unsigned int tos = ecn;
+       if(family != AF_INET6) {
+               if(setsockopt(fd, IPPROTO_IP, IP_TOS, &tos,
+                       (socklen_t)sizeof(tos)) == -1)
+                       log_err("setsockopt(.. IP_TOS ..): %s",
+                               strerror(errno));
+               return;
+       }
+       if(setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, &tos,
+               (socklen_t)sizeof(tos)) == -1)
+               log_err("setsockopt(.. IPV6_TCLASS ..): %s",
+                       strerror(errno));
+}
+
+/**
+ * Set the local address in the control ancillary data.
+ * One control message is written: IP_PKTINFO or IP_SENDSRCADDR for an
+ * AF_INET local address, IPV6_PKTINFO for every other family.
+ * @param msg: message header. On entry msg_control points to the control
+ *     buffer and msg_controllen is its size. On return msg_controllen is
+ *     the space of the one control message. If the platform can not pass
+ *     an IPv4 source address, msg_control is NULL and msg_controllen is 0,
+ *     so that the unfilled control buffer is not given to sendmsg.
+ * @param control_size: size of the control buffer, used in assertions only.
+ * @param localaddr: the local address.
+ * @param localaddrlen: length of localaddr, used in assertions only.
+ * @param ifindex: interface index that is put in the pktinfo.
+ */
+static void
+doq_set_localaddr_cmsg(struct msghdr* msg, size_t control_size,
+       struct doq_addr_storage* localaddr, socklen_t localaddrlen,
+       int ifindex)
+{
+#ifndef S_SPLINT_S
+       /* fetched while msg_controllen still holds the full buffer size */
+       struct cmsghdr* cmsg = CMSG_FIRSTHDR(msg);
+       if(localaddr->sockaddr.in.sin_family == AF_INET) {
+#ifdef IP_PKTINFO
+               struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
+               struct in_pktinfo v4info;
+               log_assert(localaddrlen >= sizeof(struct sockaddr_in));
+               msg->msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
+               memset(msg->msg_control, 0, msg->msg_controllen);
+               log_assert(msg->msg_controllen <= control_size);
+               cmsg->cmsg_level = IPPROTO_IP;
+               cmsg->cmsg_type = IP_PKTINFO;
+               memset(&v4info, 0, sizeof(v4info));
+#  ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
+               memmove(&v4info.ipi_spec_dst, &sa->sin_addr,
+                       sizeof(struct in_addr));
+#  else
+               memmove(&v4info.ipi_addr, &sa->sin_addr,
+                       sizeof(struct in_addr));
+#  endif
+               v4info.ipi_ifindex = ifindex;
+               memmove(CMSG_DATA(cmsg), &v4info, sizeof(struct in_pktinfo));
+               cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
+#elif defined(IP_SENDSRCADDR)
+               struct sockaddr_in* sa= (struct sockaddr_in*)localaddr;
+               log_assert(localaddrlen >= sizeof(struct sockaddr_in));
+               msg->msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
+               memset(msg->msg_control, 0, msg->msg_controllen);
+               log_assert(msg->msg_controllen <= control_size);
+               cmsg->cmsg_level = IPPROTO_IP;
+               cmsg->cmsg_type = IP_SENDSRCADDR;
+               memmove(CMSG_DATA(cmsg),  &sa->sin_addr,
+                       sizeof(struct in_addr));
+               cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
+#else
+               /* No IP_PKTINFO and no IP_SENDSRCADDR: the IPv4 source
+                * address can not be passed. Send without control data,
+                * because the control buffer has not been filled in. */
+               msg->msg_control = NULL;
+               msg->msg_controllen = 0;
+               (void)cmsg;
+#endif /* IP_PKTINFO or IP_SENDSRCADDR */
+       } else {
+               struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr;
+               struct in6_pktinfo v6info;
+               log_assert(localaddrlen >= sizeof(struct sockaddr_in6));
+               msg->msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
+               memset(msg->msg_control, 0, msg->msg_controllen);
+               log_assert(msg->msg_controllen <= control_size);
+               cmsg->cmsg_level = IPPROTO_IPV6;
+               cmsg->cmsg_type = IPV6_PKTINFO;
+               memset(&v6info, 0, sizeof(v6info));
+               memmove(&v6info.ipi6_addr, &sa6->sin6_addr,
+                       sizeof(struct in6_addr));
+               v6info.ipi6_ifindex = ifindex;
+               memmove(CMSG_DATA(cmsg), &v6info, sizeof(struct in6_pktinfo));
+               cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
+       }
+#endif /* S_SPLINT_S */
+       /* Ignore unused variables, if no assertions are compiled. */
+       (void)localaddrlen;
+       (void)control_size;
+}
+
+/**
+ * Write address and port into strings.
+ * @param addr: the address, AF_INET or AF_INET6.
+ * @param addrlen: length of addr, used in assertions only.
+ * @param host: buffer for the printed address.
+ * @param hostlen: size of the host buffer.
+ * @param port: buffer for the printed port number.
+ * @param portlen: size of the port buffer.
+ * @return 1 if host and port have been written. 0 on failure, that is
+ *     when inet_ntop fails or the address family is not AF_INET or
+ *     AF_INET6; host and port are then not usable.
+ */
+static int
+doq_print_addr_port(struct doq_addr_storage* addr, socklen_t addrlen,
+       char* host, size_t hostlen, char* port, size_t portlen)
+{
+       if(addr->sockaddr.in.sin_family == AF_INET) {
+               struct sockaddr_in* sa = (struct sockaddr_in*)addr;
+               log_assert(addrlen >= sizeof(*sa));
+               if(inet_ntop(sa->sin_family, &sa->sin_addr, host,
+                       (socklen_t)hostlen) == 0) {
+                       log_hex("inet_ntop error: address", &sa->sin_addr,
+                               sizeof(sa->sin_addr));
+                       return 0;
+               }
+               snprintf(port, portlen, "%u", (unsigned)ntohs(sa->sin_port));
+       } else if(addr->sockaddr.in.sin_family == AF_INET6) {
+               struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr;
+               log_assert(addrlen >= sizeof(*sa6));
+               if(inet_ntop(sa6->sin6_family, &sa6->sin6_addr, host,
+                       (socklen_t)hostlen) == 0) {
+                       log_hex("inet_ntop error: address", &sa6->sin6_addr,
+                               sizeof(sa6->sin6_addr));
+                       return 0;
+               }
+               snprintf(port, portlen, "%u", (unsigned)ntohs(sa6->sin6_port));
+       } else {
+               /* Unknown address family: nothing has been written to
+                * host and port, so the caller must not print them. */
+               return 0;
+       }
+       /* Ignore unused variable, if no assertions are compiled. */
+       (void)addrlen;
+       return 1;
+}
+
+/**
+ * Store the packet from pkt_buf as the blocked packet, for when the write
+ * has blocked. The packet contents, its address and its ecn value are
+ * kept in the doq socket. The packet is dropped if a blocked packet is
+ * already stored, or if it is too large for the blocked_pkt buffer.
+ */
+static void
+doq_store_blocked_pkt(struct comm_point* c, struct doq_pkt_addr* paddr,
+       uint32_t ecn)
+{
+       size_t pktlen;
+       /* It should not happen that we write when there is already a
+        * blocked write, but if so, drop the packet. */
+       if(c->doq_socket->have_blocked_pkt)
+               return;
+       /* Impossibly large, drop the packet. Impossible because pkt_buf
+        * and blocked_pkt are the same size. */
+       pktlen = sldns_buffer_limit(c->doq_socket->pkt_buf);
+       if(pktlen > sldns_buffer_capacity(c->doq_socket->blocked_pkt))
+               return;
+       /* copy the packet contents */
+       sldns_buffer_clear(c->doq_socket->blocked_pkt);
+       sldns_buffer_write(c->doq_socket->blocked_pkt,
+               sldns_buffer_begin(c->doq_socket->pkt_buf), pktlen);
+       sldns_buffer_flip(c->doq_socket->blocked_pkt);
+       /* and keep what is needed to send it later */
+       memcpy(c->doq_socket->blocked_paddr, paddr,
+               sizeof(*c->doq_socket->blocked_paddr));
+       c->doq_socket->blocked_pkt_pi.ecn = ecn;
+       c->doq_socket->have_blocked_pkt = 1;
+}
+/**
+ * Send the packet in the pkt_buf of the doq socket to the remote address
+ * in paddr, with sendmsg on the fd of the comm point.
+ * The local address and interface index from paddr are passed as control
+ * ancillary data, and the ecn value is set on the socket before the send.
+ * If the send has blocked, the packet is stored as the blocked packet.
+ * On other errors and on a short write the packet is not sent again, a
+ * message may be logged. Nothing is returned.
+ * @param c: comm point with the doq socket and the fd.
+ * @param paddr: remote address, local address and interface index.
+ * @param ecn: the ecn value for the transmission.
+ */
+void
+doq_send_pkt(struct comm_point* c, struct doq_pkt_addr* paddr, uint32_t ecn)
+{
+       struct msghdr msg;
+       struct iovec iov[1];
+       union {
+               struct cmsghdr hdr;
+               char buf[256];
+       } control; /* buffer for the control ancillary data */
+       ssize_t ret;
+       iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf);
+       iov[0].iov_len = sldns_buffer_limit(c->doq_socket->pkt_buf);
+       memset(&msg, 0, sizeof(msg));
+       msg.msg_name = (void*)&paddr->addr;
+       msg.msg_namelen = paddr->addrlen;
+       msg.msg_iov = iov;
+       msg.msg_iovlen = 1;
+       msg.msg_control = control.buf;
+#ifndef S_SPLINT_S
+       msg.msg_controllen = sizeof(control.buf);
+#endif /* S_SPLINT_S */
+       msg.msg_flags = 0;
+
+       doq_set_localaddr_cmsg(&msg, sizeof(control.buf), &paddr->localaddr,
+               paddr->localaddrlen, paddr->ifindex);
+       doq_set_ecn(c->fd, paddr->addr.sockaddr.in.sin_family, ecn);
+
+       for(;;) { /* try again when the call fails with EINTR */
+               ret = sendmsg(c->fd, &msg, MSG_DONTWAIT);
+               if(ret == -1 && errno == EINTR)
+                       continue;
+               break;
+       }
+       if(ret == -1) {
+#ifndef USE_WINSOCK
+               if(errno == EAGAIN ||
+#  ifdef EWOULDBLOCK
+                       errno == EWOULDBLOCK ||
+#  endif
+                       errno == ENOBUFS)
+#else
+               if(WSAGetLastError() == WSAEINPROGRESS ||
+                       WSAGetLastError() == WSAENOBUFS ||
+                       WSAGetLastError() == WSAEWOULDBLOCK)
+#endif
+               {
+                       /* udp send has blocked */
+                       doq_store_blocked_pkt(c, paddr, ecn);
+                       return;
+               }
+               if(!udp_send_errno_needs_log((void*)&paddr->addr,
+                       paddr->addrlen))
+                       return; /* this error is not logged */
+               if(verbosity >= VERB_OPS) {
+                       char host[256], port[32];
+                       if(doq_print_addr_port(&paddr->addr, paddr->addrlen,
+                               host, sizeof(host), port, sizeof(port))) {
+                               verbose(VERB_OPS, "doq sendmsg to %s %s "
+                                       "failed: %s", host, port,
+                                       strerror(errno));
+                       } else {
+                               verbose(VERB_OPS, "doq sendmsg failed: %s",
+                                       strerror(errno));
+                       }
+               }
+               return;
+       } else if(ret != (ssize_t)sldns_buffer_limit(c->doq_socket->pkt_buf)) { /* short write: fewer bytes sent than the packet holds */
+               char host[256], port[32];
+               if(doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
+                       sizeof(host), port, sizeof(port))) {
+                       log_err("doq sendmsg to %s %s failed: "
+                               "sent %d in place of %d bytes", 
+                               host, port, (int)ret,
+                               (int)sldns_buffer_limit(c->doq_socket->pkt_buf));
+               } else {
+                       log_err("doq sendmsg failed: "
+                               "sent %d in place of %d bytes", 
+                               (int)ret, (int)sldns_buffer_limit(c->doq_socket->pkt_buf));
+               }
+               return;
+       }
+}
+
+/** Return the port number of the address, in host byte order.
+ * The result is 0 if the address family is not AF_INET or AF_INET6. */
+static int
+doq_sockaddr_get_port(struct doq_addr_storage* addr)
+{
+       switch(addr->sockaddr.in.sin_family) {
+       case AF_INET:
+               return ntohs(((struct sockaddr_in*)addr)->sin_port);
+       case AF_INET6:
+               return ntohs(((struct sockaddr_in6*)addr)->sin6_port);
+       default:
+               break;
+       }
+       return 0;
+}
+
+/** Get the local address of a received packet from the ancillary data
+ * headers and store it in paddr.
+ * paddr->localaddr is zeroed first. The first IPV6_PKTINFO, IP_PKTINFO or
+ * IP_RECVDSTADDR item (depending on what the platform defines) supplies
+ * the local address, and the interface index when it is available. The
+ * ancillary data has no port, so the port is taken from c->socket->addr.
+ * If no such item is present, localaddrlen and ifindex are not modified.
+ * @param c: comm point, c->socket->addr provides the local port.
+ * @param paddr: paddr->addr must hold the remote address of the packet,
+ *     the localaddr, localaddrlen and ifindex are filled in.
+ * @param pkt_continue: set to 1 when 0 is returned.
+ * @param msg: the msghdr that was filled in by recvmsg.
+ * @return 0 if the address family of the ancillary item differs from the
+ *     family of the remote address, otherwise 1.
+ */
+static int
+doq_get_localaddr_cmsg(struct comm_point* c, struct doq_pkt_addr* paddr,
+       int* pkt_continue, struct msghdr* msg)
+{
+#ifndef S_SPLINT_S
+       struct cmsghdr* cmsg;
+#endif /* S_SPLINT_S */
+
+       memset(&paddr->localaddr, 0, sizeof(paddr->localaddr));
+#ifndef S_SPLINT_S
+       for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
+               cmsg = CMSG_NXTHDR(msg, cmsg)) {
+               if( cmsg->cmsg_level == IPPROTO_IPV6 &&
+                       cmsg->cmsg_type == IPV6_PKTINFO) {
+                       struct in6_pktinfo* v6info =
+                               (struct in6_pktinfo*)CMSG_DATA(cmsg);
+                       struct sockaddr_in6* sa= (struct sockaddr_in6*)
+                               &paddr->localaddr;
+                       struct sockaddr_in6* rema = (struct sockaddr_in6*)
+                               &paddr->addr;
+                       if(rema->sin6_family != AF_INET6) {
+                               log_err("doq cmsg family mismatch cmsg is ip6");
+                               *pkt_continue = 1;
+                               return 0;
+                       }
+                       sa->sin6_family = AF_INET6;
+                       sa->sin6_port = htons(doq_sockaddr_get_port(
+                               (void*)c->socket->addr));
+                       paddr->ifindex = v6info->ipi6_ifindex;
+                       memmove(&sa->sin6_addr, &v6info->ipi6_addr,
+                               sizeof(struct in6_addr));
+                       paddr->localaddrlen = sizeof(struct sockaddr_in6);
+                       break;
+#ifdef IP_PKTINFO
+               } else if( cmsg->cmsg_level == IPPROTO_IP &&
+                       cmsg->cmsg_type == IP_PKTINFO) {
+                       struct in_pktinfo* v4info =
+                               (struct in_pktinfo*)CMSG_DATA(cmsg);
+                       struct sockaddr_in* sa= (struct sockaddr_in*)
+                               &paddr->localaddr;
+                       struct sockaddr_in* rema = (struct sockaddr_in*)
+                               &paddr->addr;
+                       if(rema->sin_family != AF_INET) {
+                               log_err("doq cmsg family mismatch cmsg is ip4");
+                               *pkt_continue = 1;
+                               return 0;
+                       }
+                       sa->sin_family = AF_INET;
+                       sa->sin_port = htons(doq_sockaddr_get_port(
+                               (void*)c->socket->addr));
+                       paddr->ifindex = v4info->ipi_ifindex;
+                       memmove(&sa->sin_addr, &v4info->ipi_addr,
+                               sizeof(struct in_addr));
+                       paddr->localaddrlen = sizeof(struct sockaddr_in);
+                       break;
+#elif defined(IP_RECVDSTADDR)
+               } else if( cmsg->cmsg_level == IPPROTO_IP &&
+                       cmsg->cmsg_type == IP_RECVDSTADDR) {
+                       struct sockaddr_in* sa= (struct sockaddr_in*)
+                               &paddr->localaddr;
+                       struct sockaddr_in* rema = (struct sockaddr_in*)
+                               &paddr->addr;
+                       if(rema->sin_family != AF_INET) {
+                               log_err("doq cmsg family mismatch cmsg is ip4");
+                               *pkt_continue = 1;
+                               return 0;
+                       }
+                       sa->sin_family = AF_INET;
+                       sa->sin_port = htons(doq_sockaddr_get_port(
+                               (void*)c->socket->addr));
+                       /* this item has the address only, no interface */
+                       paddr->ifindex = 0;
+                       /* sa is a pointer, so the member is reached
+                        * with '->' */
+                       memmove(&sa->sin_addr, CMSG_DATA(cmsg),
+                               sizeof(struct in_addr));
+                       paddr->localaddrlen = sizeof(struct sockaddr_in);
+                       break;
+#endif /* IP_PKTINFO or IP_RECVDSTADDR */
+               }
+       }
+#endif /* S_SPLINT_S */
+
+       return 1;
+}
+
+/** Get the ECN information of a received packet from its ancillary data.
+ * For AF_INET6 the IPV6_TCLASS item is looked up, for every other family
+ * the IP_TOS item. The first byte of the data of the first such item with
+ * a nonzero length is returned, or 0 if there is no such item. */
+static uint32_t
+msghdr_get_ecn(struct msghdr* msg, int family)
+{
+#ifndef S_SPLINT_S
+       struct cmsghdr* cmsg;
+       int want_level = IPPROTO_IP;
+       int want_type = IP_TOS;
+       if(family == AF_INET6) {
+               want_level = IPPROTO_IPV6;
+               want_type = IPV6_TCLASS;
+       }
+       for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
+               cmsg = CMSG_NXTHDR(msg, cmsg)) {
+               if(cmsg->cmsg_level != want_level)
+                       continue;
+               if(cmsg->cmsg_type != want_type)
+                       continue;
+               if(cmsg->cmsg_len == 0)
+                       continue;
+               return *(uint8_t*)CMSG_DATA(cmsg);
+       }
+#endif /* S_SPLINT_S */
+       return 0;
+}
+
+/** Receive one packet for DoQ on UDP, into the doq socket pkt_buf.
+ * The remote address is stored in paddr, the local address is fetched
+ * from the ancillary data, and the ecn value is stored in pi.
+ * Returns false on failure. If recvmsg itself fails, pkt_continue is set
+ * to false, so that the caller can stop receiving UDP packets. */
+static int
+doq_recv(struct comm_point* c, struct doq_pkt_addr* paddr, int* pkt_continue,
+       struct ngtcp2_pkt_info* pi)
+{
+       union {
+               struct cmsghdr hdr;
+               char buf[256];
+       } cdata;
+       struct iovec vec[1];
+       struct msghdr mh;
+       ssize_t nread;
+
+       /* the data is read into the packet buffer */
+       vec[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf);
+       vec[0].iov_len = sldns_buffer_remaining(c->doq_socket->pkt_buf);
+       mh.msg_iov = vec;
+       mh.msg_iovlen = 1;
+       /* the remote address is written straight into paddr */
+       mh.msg_name = &paddr->addr;
+       mh.msg_namelen = (socklen_t)sizeof(paddr->addr);
+       mh.msg_control = cdata.buf;
+#ifndef S_SPLINT_S
+       mh.msg_controllen = sizeof(cdata.buf);
+#endif /* S_SPLINT_S */
+       mh.msg_flags = 0;
+
+       nread = recvmsg(c->fd, &mh, MSG_DONTWAIT);
+       if(nread == -1) {
+               if(errno == EAGAIN || errno == EINTR) {
+                       /* not logged */
+               } else if(udp_recv_needs_log(errno)) {
+                       log_err("recvmsg failed for doq: %s", strerror(errno));
+               }
+               *pkt_continue = 0;
+               return 0;
+       }
+
+       /* make the buffer hold exactly the received bytes */
+       sldns_buffer_skip(c->doq_socket->pkt_buf, nread);
+       sldns_buffer_flip(c->doq_socket->pkt_buf);
+       paddr->addrlen = mh.msg_namelen;
+       if(!doq_get_localaddr_cmsg(c, paddr, pkt_continue, &mh))
+               return 0;
+       pi->ecn = msghdr_get_ecn(&mh, paddr->addr.sockaddr.in.sin_family);
+       return 1;
+}
+
+/** Write a version negotiation packet into the doq socket pkt_buf and send
+ * it to paddr. The caller passes the scid and dcid of the received packet
+ * flipped around, so that the reply goes back to the client. */
+static void
+doq_send_version_negotiation(struct comm_point* c, struct doq_pkt_addr* paddr,
+       const uint8_t* dcid, size_t dcidlen, const uint8_t* scid,
+       size_t scidlen)
+{
+       uint32_t supported[2];
+       size_t num_supported = 0;
+       uint8_t rand_byte;
+       ngtcp2_ssize pktlen;
+
+       /* the list of versions that is offered to the client */
+       supported[num_supported++] = NGTCP2_PROTO_VER_V1;
+
+       rand_byte = ub_random_max(c->doq_socket->rnd, 256);
+       sldns_buffer_clear(c->doq_socket->pkt_buf);
+       pktlen = ngtcp2_pkt_write_version_negotiation(
+               sldns_buffer_begin(c->doq_socket->pkt_buf),
+               sldns_buffer_capacity(c->doq_socket->pkt_buf), rand_byte,
+               dcid, dcidlen, scid, scidlen, supported, num_supported);
+       if(pktlen < 0) {
+               log_err("ngtcp2_pkt_write_version_negotiation failed: %s",
+                       ngtcp2_strerror(pktlen));
+               return;
+       }
+       /* the buffer now holds pktlen bytes to send */
+       sldns_buffer_set_position(c->doq_socket->pkt_buf, pktlen);
+       sldns_buffer_flip(c->doq_socket->pkt_buf);
+       doq_send_pkt(c, paddr, 0);
+}
+
+/** Find the doq_conn object by remote address, local address, interface
+ * index and dcid. The conn_tree of the table is searched, no locks are
+ * taken here. Returns NULL if there is no such connection. */
+static struct doq_conn*
+doq_conn_find(struct doq_table* table, struct doq_addr_storage* addr,
+       socklen_t addrlen, struct doq_addr_storage* localaddr,
+       socklen_t localaddrlen, int ifindex, const uint8_t* dcid,
+       size_t dcidlen)
+{
+       struct doq_conn lookup;
+       struct rbnode_type* found;
+
+       /* only the tree node and the key are filled in for the search */
+       memset(&lookup.node, 0, sizeof(lookup.node));
+       lookup.node.key = &lookup;
+       lookup.key.dcid = (void*)dcid;
+       lookup.key.dcidlen = dcidlen;
+       lookup.key.paddr.ifindex = ifindex;
+       lookup.key.paddr.addrlen = addrlen;
+       memmove(&lookup.key.paddr.addr, addr, addrlen);
+       lookup.key.paddr.localaddrlen = localaddrlen;
+       memmove(&lookup.key.paddr.localaddr, localaddr, localaddrlen);
+
+       found = rbtree_search(table->conn_tree, &lookup);
+       if(!found)
+               return NULL;
+       return (struct doq_conn*)found->key;
+}
+
+/** Find the doq_conn by the connection id.
+ * The conid is looked up under the conid_lock, and the connection key
+ * that it holds is copied, with the dcid bytes copied to the stack.
+ * After the conid_lock is released, the table lock is taken to look up
+ * the connection with the copied key.
+ * Returns the connection with its conn->lock held and the table lock
+ * released, or NULL if the conid or the connection is not found, or the
+ * connection is marked deleted. */
+static struct doq_conn*
+doq_conn_find_by_id(struct doq_table* table, const uint8_t* dcid,
+       size_t dcidlen)
+{
+       struct doq_conid* conid;
+       lock_rw_rdlock(&table->conid_lock);
+       conid = doq_conid_find(table, dcid, dcidlen);
+       if(conid) {
+               /* make a copy of the key */
+               struct doq_conn* conn;
+               struct doq_conn_key key = conid->key;
+               uint8_t cid[NGTCP2_MAX_CIDLEN];
+               log_assert(conid->key.dcidlen <= NGTCP2_MAX_CIDLEN);
+               memcpy(cid, conid->key.dcid, conid->key.dcidlen);
+               key.dcid = cid; /* the copy points to the stack bytes */
+               lock_rw_unlock(&table->conid_lock);
+
+               /* now that the conid lock is released, look up the conn */
+               lock_rw_rdlock(&table->lock);
+               conn = doq_conn_find(table, &key.paddr.addr,
+                       key.paddr.addrlen, &key.paddr.localaddr,
+                       key.paddr.localaddrlen, key.paddr.ifindex, key.dcid,
+                       key.dcidlen);
+               if(!conn) {
+                       /* The connection got deleted between the conid lookup
+                        * and the connection lock grab, it no longer exists,
+                        * so return null. */
+                       lock_rw_unlock(&table->lock);
+                       return NULL;
+               }
+               lock_basic_lock(&conn->lock);
+               if(conn->is_deleted) {
+                       lock_rw_unlock(&table->lock);
+                       lock_basic_unlock(&conn->lock);
+                       return NULL;
+               }
+               lock_rw_unlock(&table->lock);
+               return conn; /* conn->lock is still held */
+       }
+       lock_rw_unlock(&table->conid_lock);
+       return NULL;
+}
+
+/** Find the doq_conn, by addr or by connection id.
+ * First the connection is looked up by the addresses and dcid, under the
+ * table lock. If that does not give a connection that is not deleted,
+ * the lookup by connection id is performed.
+ * Returns the connection with its conn->lock held, or NULL if not found.
+ * NOTE(review): is_deleted is read here before conn->lock is taken, the
+ * lookup by connection id checks it after taking conn->lock; confirm
+ * that this difference is intended. */
+static struct doq_conn*
+doq_conn_find_by_addr_or_cid(struct doq_table* table,
+       struct doq_pkt_addr* paddr, const uint8_t* dcid, size_t dcidlen)
+{
+       struct doq_conn* conn;
+       lock_rw_rdlock(&table->lock);
+       conn = doq_conn_find(table, &paddr->addr, paddr->addrlen,
+               &paddr->localaddr, paddr->localaddrlen, paddr->ifindex,
+               dcid, dcidlen);
+       if(conn && conn->is_deleted) {
+               conn = NULL;
+       }
+       if(conn) {
+               lock_basic_lock(&conn->lock);
+               lock_rw_unlock(&table->lock);
+               verbose(VERB_ALGO, "doq: found connection by address, dcid");
+       } else {
+               lock_rw_unlock(&table->lock);
+               conn = doq_conn_find_by_id(table, dcid, dcidlen);
+               if(conn) {
+                       verbose(VERB_ALGO, "doq: found connection by dcid");
+               }
+       }
+       return conn;
+}
+
+/** decode doq packet header, false on handled or failure, true to continue
+ * to process the packet.
+ * The version and connection ids are decoded from the packet in pkt_buf.
+ * If the decode asks for version negotiation, the version negotiation
+ * packet is sent and false is returned. On other decode errors false is
+ * returned as well.
+ * Otherwise the connection is looked up by address or connection id and
+ * stored in *conn, that is NULL if there is no connection. A connection
+ * that is found has its doq_socket pointer set to the doq_socket of c.
+ * The ngtcp2_pkt_decode_version_cid call has two forms, selected with
+ * HAVE_STRUCT_NGTCP2_VERSION_CID. */
+static int
+doq_decode_pkt_header_negotiate(struct comm_point* c,
+       struct doq_pkt_addr* paddr, struct doq_conn** conn)
+{
+#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
+       struct ngtcp2_version_cid vc;
+#else
+       uint32_t version;
+       const uint8_t *dcid, *scid;
+       size_t dcidlen, scidlen;
+#endif
+       int rv;
+
+#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
+       rv = ngtcp2_pkt_decode_version_cid(&vc,
+               sldns_buffer_begin(c->doq_socket->pkt_buf),
+               sldns_buffer_limit(c->doq_socket->pkt_buf),
+               c->doq_socket->sv_scidlen);
+#else
+       rv = ngtcp2_pkt_decode_version_cid(&version, &dcid, &dcidlen,
+               &scid, &scidlen, sldns_buffer_begin(c->doq_socket->pkt_buf),
+               sldns_buffer_limit(c->doq_socket->pkt_buf), c->doq_socket->sv_scidlen);
+#endif
+       if(rv != 0) {
+               if(rv == NGTCP2_ERR_VERSION_NEGOTIATION) {
+                       /* send the version negotiation */
+                       doq_send_version_negotiation(c, paddr,
+#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
+                       vc.scid, vc.scidlen, vc.dcid, vc.dcidlen
+#else
+                       scid, scidlen, dcid, dcidlen
+#endif
+                       );
+                       return 0;
+               }
+               verbose(VERB_ALGO, "doq: could not decode version "
+                       "and CID from QUIC packet header: %s",
+                       ngtcp2_strerror(rv));
+               return 0;
+       }
+
+       if(verbosity >= VERB_ALGO) {
+               verbose(VERB_ALGO, "ngtcp2_pkt_decode_version_cid packet has "
+                       "QUIC protocol version %u", (unsigned)
+#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
+                       vc.
+#endif
+                       version
+                       );
+               log_hex("dcid",
+#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
+                       (void*)vc.dcid, vc.dcidlen
+#else
+                       (void*)dcid, dcidlen
+#endif
+                       );
+               log_hex("scid",
+#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
+                       (void*)vc.scid, vc.scidlen
+#else
+                       (void*)scid, scidlen
+#endif
+                       );
+       }
+       *conn = doq_conn_find_by_addr_or_cid(c->doq_socket->table, paddr,
+#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
+               vc.dcid, vc.dcidlen
+#else
+               dcid, dcidlen
+#endif
+               );
+       if(*conn)
+               (*conn)->doq_socket = c->doq_socket;
+       return 1;
+}
+
+/** Initialise the cid structure with datalen bytes of random data.
+ * The length is capped at the 32 bytes of the local buffer. */
+static void doq_cid_randfill(struct ngtcp2_cid* cid, size_t datalen,
+       struct ub_randstate* rnd)
+{
+       uint8_t rand_bytes[32];
+       size_t len = datalen;
+       if(len > sizeof(rand_bytes))
+               len = sizeof(rand_bytes);
+       doq_fill_rand(rnd, rand_bytes, len);
+       ngtcp2_cid_init(cid, rand_bytes, len);
+}
+
+/** Send a retry packet for a doq connection to the address in paddr.
+ * A random server source connection ID is picked, a retry token is
+ * generated for it with the static secret, and the retry packet is
+ * written into pkt_buf and sent. Failures are logged. */
+static void
+doq_send_retry(struct comm_point* c, struct doq_pkt_addr* paddr,
+       struct ngtcp2_pkt_hd* hd)
+{
+       uint8_t retry_token[NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN];
+       char hostbuf[256], portbuf[32];
+       struct ngtcp2_cid retry_scid;
+       ngtcp2_ssize retry_tokenlen, pktlen;
+       ngtcp2_tstamp now;
+
+       if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, hostbuf,
+               sizeof(hostbuf), portbuf, sizeof(portbuf))) {
+               log_err("doq_send_retry failed");
+               return;
+       }
+       verbose(VERB_ALGO, "doq: sending retry packet to %s %s", hostbuf,
+               portbuf);
+
+       /* pick the source connection ID that the server uses */
+       retry_scid.datalen = c->doq_socket->sv_scidlen;
+       doq_cid_randfill(&retry_scid, retry_scid.datalen,
+               c->doq_socket->rnd);
+
+       now = doq_get_timestamp_nanosec();
+
+       /* the token is made for the remote address and the new scid */
+       retry_tokenlen = ngtcp2_crypto_generate_retry_token(retry_token,
+               c->doq_socket->static_secret, c->doq_socket->static_secret_len,
+               hd->version, (void*)&paddr->addr, paddr->addrlen,
+               &retry_scid, &hd->dcid, now);
+       if(retry_tokenlen < 0) {
+               log_err("ngtcp2_crypto_generate_retry_token failed: %s",
+                       ngtcp2_strerror(retry_tokenlen));
+               return;
+       }
+
+       sldns_buffer_clear(c->doq_socket->pkt_buf);
+       pktlen = ngtcp2_crypto_write_retry(
+               sldns_buffer_begin(c->doq_socket->pkt_buf),
+               sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version,
+               &hd->scid, &retry_scid, &hd->dcid, retry_token,
+               retry_tokenlen);
+       if(pktlen < 0) {
+               log_err("ngtcp2_crypto_write_retry failed: %s",
+                       ngtcp2_strerror(pktlen));
+               return;
+       }
+       /* the buffer now holds pktlen bytes to send */
+       sldns_buffer_set_position(c->doq_socket->pkt_buf, pktlen);
+       sldns_buffer_flip(c->doq_socket->pkt_buf);
+       doq_send_pkt(c, paddr, 0);
+}
+
+/** Send a connection close packet for which no connection state is kept.
+ * The packet is written into pkt_buf with the version and connection ids
+ * of the received packet header hd, and the error code, and then sent to
+ * paddr. A failure to write the packet is logged. */
+static void
+doq_send_stateless_connection_close(struct comm_point* c,
+       struct doq_pkt_addr* paddr, struct ngtcp2_pkt_hd* hd,
+       uint64_t error_code)
+{
+       ngtcp2_ssize pktlen;
+
+       sldns_buffer_clear(c->doq_socket->pkt_buf);
+       pktlen = ngtcp2_crypto_write_connection_close(
+               sldns_buffer_begin(c->doq_socket->pkt_buf),
+               sldns_buffer_capacity(c->doq_socket->pkt_buf),
+               hd->version, &hd->scid, &hd->dcid, error_code, NULL, 0);
+       if(pktlen < 0) {
+               log_err("ngtcp2_crypto_write_connection_close failed: %s",
+                       ngtcp2_strerror(pktlen));
+               return;
+       }
+       /* the buffer now holds pktlen bytes to send */
+       sldns_buffer_set_position(c->doq_socket->pkt_buf, pktlen);
+       sldns_buffer_flip(c->doq_socket->pkt_buf);
+       doq_send_pkt(c, paddr, 0);
+}
+
+/** doq verify retry token, false on failure.
+ * The token from the packet header hd is verified with the static secret
+ * of the doq socket, for the remote address in paddr, with
+ * 10*NGTCP2_SECONDS passed as the timeout. ocid is passed to the verify
+ * routine as its first (output) argument.
+ * The token is in hd->token and hd->tokenlen, or in hd->token.base and
+ * hd->token.len, depending on HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN.
+ * Also returns false if the address cannot be printed for the log. */
+static int
+doq_verify_retry_token(struct comm_point* c, struct doq_pkt_addr* paddr,
+       struct ngtcp2_cid* ocid, struct ngtcp2_pkt_hd* hd)
+{
+       char host[256], port[32];
+       ngtcp2_tstamp ts;
+       if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
+               sizeof(host), port, sizeof(port))) {
+               log_err("doq_verify_retry_token failed");
+               return 0;
+       }
+       ts = doq_get_timestamp_nanosec();
+       verbose(VERB_ALGO, "doq: verifying retry token from %s %s", host,
+               port);
+       if(ngtcp2_crypto_verify_retry_token(ocid,
+#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
+               hd->token, hd->tokenlen,
+#else
+               hd->token.base, hd->token.len,
+#endif
+               c->doq_socket->static_secret,
+               c->doq_socket->static_secret_len, hd->version,
+               (void*)&paddr->addr, paddr->addrlen, &hd->dcid,
+               10*NGTCP2_SECONDS, ts) != 0) {
+               verbose(VERB_ALGO, "doq: could not verify retry token "
+                       "from %s %s", host, port);
+               return 0;
+       }
+       verbose(VERB_ALGO, "doq: verified retry token from %s %s", host, port);
+       return 1;
+}
+
+/** doq verify token, false on failure.
+ * The regular (not retry) token from the packet header hd is verified
+ * with the static secret of the doq socket, for the remote address in
+ * paddr, with 3600*NGTCP2_SECONDS passed as the timeout.
+ * The token is in hd->token and hd->tokenlen, or in hd->token.base and
+ * hd->token.len, depending on HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN.
+ * Also returns false if the address cannot be printed for the log. */
+static int
+doq_verify_token(struct comm_point* c, struct doq_pkt_addr* paddr,
+       struct ngtcp2_pkt_hd* hd)
+{
+       char host[256], port[32];
+       ngtcp2_tstamp ts;
+       if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
+               sizeof(host), port, sizeof(port))) {
+               log_err("doq_verify_token failed");
+               return 0;
+       }
+       ts = doq_get_timestamp_nanosec();
+       verbose(VERB_ALGO, "doq: verifying token from %s %s", host, port);
+       if(ngtcp2_crypto_verify_regular_token(
+#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
+               hd->token, hd->tokenlen,
+#else
+               hd->token.base, hd->token.len,
+#endif
+               c->doq_socket->static_secret, c->doq_socket->static_secret_len,
+               (void*)&paddr->addr, paddr->addrlen, 3600*NGTCP2_SECONDS,
+               ts) != 0) {
+               verbose(VERB_ALGO, "doq: could not verify token from %s %s",
+                       host, port);
+               return 0;
+       }
+       verbose(VERB_ALGO, "doq: verified token from %s %s", host, port);
+       return 1;
+}
+
+/** delete and remove from the lookup tree the doq_conn connection.
+ * Does nothing if conn is NULL.
+ * conn->lock is released in here before it is taken, so the caller is
+ * expected to hold conn->lock on entry; it is not held on return.
+ * The connection is marked deleted and a copy of its key is made, then
+ * conn->lock is released so that the table lock can be taken first. With
+ * the table lock held the connection is removed from the conn_tree, the
+ * write list and the timer structures. If the connection was still in
+ * the tree, it is deleted after the locks are released. */
+static void
+doq_delete_connection(struct comm_point* c, struct doq_conn* conn)
+{
+       struct doq_conn copy;
+       uint8_t cid[NGTCP2_MAX_CIDLEN];
+       rbnode_type* node;
+       if(!conn)
+               return;
+       /* Copy the key and set it deleted. */
+       conn->is_deleted = 1;
+       doq_conn_write_disable(conn);
+       copy.key = conn->key;
+       log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
+       memcpy(cid, conn->key.dcid, conn->key.dcidlen);
+       copy.key.dcid = cid;
+       copy.node.key = &copy;
+       lock_basic_unlock(&conn->lock);
+
+       /* Now get the table lock to delete it from the tree */
+       lock_rw_wrlock(&c->doq_socket->table->lock);
+       node = rbtree_delete(c->doq_socket->table->conn_tree, copy.node.key);
+       if(node) {
+               conn = (struct doq_conn*)node->key;
+               lock_basic_lock(&conn->lock);
+               doq_conn_write_list_remove(c->doq_socket->table, conn);
+               if(conn->timer.timer_in_list) {
+                       /* Remove timer from list first, because finding the
+                        * rbnode element of the setlist of same timeouts
+                        * needs tree lookup. Edit the tree structure after
+                        * that lookup. */
+                       doq_timer_list_remove(c->doq_socket->table,
+                               &conn->timer);
+               }
+               if(conn->timer.timer_in_tree)
+                       doq_timer_tree_remove(c->doq_socket->table,
+                               &conn->timer);
+       }
+       lock_rw_unlock(&c->doq_socket->table->lock);
+       if(node) {
+               /* node is NULL if the connection was no longer in the
+                * tree, then there is nothing to delete here */
+               lock_basic_unlock(&conn->lock);
+               doq_table_quic_size_subtract(c->doq_socket->table,
+                       sizeof(*conn)+conn->key.dcidlen);
+               doq_conn_delete(conn, c->doq_socket->table);
+       }
+}
+
+/** create and setup a new doq connection, to a new destination, or with
+ * a new dcid. It has a new set of streams. It is inserted in the lookup tree.
+ * Returns NULL on failure.
+ * If the size check on the table fails, a stateless connection close
+ * with NGTCP2_CONNECTION_REFUSED is sent and NULL is returned.
+ * On success the connection is returned with conn->lock held, the lock
+ * is taken in here and not released on that path.
+ * ocid can be NULL, then no original connection id is passed to the
+ * connection setup. */
+static struct doq_conn*
+doq_setup_new_conn(struct comm_point* c, struct doq_pkt_addr* paddr,
+       struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid)
+{
+       struct doq_conn* conn;
+       if(!doq_table_quic_size_available(c->doq_socket->table,
+               c->doq_socket->cfg, sizeof(*conn)+hd->dcid.datalen
+               + sizeof(struct doq_stream)
+               + 100 /* estimated input query */
+               + 1200 /* estimated output query */)) {
+               verbose(VERB_ALGO, "doq: no mem available for new connection");
+               doq_send_stateless_connection_close(c, paddr, hd,
+                       NGTCP2_CONNECTION_REFUSED);
+               return NULL;
+       }
+       conn = doq_conn_create(c, paddr, hd->dcid.data, hd->dcid.datalen,
+               hd->version);
+       if(!conn) {
+               log_err("doq: could not allocate doq_conn");
+               return NULL;
+       }
+       lock_rw_wrlock(&c->doq_socket->table->lock);
+       lock_basic_lock(&conn->lock);
+       if(!rbtree_insert(c->doq_socket->table->conn_tree, &conn->node)) {
+               lock_rw_unlock(&c->doq_socket->table->lock);
+               log_err("doq: duplicate connection");
+               /* conn has no entry in writelist, and no timer yet. */
+               lock_basic_unlock(&conn->lock);
+               doq_conn_delete(conn, c->doq_socket->table);
+               return NULL;
+       }
+       lock_rw_unlock(&c->doq_socket->table->lock);
+       doq_table_quic_size_add(c->doq_socket->table,
+               sizeof(*conn)+conn->key.dcidlen);
+       verbose(VERB_ALGO, "doq: created new connection");
+
+       /* the scid and dcid switch meaning from the accepted client
+        * connection to the server connection. The 'source' and 'destination'
+        * meaning is reversed. */
+       if(!doq_conn_setup(conn, hd->scid.data, hd->scid.datalen,
+               (ocid?ocid->data:NULL), (ocid?ocid->datalen:0),
+#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
+               hd->token, hd->tokenlen
+#else
+               hd->token.base, hd->token.len
+#endif
+               )) {
+               log_err("doq: could not set up connection");
+               /* conn->lock is held here, doq_delete_connection
+                * releases it */
+               doq_delete_connection(c, conn);
+               return NULL;
+       }
+       return conn;
+}
+
+/** perform doq address validation.
+ * Returns false if a reply has been sent and the packet must not be
+ * processed further, true to continue with setting up the connection.
+ * - no token: a retry packet is sent.
+ * - the token is not a retry token and the dcid is shorter than
+ *   NGTCP2_MIN_INITIAL_DCIDLEN: connection close with
+ *   NGTCP2_INVALID_TOKEN.
+ * - retry token: it is verified, on failure a connection close with
+ *   NGTCP2_INVALID_TOKEN is sent; on success *pocid is set to ocid.
+ * - regular token: it is verified, on failure a retry is sent; on success
+ *   the token is cleared from hd.
+ * - other token: a retry is sent if validate_addr is set for the socket,
+ *   otherwise the token is cleared from hd.
+ * The token is in hd->token and hd->tokenlen, or in hd->token.base and
+ * hd->token.len, depending on HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN. */
+static int
+doq_address_validation(struct comm_point* c, struct doq_pkt_addr* paddr,
+       struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid,
+       struct ngtcp2_cid** pocid)
+{
+#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
+       const uint8_t* token = hd->token;
+       size_t tokenlen = hd->tokenlen;
+#else
+       const uint8_t* token = hd->token.base;
+       size_t tokenlen = hd->token.len;
+#endif
+       verbose(VERB_ALGO, "doq stateless address validation");
+
+       if(tokenlen == 0 || token == NULL) {
+               doq_send_retry(c, paddr, hd);
+               return 0;
+       }
+       if(token[0] != NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY &&
+               hd->dcid.datalen < NGTCP2_MIN_INITIAL_DCIDLEN) {
+               doq_send_stateless_connection_close(c, paddr, hd,
+                       NGTCP2_INVALID_TOKEN);
+               return 0;
+       }
+       if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY) {
+               if(!doq_verify_retry_token(c, paddr, ocid, hd)) {
+                       doq_send_stateless_connection_close(c, paddr, hd,
+                               NGTCP2_INVALID_TOKEN);
+                       return 0;
+               }
+               *pocid = ocid;
+       } else if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR) {
+               if(!doq_verify_token(c, paddr, hd)) {
+                       doq_send_retry(c, paddr, hd);
+                       return 0;
+               }
+#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
+               hd->token = NULL;
+               hd->tokenlen = 0;
+#else
+               hd->token.base = NULL;
+               hd->token.len = 0;
+#endif
+       } else {
+               verbose(VERB_ALGO, "doq address validation: unrecognised "
+                       "token in hd.token.base with magic byte 0x%2.2x",
+                       (int)token[0]);
+               if(c->doq_socket->validate_addr) {
+                       doq_send_retry(c, paddr, hd);
+                       return 0;
+               }
+#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
+               hd->token = NULL;
+               hd->tokenlen = 0;
+#else
+               hd->token.base = NULL;
+               hd->token.len = 0;
+#endif
+       }
+       return 1;
+}
+
+/** the doq accept, returns false if no further processing of content.
+ * The packet in pkt_buf is checked with ngtcp2_accept. If that asks for
+ * a retry, a retry packet is sent; other failures are logged.
+ * Address validation is performed if validate_addr is set for the socket
+ * or if the packet carries a token. Then a new connection is created and
+ * the packet is passed to it with doq_conn_recv. If that fails, the new
+ * connection is deleted again, after a retry is sent if requested.
+ * @param c: comm point with the doq socket.
+ * @param paddr: remote and local address of the packet.
+ * @param conn: on success the new connection is returned here, on
+ *     failure after its creation it is set to NULL.
+ * @param pi: packet info that is passed to doq_conn_recv.
+ * @return false if the packet needs no further processing.
+ */
+static int
+doq_accept(struct comm_point* c, struct doq_pkt_addr* paddr,
+       struct doq_conn** conn, struct ngtcp2_pkt_info* pi)
+{
+       int rv;
+       struct ngtcp2_pkt_hd hd;
+       struct ngtcp2_cid ocid, *pocid=NULL;
+       /* Start at no retry, so that the value that is tested after a
+        * failed doq_conn_recv is never indeterminate. */
+       int err_retry = 0;
+       memset(&hd, 0, sizeof(hd));
+       rv = ngtcp2_accept(&hd, sldns_buffer_begin(c->doq_socket->pkt_buf),
+               sldns_buffer_limit(c->doq_socket->pkt_buf));
+       if(rv != 0) {
+               if(rv == NGTCP2_ERR_RETRY) {
+                       doq_send_retry(c, paddr, &hd);
+                       return 0;
+               }
+               log_err("doq: initial packet failed, ngtcp2_accept failed: %s",
+                       ngtcp2_strerror(rv));
+               return 0;
+       }
+       if(c->doq_socket->validate_addr ||
+#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
+               hd.tokenlen
+#else
+               hd.token.len
+#endif
+               ) {
+               /* pocid stays NULL unless a retry token is verified */
+               if(!doq_address_validation(c, paddr, &hd, &ocid, &pocid))
+                       return 0;
+       }
+       *conn = doq_setup_new_conn(c, paddr, &hd, pocid);
+       if(!*conn)
+               return 0;
+       (*conn)->doq_socket = c->doq_socket;
+       if(!doq_conn_recv(c, paddr, *conn, pi, &err_retry, NULL)) {
+               if(err_retry)
+                       doq_send_retry(c, paddr, &hd);
+               doq_delete_connection(c, *conn);
+               *conn = NULL;
+               return 0;
+       }
+       return 1;
+}
+
+/** doq pickup a timer to wait for, for the worker. If any timer exists.
+ * With the table lock held, the timer_tree is walked and the first timer
+ * that has no worker doq socket yet, or that already has the doq socket
+ * of c, is picked up and marked with the doq socket of c.
+ * If a timer is picked up, the comm timer of the doq socket is set to the
+ * time of that timer minus now_tv, and the time is stored in marked_time.
+ * Otherwise the comm timer is disabled and marked_time is zeroed. */
+static void
+doq_pickup_timer(struct comm_point* c)
+{
+       struct doq_timer* t;
+       struct timeval tv;
+       int have_time = 0;
+       memset(&tv, 0, sizeof(tv));
+
+       lock_rw_wrlock(&c->doq_socket->table->lock);
+       RBTREE_FOR(t, struct doq_timer*, c->doq_socket->table->timer_tree) {
+               if(t->worker_doq_socket == NULL ||
+                       t->worker_doq_socket == c->doq_socket) {
+                       /* pick up this element */
+                       t->worker_doq_socket = c->doq_socket;
+                       have_time = 1;
+                       memcpy(&tv, &t->time, sizeof(tv));
+                       break;
+               }
+       }
+       lock_rw_unlock(&c->doq_socket->table->lock);
+
+       if(have_time) {
+               struct timeval rel; /* the time to wait from now */
+               timeval_subtract(&rel, &tv, c->doq_socket->now_tv);
+               comm_timer_set(c->doq_socket->timer, &rel);
+               memcpy(&c->doq_socket->marked_time, &tv,
+                       sizeof(c->doq_socket->marked_time));
+               verbose(VERB_ALGO, "doq pickup timer at %d.%6.6d in %d.%6.6d",
+                       (int)tv.tv_sec, (int)tv.tv_usec, (int)rel.tv_sec,
+                       (int)rel.tv_usec);
+       } else {
+               if(comm_timer_is_set(c->doq_socket->timer))
+                       comm_timer_disable(c->doq_socket->timer);
+               memset(&c->doq_socket->marked_time, 0,
+                       sizeof(c->doq_socket->marked_time));
+               verbose(VERB_ALGO, "doq timer disabled");
+       }
+}
+
+/** doq done with connection, release locks and setup timer and write.
+ * Called with conn->lock held; that lock is released on every return path.
+ * If neither the timer nor the write list membership has to change, only
+ * the connection lock is released. Otherwise the connection is looked up
+ * again by its key with the table lock held, and if it is still in the
+ * conn_tree and not marked is_deleted, its write list state and its timer
+ * are updated. */
+static void
+doq_done_setup_timer_and_write(struct comm_point* c, struct doq_conn* conn)
+{
+       struct doq_conn copy;
+       uint8_t cid[NGTCP2_MAX_CIDLEN];
+       rbnode_type* node;
+       struct timeval new_tv;
+       int write_change = 0, timer_change = 0;
+
+       /* No longer in callbacks, so the pointer to doq_socket is back
+        * to NULL. */
+       conn->doq_socket = NULL;
+
+       if(doq_conn_check_timer(conn, &new_tv))
+               timer_change = 1;
+       if( (conn->write_interest && !conn->on_write_list) ||
+               (!conn->write_interest && conn->on_write_list))
+               write_change = 1;
+
+       if(!timer_change && !write_change) {
+               /* Nothing to do. */
+               lock_basic_unlock(&conn->lock);
+               return;
+       }
+
+       /* The table lock is needed to change the write list and timer tree.
+        * So the connection lock is released and then the connection is
+        * looked up again. The key is copied to the stack, with the dcid
+        * bytes in the local cid array, so that the search key does not
+        * point into the connection after its lock is released. */
+       copy.key = conn->key;
+       log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
+       memcpy(cid, conn->key.dcid, conn->key.dcidlen);
+       copy.key.dcid = cid;
+       copy.node.key = &copy;
+       lock_basic_unlock(&conn->lock);
+
+       lock_rw_wrlock(&c->doq_socket->table->lock);
+       node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key);
+       if(!node) {
+               lock_rw_unlock(&c->doq_socket->table->lock);
+               /* Must have been deleted in the mean time. */
+               return;
+       }
+       conn = (struct doq_conn*)node->key;
+       lock_basic_lock(&conn->lock);
+       if(conn->is_deleted) {
+               /* It is deleted now. */
+               lock_rw_unlock(&c->doq_socket->table->lock);
+               lock_basic_unlock(&conn->lock);
+               return;
+       }
+
+       if(write_change) {
+               /* Edit the write lists, we are holding the table.lock and can
+                * edit the list first,last and also prev,next and on_list
+                * elements in the doq_conn structures. */
+               doq_conn_set_write_list(c->doq_socket->table, conn);
+       }
+       if(timer_change) {
+               doq_timer_set(c->doq_socket->table, &conn->timer,
+                       c->doq_socket, &new_tv);
+       }
+       lock_rw_unlock(&c->doq_socket->table->lock);
+       lock_basic_unlock(&conn->lock);
+}
+
+/** doq done with connection callbacks, release locks and setup write.
+ * Called with conn->lock held; that lock is released on every return path.
+ * If the write list membership already matches write_interest, only the
+ * connection lock is released. Otherwise the connection is looked up again
+ * by its key with the table lock held, and if it is still in the conn_tree
+ * and not marked is_deleted, its write list state is updated. The timer is
+ * not touched by this routine. */
+static void
+doq_done_with_conn_cb(struct comm_point* c, struct doq_conn* conn)
+{
+       struct doq_conn copy;
+       uint8_t cid[NGTCP2_MAX_CIDLEN];
+       rbnode_type* node;
+
+       /* no longer in callbacks, so the pointer to doq_socket is back
+        * to NULL. */
+       conn->doq_socket = NULL;
+
+       if( (conn->write_interest && conn->on_write_list) ||
+               (!conn->write_interest && !conn->on_write_list)) {
+               /* The connection already has the required write list
+                * status. */
+               lock_basic_unlock(&conn->lock);
+               return;
+       }
+
+       /* To edit the write list of connections we have to hold the table
+        * lock, so we release the connection and then look it up again.
+        * The key is copied to the stack, with the dcid bytes in the local
+        * cid array, so that the search key does not point into the
+        * connection after its lock is released. */
+       copy.key = conn->key;
+       log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
+       memcpy(cid, conn->key.dcid, conn->key.dcidlen);
+       copy.key.dcid = cid;
+       copy.node.key = &copy;
+       lock_basic_unlock(&conn->lock);
+
+       lock_rw_wrlock(&c->doq_socket->table->lock);
+       node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key);
+       if(!node) {
+               lock_rw_unlock(&c->doq_socket->table->lock);
+               /* must have been deleted in the mean time */
+               return;
+       }
+       conn = (struct doq_conn*)node->key;
+       lock_basic_lock(&conn->lock);
+       if(conn->is_deleted) {
+               /* it is deleted now. */
+               lock_rw_unlock(&c->doq_socket->table->lock);
+               lock_basic_unlock(&conn->lock);
+               return;
+       }
+
+       /* edit the write lists, we are holding the table.lock and can
+        * edit the list first,last and also prev,next and on_list elements
+        * in the doq_conn structures. */
+       doq_conn_set_write_list(c->doq_socket->table, conn);
+       lock_rw_unlock(&c->doq_socket->table->lock);
+       lock_basic_unlock(&conn->lock);
+}
+
+/** doq walk the write list of the table and return the number of
+ * connections on it. The table lock is held for reading during the walk. */
+static size_t
+doq_write_list_length(struct comm_point* c)
+{
+       struct doq_conn* p;
+       size_t len = 0;
+       lock_rw_rdlock(&c->doq_socket->table->lock);
+       for(p = c->doq_socket->table->write_list_first; p != NULL;
+               p = p->write_next) {
+               len++;
+       }
+       lock_rw_unlock(&c->doq_socket->table->lock);
+       return len;
+}
+
+/** doq pop the first element from the write list to have write events.
+ * Elements are popped with the table lock held. Connections that are
+ * marked is_deleted are unlocked and skipped. The returned connection,
+ * if any, has its doq_socket pointer set to the doq_socket of this comm
+ * point. Returns NULL if the pop yields no further connection.
+ * NOTE(review): the unlock of the skipped connections implies that
+ * doq_table_pop_first returns the connection with conn->lock held, and
+ * that the returned connection is therefore locked; confirm against that
+ * function. */
+static struct doq_conn*
+doq_pop_write_conn(struct comm_point* c)
+{
+       struct doq_conn* conn;
+       lock_rw_wrlock(&c->doq_socket->table->lock);
+       conn = doq_table_pop_first(c->doq_socket->table);
+       while(conn && conn->is_deleted) {
+               lock_basic_unlock(&conn->lock);
+               conn = doq_table_pop_first(c->doq_socket->table);
+       }
+       lock_rw_unlock(&c->doq_socket->table->lock);
+       if(conn)
+               conn->doq_socket = c->doq_socket;
+       return conn;
+}
+
+/** doq the write callbacks for the connection are done. If delete_it is
+ * set the connection is deleted, otherwise it is released and its timer
+ * and write list state are set up. */
+static void
+doq_done_with_write_cb(struct comm_point* c, struct doq_conn* conn,
+       int delete_it)
+{
+       if(!delete_it) {
+               doq_done_setup_timer_and_write(c, conn);
+       } else {
+               doq_delete_connection(c, conn);
+       }
+}
+
+/** see if the doq socket wants to write packets. That is the case if it
+ * has a blocked packet, or if the write list of the table is not empty. */
+static int
+doq_socket_want_write(struct comm_point* c)
+{
+       int list_has_conn;
+       /* the blocked packet flag is checked without the table lock */
+       if(c->doq_socket->have_blocked_pkt)
+               return 1;
+       lock_rw_rdlock(&c->doq_socket->table->lock);
+       list_has_conn = (c->doq_socket->table->write_list_first ? 1 : 0);
+       lock_rw_unlock(&c->doq_socket->table->lock);
+       return list_has_conn;
+}
+
+/** enable write event for the doq server socket fd. Does nothing, apart
+ * from the log line, if event_has_write says it is already enabled. */
+static void
+doq_socket_write_enable(struct comm_point* c)
+{
+       verbose(VERB_ALGO, "doq socket want write");
+       if(!c->doq_socket->event_has_write) {
+               /* listen for both read and write */
+               comm_point_listen_for_rw(c, 1, 1);
+               c->doq_socket->event_has_write = 1;
+       }
+}
+
+/** disable write event for the doq server socket fd. Does nothing, apart
+ * from the log line, if event_has_write says it is already disabled. */
+static void
+doq_socket_write_disable(struct comm_point* c)
+{
+       verbose(VERB_ALGO, "doq socket want no write");
+       if(c->doq_socket->event_has_write) {
+               /* listen for read only */
+               comm_point_listen_for_rw(c, 1, 0);
+               c->doq_socket->event_has_write = 0;
+       }
+}
+
+/** write blocked packet, if possible. returns false if failed, again.
+ * The blocked packet is copied from blocked_pkt into pkt_buf and sent to
+ * the stored blocked_paddr with the stored ecn value.
+ * Returns 1 if there was no blocked packet, if the packet was dropped
+ * because it cannot fit in pkt_buf, or if have_blocked_pkt is still clear
+ * after doq_send_pkt. Returns 0 if have_blocked_pkt is set again after
+ * doq_send_pkt, so that the caller can wait for the next write event. */
+static int
+doq_write_blocked_pkt(struct comm_point* c)
+{
+       struct doq_pkt_addr paddr;
+       if(!c->doq_socket->have_blocked_pkt)
+               return 1;
+       c->doq_socket->have_blocked_pkt = 0;
+       /* Compare with the capacity of pkt_buf, and not with its remaining
+        * space. The pkt_buf is only cleared below, so at this point its
+        * position and limit are left over from earlier use, and the
+        * remaining space could be smaller than the blocked packet even
+        * though the packet fits in the cleared buffer. */
+       if(sldns_buffer_limit(c->doq_socket->blocked_pkt) >
+               sldns_buffer_capacity(c->doq_socket->pkt_buf))
+               return 1; /* impossibly large, drop it.
+               impossible since pkt_buf is same size as blocked_pkt buf. */
+       sldns_buffer_clear(c->doq_socket->pkt_buf);
+       sldns_buffer_write(c->doq_socket->pkt_buf,
+               sldns_buffer_begin(c->doq_socket->blocked_pkt),
+               sldns_buffer_limit(c->doq_socket->blocked_pkt));
+       sldns_buffer_flip(c->doq_socket->pkt_buf);
+       /* use a copy of the address for the send call */
+       memcpy(&paddr, c->doq_socket->blocked_paddr, sizeof(paddr));
+       doq_send_pkt(c, &paddr, c->doq_socket->blocked_pkt_pi.ecn);
+       if(c->doq_socket->have_blocked_pkt)
+               return 0;
+       return 1;
+}
+
+/** doq find a timer that timeouted and return the conn, locked.
+ * Only the first node of the timer tree is examined, with the table lock
+ * held. Returns NULL if the tree has no first node, or if now_tv is
+ * smaller than the time of that timer. Otherwise the connection of the
+ * timer is locked, its doq_socket pointer is set to doq_socket, the timer
+ * is unset from the table and the connection is returned.
+ * NOTE(review): the cast of the rbtree node to struct doq_timer assumes
+ * the node is the first member of struct doq_timer; confirm against the
+ * structure definition. */
+static struct doq_conn*
+doq_timer_timeout_conn(struct doq_server_socket* doq_socket)
+{
+       struct doq_conn* conn = NULL;
+       struct rbnode_type* node;
+       lock_rw_wrlock(&doq_socket->table->lock);
+       node = rbtree_first(doq_socket->table->timer_tree);
+       if(node && node != RBTREE_NULL) {
+               struct doq_timer* t = (struct doq_timer*)node;
+               conn = t->conn;
+
+               /* If now < timer then no further timeouts in tree. */
+               if(timeval_smaller(doq_socket->now_tv, &t->time)) {
+                       lock_rw_unlock(&doq_socket->table->lock);
+                       return NULL;
+               }
+
+               lock_basic_lock(&conn->lock);
+               conn->doq_socket = doq_socket;
+
+               /* Now that the timer is fired, remove it. */
+               doq_timer_unset(doq_socket->table, t);
+               lock_rw_unlock(&doq_socket->table->lock);
+               return conn;
+       }
+       lock_rw_unlock(&doq_socket->table->lock);
+       return NULL;
+}
+
+/** doq timer erase the marker that said which timer the worker uses.
+ * The timer at marked_time is looked up with the table lock held, and if
+ * it is claimed by this doq_socket the claim is removed. The marked_time
+ * of the doq_socket is zeroed in all cases. */
+static void
+doq_timer_erase_marker(struct doq_server_socket* doq_socket)
+{
+       struct doq_timer* marked;
+       lock_rw_wrlock(&doq_socket->table->lock);
+       marked = doq_timer_find_time(doq_socket->table,
+               &doq_socket->marked_time);
+       if(marked != NULL && marked->worker_doq_socket == doq_socket) {
+               marked->worker_doq_socket = NULL;
+       }
+       lock_rw_unlock(&doq_socket->table->lock);
+       memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time));
+}
+
+void
+doq_timer_cb(void* arg)
+{
+       struct doq_server_socket* doq_socket = (struct doq_server_socket*)arg;
+       struct doq_conn* conn;
+       verbose(VERB_ALGO, "doq timer callback");
+       /* Timer callback for the doq server socket, arg is the doq_socket.
+        * The marker on the timer this socket waited for is erased first.
+        * Then every connection whose timer has expired is handled: it is
+        * deleted if it is marked deleted or is in the closing or draining
+        * period, or if the timeout handling fails, and otherwise it is
+        * released with its timer and write list state set up. */
+       doq_timer_erase_marker(doq_socket);
 
-               fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
-               if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
-                       /* send back immediate reply */
-                       struct sldns_buffer *buffer;
-#ifdef USE_DNSCRYPT
-                       buffer = rep.c->dnscrypt_buffer;
+       while((conn = doq_timer_timeout_conn(doq_socket)) != NULL) {
+               if(conn->is_deleted ||
+#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
+                       ngtcp2_conn_in_closing_period(conn->conn) ||
 #else
-                       buffer = rep.c->buffer;
+                       ngtcp2_conn_is_in_closing_period(conn->conn) ||
 #endif
-                       (void)comm_point_send_udp_msg_if(rep.c, buffer,
-                               (struct sockaddr*)&rep.remote_addr,
-                               rep.remote_addrlen, &rep);
+#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
+                       ngtcp2_conn_in_draining_period(conn->conn)
+#else
+                       ngtcp2_conn_is_in_draining_period(conn->conn)
+#endif
+                       ) {
+                       if(verbosity >= VERB_ALGO) {
+                               char remotestr[256];
+                               addr_to_str((void*)&conn->key.paddr.addr,
+                                       conn->key.paddr.addrlen, remotestr,
+                                       sizeof(remotestr));
+                               verbose(VERB_ALGO, "doq conn %s is deleted "
+                                       "after timeout", remotestr);
+                       }
+                       doq_delete_connection(doq_socket->cp, conn);
+                       continue;
                }
-               if(!rep.c || rep.c->fd == -1) /* commpoint closed */
-                       break;
+               if(!doq_conn_handle_timeout(conn))
+                       doq_delete_connection(doq_socket->cp, conn);
+               else doq_done_setup_timer_and_write(doq_socket->cp, conn);
        }
+       /* update the write event, and pick up the next timer to wait for */
+       if(doq_socket_want_write(doq_socket->cp))
+               doq_socket_write_enable(doq_socket->cp);
+       else doq_socket_write_disable(doq_socket->cp);
+       doq_pickup_timer(doq_socket->cp);
 }
-#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
 
 void
-comm_point_udp_callback(int fd, short event, void* arg)
+comm_point_doq_callback(int fd, short event, void* arg)
 {
-       struct comm_reply rep;
-       ssize_t rcv;
-       int i;
-       struct sldns_buffer *buffer;
+       struct comm_point* c;
+       struct doq_pkt_addr paddr;
+       int i, pkt_continue, err_drop;
+       struct doq_conn* conn;
+       struct ngtcp2_pkt_info pi;
+       size_t count, num_len;
 
-       rep.c = (struct comm_point*)arg;
-       log_assert(rep.c->type == comm_udp);
+       c = (struct comm_point*)arg;
+       log_assert(c->type == comm_doq);
 
-       if(!(event&UB_EV_READ))
-               return;
-       log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
-       ub_comm_base_now(rep.c->ev->base);
-       for(i=0; i<NUM_UDP_PER_SELECT; i++) {
-               sldns_buffer_clear(rep.c->buffer);
-               rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr);
-               log_assert(fd != -1);
-               log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
-               rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer),
-                       sldns_buffer_remaining(rep.c->buffer), MSG_DONTWAIT,
-                       (struct sockaddr*)&rep.remote_addr, &rep.remote_addrlen);
-               if(rcv == -1) {
-#ifndef USE_WINSOCK
-                       if(errno != EAGAIN && errno != EINTR
-                               && udp_recv_needs_log(errno))
-                               log_err("recvfrom %d failed: %s",
-                                       fd, strerror(errno));
+       log_assert(c && c->doq_socket->pkt_buf && c->fd == fd);
+       ub_comm_base_now(c->ev->base);
+
+       /* This callback works in three stages: send a blocked packet,
+        * service the connections on the write list, and then, if the
+        * event is readable, receive up to NUM_UDP_PER_SELECT packets.
+        * Every return path updates the write event and picks up a timer.
+        *
+        * see if there is a blocked packet, and send that if possible.
+        * do not attempt to read yet, even if possible, that would just
+        * push more answers in reply to those read packets onto the list
+        * of written replies. First attempt to clear the write content out.
+        * That keeps the memory usage from bloating up. */
+       if(c->doq_socket->have_blocked_pkt) {
+               if(!doq_write_blocked_pkt(c)) {
+                       /* this write has also blocked, attempt to write
+                        * later. Make sure the event listens to write
+                        * events. */
+                       if(!c->doq_socket->event_has_write)
+                               doq_socket_write_enable(c);
+                       doq_pickup_timer(c);
+                       return;
+               }
+       }
+
+       /* see if there is write interest. num_len is the length of the
+        * write list sampled before the loop, and count is the number of
+        * connections popped so far; the loop stops once count exceeds
+        * twice num_len. */
+       count = 0;
+       num_len = doq_write_list_length(c);
+       while((conn = doq_pop_write_conn(c)) != NULL) {
+               if(conn->is_deleted ||
+#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
+                       ngtcp2_conn_in_closing_period(conn->conn) ||
 #else
-                       if(WSAGetLastError() != WSAEINPROGRESS &&
-                               WSAGetLastError() != WSAECONNRESET &&
-                               WSAGetLastError()!= WSAEWOULDBLOCK &&
-                               udp_recv_needs_log(WSAGetLastError()))
-                               log_err("recvfrom failed: %s",
-                                       wsa_strerror(WSAGetLastError()));
+                       ngtcp2_conn_is_in_closing_period(conn->conn) ||
 #endif
+#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
+                       ngtcp2_conn_in_draining_period(conn->conn)
+#else
+                       ngtcp2_conn_is_in_draining_period(conn->conn)
+#endif
+                       ) {
+                       conn->doq_socket = NULL;
+                       lock_basic_unlock(&conn->lock);
+                       if(c->doq_socket->have_blocked_pkt) {
+                               if(!c->doq_socket->event_has_write)
+                                       doq_socket_write_enable(c);
+                               doq_pickup_timer(c);
+                               return;
+                       }
+                       if(++count > num_len*2)
+                               break;
+                       continue;
+               }
+               if(verbosity >= VERB_ALGO) {
+                       char remotestr[256];
+                       addr_to_str((void*)&conn->key.paddr.addr,
+                               conn->key.paddr.addrlen, remotestr,
+                               sizeof(remotestr));
+                       verbose(VERB_ALGO, "doq write connection %s %d",
+                               remotestr, doq_sockaddr_get_port(
+                               &conn->key.paddr.addr));
+               }
+               if(doq_conn_write_streams(c, conn, &err_drop))
+                       err_drop = 0;
+               doq_done_with_write_cb(c, conn, err_drop);
+               if(c->doq_socket->have_blocked_pkt) {
+                       if(!c->doq_socket->event_has_write)
+                               doq_socket_write_enable(c);
+                       doq_pickup_timer(c);
                        return;
                }
-               sldns_buffer_skip(rep.c->buffer, rcv);
-               sldns_buffer_flip(rep.c->buffer);
-               rep.srctype = 0;
-               rep.is_proxied = 0;
+               /* Stop overly long write lists that are created
+                * while we are processing. Do those next time there
+                * is a write callback. Stops long loops, and keeps
+                * fair for other events. */
+               if(++count > num_len*2)
+                       break;
+       }
 
-               if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
-                       &rep, 0)) {
-                       log_err("proxy_protocol: could not consume PROXYv2 header");
+       /* check for data to read */
+       if((event&UB_EV_READ)!=0)
+         for(i=0; i<NUM_UDP_PER_SELECT; i++) {
+               /* there may be a blocked write packet and if so, stop
+                * reading because the reply cannot get written. The
+                * blocked packet could be written during the conn_recv
+                * handling of replies, or for a connection close. */
+               if(c->doq_socket->have_blocked_pkt) {
+                       if(!c->doq_socket->event_has_write)
+                               doq_socket_write_enable(c);
+                       doq_pickup_timer(c);
                        return;
                }
-               if(!rep.is_proxied) {
-                       rep.client_addrlen = rep.remote_addrlen;
-                       memmove(&rep.client_addr, &rep.remote_addr,
-                               rep.remote_addrlen);
+               sldns_buffer_clear(c->doq_socket->pkt_buf);
+               doq_pkt_addr_init(&paddr);
+               log_assert(fd != -1);
+               log_assert(sldns_buffer_remaining(c->doq_socket->pkt_buf) > 0);
+               if(!doq_recv(c, &paddr, &pkt_continue, &pi)) {
+                       if(pkt_continue)
+                               continue;
+                       break;
                }
 
-               fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
-               if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
-                       /* send back immediate reply */
-#ifdef USE_DNSCRYPT
-                       buffer = rep.c->dnscrypt_buffer;
+               /* handle incoming packet from remote addr to localaddr */
+               if(verbosity >= VERB_ALGO) {
+                       char remotestr[256], localstr[256];
+                       addr_to_str((void*)&paddr.addr, paddr.addrlen,
+                               remotestr, sizeof(remotestr));
+                       addr_to_str((void*)&paddr.localaddr,
+                               paddr.localaddrlen, localstr,
+                               sizeof(localstr));
+                       log_info("incoming doq packet from %s port %d on "
+                               "%s port %d ifindex %d",
+                               remotestr, doq_sockaddr_get_port(&paddr.addr),
+                               localstr,
+                               doq_sockaddr_get_port(&paddr.localaddr),
+                               paddr.ifindex);
+                       log_info("doq_recv length %d ecn 0x%x",
+                               (int)sldns_buffer_limit(c->doq_socket->pkt_buf),
+                               (int)pi.ecn);
+               }
+
+               if(sldns_buffer_limit(c->doq_socket->pkt_buf) == 0)
+                       continue;
+
+               conn = NULL;
+               if(!doq_decode_pkt_header_negotiate(c, &paddr, &conn))
+                       continue;
+               if(!conn) {
+                       if(!doq_accept(c, &paddr, &conn, &pi))
+                               continue;
+                       if(!doq_conn_write_streams(c, conn, NULL)) {
+                               doq_delete_connection(c, conn);
+                               continue;
+                       }
+                       doq_done_setup_timer_and_write(c, conn);
+                       continue;
+               }
+               if(
+#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
+                       ngtcp2_conn_in_closing_period(conn->conn)
 #else
-                       buffer = rep.c->buffer;
+                       ngtcp2_conn_is_in_closing_period(conn->conn)
 #endif
-                       (void)comm_point_send_udp_msg(rep.c, buffer,
-                               (struct sockaddr*)&rep.remote_addr,
-                               rep.remote_addrlen, 0);
+                       ) {
+                       if(!doq_conn_send_close(c, conn)) {
+                               doq_delete_connection(c, conn);
+                       } else {
+                               doq_done_setup_timer_and_write(c, conn);
+                       }
+                       continue;
                }
-               if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for
-               another UDP port. Note rep.c cannot be reused with TCP fd. */
-                       break;
+               if(
+#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
+                       ngtcp2_conn_in_draining_period(conn->conn)
+#else
+                       ngtcp2_conn_is_in_draining_period(conn->conn)
+#endif
+                       ) {
+                       doq_done_setup_timer_and_write(c, conn);
+                       continue;
+               }
+               if(!doq_conn_recv(c, &paddr, conn, &pi, NULL, &err_drop)) {
+                       /* The receive failed, and if it also failed to send
+                        * a close, drop the connection. That means it is not
+                        * in the closing period. */
+                       if(err_drop) {
+                               doq_delete_connection(c, conn);
+                       } else {
+                               doq_done_setup_timer_and_write(c, conn);
+                       }
+                       continue;
+               }
+               if(!doq_conn_write_streams(c, conn, &err_drop)) {
+                       if(err_drop) {
+                               doq_delete_connection(c, conn);
+                       } else {
+                               doq_done_setup_timer_and_write(c, conn);
+                       }
+                       continue;
+               }
+               doq_done_setup_timer_and_write(c, conn);
+       }
+
+       /* see if we want to have more write events, and pick up the timer
+        * to wait for next. */
+       verbose(VERB_ALGO, "doq check write enable");
+       if(doq_socket_want_write(c))
+               doq_socket_write_enable(c);
+       else doq_socket_write_disable(c);
+       doq_pickup_timer(c);
+}
+
+/** create new doq server socket structure */
+static struct doq_server_socket*
+doq_server_socket_create(struct doq_table* table, struct ub_randstate* rnd,
+       const char* ssl_service_key, const char* ssl_service_pem,
+       struct comm_point* c, struct comm_base* base, struct config_file* cfg)
+{
+       size_t doq_buffer_size = 4096; /* bytes buffer size, for one packet. */
+       struct doq_server_socket* doq_socket;
+       doq_socket = calloc(1, sizeof(*doq_socket));
+       if(!doq_socket) {
+               return NULL;
+       }
+       doq_socket->table = table;
+       doq_socket->rnd = rnd;
+       doq_socket->validate_addr = 1;
+       if(ssl_service_key == NULL || ssl_service_key[0]==0) {
+               log_err("doq server socket create: no tls-service-key");
+               free(doq_socket);
+               return NULL;
+       }
+       if(ssl_service_pem == NULL || ssl_service_pem[0]==0) {
+               log_err("doq server socket create: no tls-service-pem");
+               free(doq_socket);
+               return NULL;
+       }
+       doq_socket->ssl_service_key = strdup(ssl_service_key);
+       if(!doq_socket->ssl_service_key) {
+               free(doq_socket);
+               return NULL;
+       }
+       doq_socket->ssl_service_pem = strdup(ssl_service_pem);
+       if(!doq_socket->ssl_service_pem) {
+               free(doq_socket->ssl_service_key);
+               free(doq_socket);
+               return NULL;
+       }
+       doq_socket->ssl_verify_pem = NULL;
+       /* the doq_socket has its own copy of the static secret, as
+        * well as other config values, so that they do not need table.lock */
+       doq_socket->static_secret_len = table->static_secret_len;
+       doq_socket->static_secret = memdup(table->static_secret,
+               table->static_secret_len);
+       if(!doq_socket->static_secret) {
+               free(doq_socket->ssl_service_key);
+               free(doq_socket->ssl_service_pem);
+               free(doq_socket->ssl_verify_pem);
+               free(doq_socket);
+               return NULL;
+       }
+       if(!doq_socket_setup_ctx(doq_socket)) {
+               free(doq_socket->ssl_service_key);
+               free(doq_socket->ssl_service_pem);
+               free(doq_socket->ssl_verify_pem);
+               free(doq_socket->static_secret);
+               free(doq_socket);
+               return NULL;
+       }
+       doq_socket->idle_timeout = table->idle_timeout;
+       doq_socket->sv_scidlen = table->sv_scidlen;
+       doq_socket->cp = c;
+       doq_socket->pkt_buf = sldns_buffer_new(doq_buffer_size);
+       if(!doq_socket->pkt_buf) {
+               free(doq_socket->ssl_service_key);
+               free(doq_socket->ssl_service_pem);
+               free(doq_socket->ssl_verify_pem);
+               free(doq_socket->static_secret);
+               SSL_CTX_free(doq_socket->ctx);
+               free(doq_socket);
+               return NULL;
+       }
+       doq_socket->blocked_pkt = sldns_buffer_new(
+               sldns_buffer_capacity(doq_socket->pkt_buf));
+       if(!doq_socket->pkt_buf) {
+               free(doq_socket->ssl_service_key);
+               free(doq_socket->ssl_service_pem);
+               free(doq_socket->ssl_verify_pem);
+               free(doq_socket->static_secret);
+               SSL_CTX_free(doq_socket->ctx);
+               sldns_buffer_free(doq_socket->pkt_buf);
+               free(doq_socket);
+               return NULL;
+       }
+       doq_socket->blocked_paddr = calloc(1,
+               sizeof(*doq_socket->blocked_paddr));
+       if(!doq_socket->blocked_paddr) {
+               free(doq_socket->ssl_service_key);
+               free(doq_socket->ssl_service_pem);
+               free(doq_socket->ssl_verify_pem);
+               free(doq_socket->static_secret);
+               SSL_CTX_free(doq_socket->ctx);
+               sldns_buffer_free(doq_socket->pkt_buf);
+               sldns_buffer_free(doq_socket->blocked_pkt);
+               free(doq_socket);
+               return NULL;
+       }
+       doq_socket->timer = comm_timer_create(base, doq_timer_cb, doq_socket);
+       if(!doq_socket->timer) {
+               free(doq_socket->ssl_service_key);
+               free(doq_socket->ssl_service_pem);
+               free(doq_socket->ssl_verify_pem);
+               free(doq_socket->static_secret);
+               SSL_CTX_free(doq_socket->ctx);
+               sldns_buffer_free(doq_socket->pkt_buf);
+               sldns_buffer_free(doq_socket->blocked_pkt);
+               free(doq_socket->blocked_paddr);
+               free(doq_socket);
+               return NULL;
+       }
+       memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time));
+       comm_base_timept(base, &doq_socket->now_tt, &doq_socket->now_tv);
+       doq_socket->cfg = cfg;
+       return doq_socket;
+}
+
+/** delete doq server socket structure, and the members that it owns.
+ * A NULL doq_socket is ignored. */
+static void
+doq_server_socket_delete(struct doq_server_socket* doq_socket)
+{
+       if(doq_socket == NULL)
+               return;
+       /* the TLS context, and after that the quic method */
+       SSL_CTX_free(doq_socket->ctx);
+#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
+       free(doq_socket->quic_method);
+#endif
+       /* the timer */
+       comm_timer_delete(doq_socket->timer);
+       /* the packet buffers and the blocked packet address */
+       sldns_buffer_free(doq_socket->pkt_buf);
+       sldns_buffer_free(doq_socket->blocked_pkt);
+       free(doq_socket->blocked_paddr);
+       /* the copied strings and the copied secret */
+       free(doq_socket->ssl_service_key);
+       free(doq_socket->ssl_service_pem);
+       free(doq_socket->ssl_verify_pem);
+       free(doq_socket->static_secret);
+       free(doq_socket);
+}
+
+/** find repinfo in the doq table. The key is made from the repinfo and
+ * looked up with the table lock held for reading. If a connection is found
+ * it is locked before the table lock is released, and returned locked.
+ * Returns NULL if there is no such connection. */
+static struct doq_conn*
+doq_lookup_repinfo(struct doq_table* table, struct comm_reply* repinfo)
+{
+       struct doq_conn_key key;
+       struct doq_conn* found;
+       doq_conn_key_from_repinfo(&key, repinfo);
+       lock_rw_rdlock(&table->lock);
+       found = doq_conn_find(table, &key.paddr.addr,
+               key.paddr.addrlen, &key.paddr.localaddr,
+               key.paddr.localaddrlen, key.paddr.ifindex, key.dcid,
+               key.dcidlen);
+       if(found != NULL) {
+               /* take the connection lock while the table is locked */
+               lock_basic_lock(&found->lock);
+       }
+       lock_rw_unlock(&table->lock);
+       return found;
+}
+
+/** doq find connection and stream. From inside callbacks from worker.
+ * If the doq_socket has a current_conn, that connection is used and no
+ * lock is taken or released here. Otherwise the connection is looked up
+ * from the repinfo and is locked by the lookup; a connection that is
+ * marked is_deleted is unlocked and treated as not found.
+ * Returns 1 with *conn and *stream set if the stream exists and is not
+ * closed. Returns 0 otherwise, and then a connection lock that was taken
+ * here has been released. */
+static int
+doq_lookup_conn_stream(struct comm_reply* repinfo, struct comm_point* c,
+       struct doq_conn** conn, struct doq_stream** stream)
+{
+       if(c->doq_socket->current_conn) {
+               *conn = c->doq_socket->current_conn;
+       } else {
+               *conn = doq_lookup_repinfo(c->doq_socket->table, repinfo);
+               if(*conn) {
+                       if((*conn)->is_deleted) {
+                               lock_basic_unlock(&(*conn)->lock);
+                               *conn = NULL;
+                       } else {
+                               (*conn)->doq_socket = c->doq_socket;
+                       }
+               }
+       }
+       if(!*conn) {
+               *stream = NULL;
+               return 0;
+       }
+       *stream = doq_stream_find(*conn, repinfo->doq_streamid);
+       if(*stream && !(*stream)->is_closed)
+               return 1;
+       /* There is no stream, or the stream is closed; ignore reply or
+        * drop. When not inside callbacks, we have our own lock on conn,
+        * release it. */
+       if(!c->doq_socket->current_conn)
+               lock_basic_unlock(&(*conn)->lock);
+       return 0;
+}
+
+/** doq send a reply from a comm reply. The reply in the buffer of the
+ * comm point is passed to the stream of the repinfo; if that fails the
+ * stream is closed. If the connection or stream cannot be found, the
+ * reply is dropped. */
+static void
+doq_socket_send_reply(struct comm_reply* repinfo)
+{
+       struct doq_stream* stream;
+       struct doq_conn* conn;
+       log_assert(repinfo->c->type == comm_doq);
+       if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) {
+               /* The stream may have been closed. The reply cannot be
+                * sent, so it is dropped. */
+               verbose(VERB_ALGO, "doq: send_reply but %s is gone",
+                       (conn?"stream":"connection"));
+               return;
+       }
+       if(!doq_stream_send_reply(conn, stream, repinfo->c->buffer))
+               doq_stream_close(conn, stream, 1);
+       if(repinfo->c->doq_socket->current_conn) {
+               /* Inside callbacks, the lock on conn is not ours. */
+               return;
+       }
+       /* Not inside callbacks, so the lock on conn is our own and it is
+        * released here. */
+       doq_done_with_conn_cb(repinfo->c, conn);
+       /* A reply was sent, or the stream was closed, so the assumption
+        * is that there is something to write. Only the write event is
+        * enabled; the streams with answers are written when the write
+        * event happens. This allows more answers to be added before the
+        * event loop reaches the doq fd, in repinfo->c->fd, so that the
+        * answers are collated. That would not happen if the doq packets
+        * were written right now. */
+       doq_socket_write_enable(repinfo->c);
+}
+
+/** doq drop a reply from a comm reply. The stream of the repinfo is
+ * closed instead of answered. If the connection or stream cannot be
+ * found, nothing is done. When not inside callbacks (the doq_socket has
+ * no current_conn), the connection that was locked by the lookup is
+ * released and the write event is enabled. */
+static void
+doq_socket_drop_reply(struct comm_reply* repinfo)
+{
+       struct doq_conn* conn;
+       struct doq_stream* stream;
+       log_assert(repinfo->c->type == comm_doq);
+       if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) {
+               verbose(VERB_ALGO, "doq: drop_reply but %s is gone",
+                       (conn?"stream":"connection"));
+               /* The connection or stream is already gone. */
+               return;
+       }
+       doq_stream_close(conn, stream, 1);
+       if(!repinfo->c->doq_socket->current_conn) {
+               /* Not inside callbacks, we have our own lock on conn.
+                * Release it. */
+               doq_done_with_conn_cb(repinfo->c, conn);
+               doq_socket_write_enable(repinfo->c);
        }
 }
+#endif /* HAVE_NGTCP2 */
 
 int adjusted_tcp_timeout(struct comm_point* c)
 {
@@ -4081,6 +5857,96 @@ comm_point_create_udp_ancil(struct comm_base *base, int fd,
 }
 #endif
 
+struct comm_point*
+comm_point_create_doq(struct comm_base *base, int fd, sldns_buffer* buffer,
+       comm_point_callback_type* callback, void* callback_arg,
+       struct unbound_socket* socket, struct doq_table* table,
+       struct ub_randstate* rnd, const char* ssl_service_key,
+       const char* ssl_service_pem, struct config_file* cfg)
+{
+#ifdef HAVE_NGTCP2
+       struct comm_point* c = (struct comm_point*)calloc(1,
+               sizeof(struct comm_point));
+       short evbits;
+       if(!c)
+               return NULL;
+       c->ev = (struct internal_event*)calloc(1,
+               sizeof(struct internal_event));
+       if(!c->ev) {
+               free(c);
+               return NULL;
+       }
+       c->ev->base = base;
+       c->fd = fd;
+       c->buffer = buffer;
+       c->timeout = NULL;
+       c->tcp_is_reading = 0;
+       c->tcp_byte_count = 0;
+       c->tcp_parent = NULL;
+       c->max_tcp_count = 0;
+       c->cur_tcp_count = 0;
+       c->tcp_handlers = NULL;
+       c->tcp_free = NULL;
+       c->type = comm_doq;
+       c->tcp_do_close = 0;
+       c->do_not_close = 0;
+       c->tcp_do_toggle_rw = 0;
+       c->tcp_check_nb_connect = 0;
+#ifdef USE_MSG_FASTOPEN
+       c->tcp_do_fastopen = 0;
+#endif
+#ifdef USE_DNSCRYPT
+       c->dnscrypt = 0;
+       c->dnscrypt_buffer = NULL;
+#endif
+#ifdef HAVE_NGTCP2
+       c->doq_socket = doq_server_socket_create(table, rnd, ssl_service_key,
+               ssl_service_pem, c, base, cfg);
+       if(!c->doq_socket) {
+               log_err("could not create doq comm_point");
+               comm_point_delete(c);
+               return NULL;
+       }
+#endif
+       c->inuse = 0;
+       c->callback = callback;
+       c->cb_arg = callback_arg;
+       c->socket = socket;
+       c->pp2_enabled = 0;
+       c->pp2_header_state = pp2_header_none;
+       evbits = UB_EV_READ | UB_EV_PERSIST;
+       /* ub_event stuff: create a persistent read event on the fd that
+        * calls comm_point_doq_callback with this comm point as argument.
+        * With fd -1 the event is created but not added. The failure
+        * paths below go through comm_point_delete, which also deletes
+        * the doq socket that was created above. */
+       c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
+               comm_point_doq_callback, c);
+       if(c->ev->ev == NULL) {
+               log_err("could not baseset udp event");
+               comm_point_delete(c);
+               return NULL;
+       }
+       if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
+               log_err("could not add udp event");
+               comm_point_delete(c);
+               return NULL;
+       }
+       c->event_added = 1;
+       return c;
+#else
+       /* no libngtcp2, so no QUIC support. The arguments are unused,
+        * the fd is closed here and NULL is returned to the caller. */
+       (void)base;
+       (void)buffer;
+       (void)callback;
+       (void)callback_arg;
+       (void)socket;
+       (void)rnd;
+       (void)table;
+       (void)ssl_service_key;
+       (void)ssl_service_pem;
+       (void)cfg;
+       sock_close(fd);
+       return NULL;
+#endif /* HAVE_NGTCP2 */
+}
+
 static struct comm_point*
 comm_point_create_tcp_handler(struct comm_base *base,
        struct comm_point* parent, size_t bufsize,
@@ -4749,6 +6615,10 @@ comm_point_delete(struct comm_point* c)
                        http2_session_delete(c->h2_session);
                }
        }
+#ifdef HAVE_NGTCP2
+       if(c->doq_socket)
+               doq_server_socket_delete(c->doq_socket);
+#endif
        ub_event_free(c->ev->ev);
        free(c->ev);
        free(c);
@@ -4811,6 +6681,10 @@ comm_point_send_reply(struct comm_reply *repinfo)
                        comm_point_start_listening(repinfo->c, -1,
                                adjusted_tcp_timeout(repinfo->c));
                        return;
+#ifdef HAVE_NGTCP2
+               } else if(repinfo->c->doq_socket) {
+                       doq_socket_send_reply(repinfo);
+#endif
                } else {
                        comm_point_start_listening(repinfo->c, -1,
                                adjusted_tcp_timeout(repinfo->c));
@@ -4838,6 +6712,11 @@ comm_point_drop_reply(struct comm_reply* repinfo)
                }
                reclaim_http_handler(repinfo->c);
                return;
+#ifdef HAVE_NGTCP2
+       } else if(repinfo->c->type == comm_doq) {
+               doq_socket_drop_reply(repinfo);
+               return;
+#endif
        }
        reclaim_tcp_handler(repinfo->c);
 }
index 6f43ce56c8c9c9d3570660e57e82c89c693d33e3..acc4887b11e3dadf36e6da6425a7204d64d5ff5c 100644 (file)
@@ -65,6 +65,9 @@
 #ifdef HAVE_NGHTTP2_NGHTTP2_H
 #include <nghttp2/nghttp2.h>
 #endif
+#ifdef HAVE_NGTCP2
+#include <ngtcp2/ngtcp2.h>
+#endif
 
 struct sldns_buffer;
 struct comm_point;
@@ -72,6 +75,11 @@ struct comm_reply;
 struct tcl_list;
 struct ub_event_base;
 struct unbound_socket;
+struct doq_server_socket;
+struct doq_table;
+struct doq_conn;
+struct config_file;
+struct ub_randstate;
 
 struct mesh_state;
 struct mesh_area;
@@ -105,6 +113,8 @@ typedef int comm_point_callback_type(struct comm_point*, void*, int,
 #define NETEVENT_SLOW_ACCEPT_TIME 2000
 /** timeout to slow down log print, so it does not spam the logs, in sec */
 #define SLOW_LOG_TIME 10
+/** for doq, the size reserved to store a dcid (connection id). The maximum
+ * dcid length in ngtcp2 is 20, so this value has room for that. */
+#define DOQ_MAX_CIDLEN 24
 
 /**
  * A communication point dispatcher. Thread specific.
@@ -164,6 +174,19 @@ struct comm_reply {
        struct sockaddr_storage client_addr;
        /** the original address length */
        socklen_t client_addrlen;
+#ifdef HAVE_NGTCP2
+       /** the doq ifindex, together with addr and localaddr in pktinfo,
+        * and dcid makes the doq_conn_key to find the connection */
+       int doq_ifindex;
+       /** the doq dcid, the connection id used to find the connection */
+       uint8_t doq_dcid[DOQ_MAX_CIDLEN];
+       /** the length of the doq dcid */
+       size_t doq_dcidlen;
+       /** the doq stream id where the query came in on */
+       int64_t doq_streamid;
+       /** port number for doq */
+       int doq_srcport;
+#endif /* HAVE_NGTCP2 */
 };
 
 /**
@@ -266,6 +289,11 @@ struct comm_point {
        /** maximum number of HTTP/2 streams per connection. Send in HTTP/2
         * SETTINGS frame. */
        uint32_t http2_max_streams;
+       /* -------- DoQ ------- */
+#ifdef HAVE_NGTCP2
+       /** the doq server socket, with list of doq connections */
+       struct doq_server_socket* doq_socket;
+#endif
 
        /* -------- dnstap ------- */
        /** the dnstap environment */
@@ -281,6 +309,8 @@ struct comm_point {
                comm_tcp,
                /** HTTP handler socket */
                comm_http,
+               /** DOQ handler socket */
+               comm_doq,
                /** AF_UNIX socket - for internal commands. */
                comm_local,
                /** raw - not DNS format - for pipe readers and writers */
@@ -552,6 +582,30 @@ struct comm_point* comm_point_create_udp_ancil(struct comm_base* base,
        int fd, struct sldns_buffer* buffer, int pp2_enabled,
        comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket);
 
+/**
+ * Create a UDP comm point for DoQ. Allocates memory.
+ * Sets up the structure with the parameters you provide.
+ * @param base: in which base to alloc the commpoint.
+ * @param fd: file descriptor of open UDP socket. When compiled without
+ *     libngtcp2 the fd is closed by this call.
+ * @param buffer: shared buffer by UDP sockets from this thread.
+ * @param callback: callback function pointer.
+ * @param callback_arg: will be passed to your callback function.
+ * @param socket: the opened socket properties, stored in the comm point.
+ * @param table: the doq connection table for the host.
+ * @param rnd: random generator to use.
+ * @param ssl_service_key: the ssl service key file.
+ * @param ssl_service_pem: the ssl service pem file.
+ * @param cfg: config file struct.
+ * @return: returns the allocated communication point. NULL on error, and
+ *     also NULL when compiled without libngtcp2.
+ * Sets timeout to NULL. Turns off TCP options.
+ */
+struct comm_point* comm_point_create_doq(struct comm_base* base,
+       int fd, struct sldns_buffer* buffer,
+       comm_point_callback_type* callback, void* callback_arg,
+       struct unbound_socket* socket, struct doq_table* table,
+       struct ub_randstate* rnd, const char* ssl_service_key,
+       const char* ssl_service_pem, struct config_file* cfg);
+
 /**
  * Create a TCP listener comm point. Calls malloc.
  * Setups the structure with the parameters you provide.
@@ -821,6 +875,16 @@ void comm_point_udp_callback(int fd, short event, void* arg);
  */
 void comm_point_udp_ancil_callback(int fd, short event, void* arg);
 
+/**
+ * This routine is published for checks and tests, and is only used internally.
+ * Handle the libevent callback for a doq comm point. It is the function
+ * that comm_point_create_doq registers for the event on the doq fd.
+ * @param fd: file descriptor.
+ * @param event: event bits from libevent:
+ *     EV_READ, EV_WRITE, EV_SIGNAL, EV_TIMEOUT.
+ * @param arg: the comm_point structure.
+ */
+void comm_point_doq_callback(int fd, short event, void* arg);
+
 /**
  * This routine is published for checks and tests, and is only used internally.
  * handle libevent callback for tcp accept comm point
@@ -958,6 +1022,106 @@ void http2_stream_add_meshstate(struct http2_stream* h2_stream,
 /** Remove mesh state from stream. When the mesh state has been removed. */
 void http2_stream_remove_mesh_state(struct http2_stream* h2_stream);
 
+/**
+ * DoQ socket address storage for an IP4 or IP6 address. Smaller than
+ * struct sockaddr_storage, because it does not have to hold AF_UNIX
+ * pathnames. The IP6 member is only present when AF_INET6 is defined.
+ */
+struct doq_addr_storage {
+       union {
+               struct sockaddr_in in;
+#ifdef AF_INET6
+               struct sockaddr_in6 in6;
+#endif
+       } sockaddr;
+};
+
+/**
+ * The DoQ server socket information, for DNS over QUIC.
+ * It is created for a doq comm point by comm_point_create_doq, and it is
+ * deleted again by comm_point_delete.
+ */
+struct doq_server_socket {
+       /** the doq connection table */
+       struct doq_table* table;
+       /** random generator */
+       struct ub_randstate* rnd;
+       /** if address validation is enabled */
+       uint8_t validate_addr;
+       /** the ssl service key file */
+       char* ssl_service_key;
+       /** the ssl service pem file */
+       char* ssl_service_pem;
+       /** the ssl verify pem file */
+       char* ssl_verify_pem;
+       /** the server scid length */
+       int sv_scidlen;
+       /** the idle timeout in nanoseconds */
+       uint64_t idle_timeout;
+       /** the static secret for the server */
+       uint8_t* static_secret;
+       /** length of the static secret */
+       size_t static_secret_len;
+       /** ssl context, SSL_CTX* */
+       void* ctx;
+#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
+       /** quic method functions, SSL_QUIC_METHOD* */
+       void* quic_method;
+#endif
+       /** the comm point for this doq server socket */
+       struct comm_point* cp;
+       /** the buffer for packets, doq in and out */
+       struct sldns_buffer* pkt_buf;
+       /** the current doq connection when we are in callbacks to worker,
+        * so that we have the already locked structure at our disposal.
+        * It is NULL when not inside such a callback. */
+       struct doq_conn* current_conn;
+       /** if the callback event on the fd has write flags */
+       uint8_t event_has_write;
+       /** if there is a blocked packet in the blocked_pkt buffer */
+       int have_blocked_pkt;
+       /** store blocked packet, a packet that could not be sent on the
+        * nonblocking socket. It has to be sent later, when the write on
+        * the udp socket unblocks. */
+       struct sldns_buffer* blocked_pkt;
+#ifdef HAVE_NGTCP2
+       /** the ecn info for the blocked packet, congestion information. */
+       struct ngtcp2_pkt_info blocked_pkt_pi;
+#endif
+       /** the packet destination for the blocked packet. */
+       struct doq_pkt_addr* blocked_paddr;
+       /** timer for this worker on this comm_point to wait on. */
+       struct comm_timer* timer;
+       /** the timer that is marked by the doq_socket as waited on. */
+       struct timeval marked_time;
+       /** the current time for use by time functions, time_t. */
+       time_t* now_tt;
+       /** the current time for use by time functions, timeval. */
+       struct timeval* now_tv;
+       /** config file for the worker. */
+       struct config_file* cfg;
+};
+
+/**
+ * DoQ packet address information. Taken from pktinfo, it stores the local
+ * and remote address and the ifindex, so that a packet can be sent there.
+ */
+struct doq_pkt_addr {
+       /** the remote addr, and local addr */
+       struct doq_addr_storage addr, localaddr;
+       /** length of addr and length of localaddr */
+       socklen_t addrlen, localaddrlen;
+       /** interface index from pktinfo ancillary information */
+       int ifindex;
+};
+
+/** Initialize the pkt addr with the lengths set to sizeof. After that it
+ * is ready for a call to recv.
+ * @param paddr: the packet address structure to initialize. */
+void doq_pkt_addr_init(struct doq_pkt_addr* paddr);
+
+/** send doq packet over UDP.
+ * @param c: the doq comm point. Presumably the packet contents are taken
+ *     from its doq_socket pkt_buf - TODO confirm in the implementation.
+ * @param paddr: the packet address information to send the packet to.
+ * @param ecn: presumably the ECN (congestion notification) value for the
+ *     packet - TODO confirm in the implementation. */
+void doq_send_pkt(struct comm_point* c, struct doq_pkt_addr* paddr,
+       uint32_t ecn);
+
+/** doq timer callback function.
+ * @param arg: the user argument for the callback; its type is not visible
+ *     in this declaration - TODO confirm against the definition. */
+void doq_timer_cb(void* arg);
+
 /**
  * This routine is published for checks and tests, and is only used internally.
  * handle libevent callback for timer comm.