daemon: basic implementation of TCP and UDP with protolayers

author Oto Šťáva <oto.stava@nic.cz>

Tue, 2 Aug 2022 08:53:38 +0000 (10:53 +0200)

committer Oto Šťáva <oto.stava@nic.cz>

Thu, 26 Jan 2023 11:56:07 +0000 (12:56 +0100)
author Oto Šťáva <oto.stava@nic.cz>
Tue, 2 Aug 2022 08:53:38 +0000 (10:53 +0200)
committer Oto Šťáva <oto.stava@nic.cz>
Thu, 26 Jan 2023 11:56:07 +0000 (12:56 +0100)
diff --git a/daemon/bindings/net.c b/daemon/bindings/net.c

index 9a6374cd8c93526dfea93f2c6e61e1d9e10aebae..ae3a3c6997b90bf5b6127d9530c5c67331867805 100644 (file)
--- a/daemon/bindings/net.c
+++ b/daemon/bindings/net.c
@@ -532,32 +532,35 @@ static int net_pipeline(lua_State *L)
  
  static int net_tls(lua_State *L)
  {
-       if (kr_fails_assert(the_network)) {
-               return 0;
-       }
-
-       /* Only return current credentials. */
-       if (lua_gettop(L) == 0) {
-               /* No credentials configured yet. */
-               if (!the_network->tls_credentials) {
-                       return 0;
-               }
-               lua_newtable(L);
-               lua_pushstring(L, the_network->tls_credentials->tls_cert);
-               lua_setfield(L, -2, "cert_file");
-               lua_pushstring(L, the_network->tls_credentials->tls_key);
-               lua_setfield(L, -2, "key_file");
-               return 1;
-       }
-
-       if ((lua_gettop(L) != 2) || !lua_isstring(L, 1) || !lua_isstring(L, 2))
-               lua_error_p(L, "net.tls takes two parameters: (\"cert_file\", \"key_file\")");
-
-       int r = tls_certificate_set(lua_tostring(L, 1), lua_tostring(L, 2));
-       lua_error_maybe(L, r);
-
-       lua_pushboolean(L, true);
-       return 1;
+       /* TODO: not implemented yet; the previous net_tls() body is kept commented out below. */
+       kr_assert(false && "Unimplemented");
+       return 0;
+//     if (kr_fails_assert(the_network)) {
+//             return 0;
+//     }
+//
+//     /* Only return current credentials. */
+//     if (lua_gettop(L) == 0) {
+//             /* No credentials configured yet. */
+//             if (!the_network->tls_credentials) {
+//                     return 0;
+//             }
+//             lua_newtable(L);
+//             lua_pushstring(L, the_network->tls_credentials->tls_cert);
+//             lua_setfield(L, -2, "cert_file");
+//             lua_pushstring(L, the_network->tls_credentials->tls_key);
+//             lua_setfield(L, -2, "key_file");
+//             return 1;
+//     }
+//
+//     if ((lua_gettop(L) != 2) || !lua_isstring(L, 1) || !lua_isstring(L, 2))
+//             lua_error_p(L, "net.tls takes two parameters: (\"cert_file\", \"key_file\")");
+//
+//     int r = tls_certificate_set(lua_tostring(L, 1), lua_tostring(L, 2));
+//     lua_error_maybe(L, r);
+//
+//     lua_pushboolean(L, true);
+//     return 1;
  }
  
  /** Configure HTTP headers for DoH requests. */
@@ -614,90 +617,93 @@ static int net_doh_headers(lua_State *L)
   * more precisely, it's in a compatible canonical form. */
  static int tls_params2lua(lua_State *L, trie_t *params)
  {
-       lua_newtable(L);
-       if (!params) /* Allowed special case. */
-               return 1;
-       trie_it_t *it;
-       size_t list_index = 0;
-       for (it = trie_it_begin(params); !trie_it_finished(it); trie_it_next(it)) {
-               /* Prepare table for the current address
-                * and its index in the returned list. */
-               lua_pushinteger(L, ++list_index);
-               lua_createtable(L, 0, 2);
-
-               /* Get the "addr#port" string... */
-               size_t ia_len;
-               const char *key = trie_it_key(it, &ia_len);
-               int af = AF_UNSPEC;
-               if (ia_len == 2 + sizeof(struct in_addr)) {
-                       af = AF_INET;
-               } else if (ia_len == 2 + sizeof(struct in6_addr)) {
-                       af = AF_INET6;
-               }
-               if (kr_fails_assert(key && af != AF_UNSPEC))
-                       lua_error_p(L, "internal error: bad IP address");
-               uint16_t port;
-               memcpy(&port, key, sizeof(port));
-               port = ntohs(port);
-               const char *ia = key + sizeof(port);
-               char str[INET6_ADDRSTRLEN + 1 + 5 + 1];
-               size_t len = sizeof(str);
-               if (kr_fails_assert(kr_ntop_str(af, ia, port, str, &len) == kr_ok()))
-                       lua_error_p(L, "internal error: bad IP address conversion");
-               /* ...and push it as [1]. */
-               lua_pushinteger(L, 1);
-               lua_pushlstring(L, str, len - 1 /* len includes '\0' */);
-               lua_settable(L, -3);
-
-               const tls_client_param_t *e = *trie_it_val(it);
-               if (kr_fails_assert(e))
-                       lua_error_p(L, "internal problem - NULL entry for %s", str);
-
-               /* .hostname = */
-               if (e->hostname) {
-                       lua_pushstring(L, e->hostname);
-                       lua_setfield(L, -2, "hostname");
-               }
-
-               /* .ca_files = */
-               if (e->ca_files.len) {
-                       lua_createtable(L, e->ca_files.len, 0);
-                       for (size_t i = 0; i < e->ca_files.len; ++i) {
-                               lua_pushinteger(L, i + 1);
-                               lua_pushstring(L, e->ca_files.at[i]);
-                               lua_settable(L, -3);
-                       }
-                       lua_setfield(L, -2, "ca_files");
-               }
-
-               /* .pin_sha256 = ... ; keep sane indentation via goto. */
-               if (!e->pins.len) goto no_pins;
-               lua_createtable(L, e->pins.len, 0);
-               for (size_t i = 0; i < e->pins.len; ++i) {
-                       uint8_t pin_base64[TLS_SHA256_BASE64_BUFLEN];
-                       int err = kr_base64_encode(e->pins.at[i], TLS_SHA256_RAW_LEN,
-                                               pin_base64, sizeof(pin_base64));
-                       if (kr_fails_assert(err >= 0))
-                               lua_error_p(L,
-                                       "internal problem when converting pin_sha256: %s",
-                                       kr_strerror(err));
-                       lua_pushinteger(L, i + 1);
-                       lua_pushlstring(L, (const char *)pin_base64, err);
-                               /* pin_base64 isn't 0-terminated     ^^^ */
-                       lua_settable(L, -3);
-               }
-               lua_setfield(L, -2, "pin_sha256");
-
-       no_pins:/* .insecure = */
-               if (e->insecure) {
-                       lua_pushboolean(L, true);
-                       lua_setfield(L, -2, "insecure");
-               }
-               /* Now the whole table is pushed atop the returned list. */
-               lua_settable(L, -3);
-       }
-       trie_it_free(it);
-       return 1;
+       /* TODO: not implemented yet; the previous tls_params2lua() body is kept commented out below. */
+       kr_assert(false && "Unimplemented");
+       return 0;
+//     lua_newtable(L);
+//     if (!params) /* Allowed special case. */
+//             return 1;
+//     trie_it_t *it;
+//     size_t list_index = 0;
+//     for (it = trie_it_begin(params); !trie_it_finished(it); trie_it_next(it)) {
+//             /* Prepare table for the current address
+//              * and its index in the returned list. */
+//             lua_pushinteger(L, ++list_index);
+//             lua_createtable(L, 0, 2);
+//
+//             /* Get the "addr#port" string... */
+//             size_t ia_len;
+//             const char *key = trie_it_key(it, &ia_len);
+//             int af = AF_UNSPEC;
+//             if (ia_len == 2 + sizeof(struct in_addr)) {
+//                     af = AF_INET;
+//             } else if (ia_len == 2 + sizeof(struct in6_addr)) {
+//                     af = AF_INET6;
+//             }
+//             if (kr_fails_assert(key && af != AF_UNSPEC))
+//                     lua_error_p(L, "internal error: bad IP address");
+//             uint16_t port;
+//             memcpy(&port, key, sizeof(port));
+//             port = ntohs(port);
+//             const char *ia = key + sizeof(port);
+//             char str[INET6_ADDRSTRLEN + 1 + 5 + 1];
+//             size_t len = sizeof(str);
+//             if (kr_fails_assert(kr_ntop_str(af, ia, port, str, &len) == kr_ok()))
+//                     lua_error_p(L, "internal error: bad IP address conversion");
+//             /* ...and push it as [1]. */
+//             lua_pushinteger(L, 1);
+//             lua_pushlstring(L, str, len - 1 /* len includes '\0' */);
+//             lua_settable(L, -3);
+//
+//             const tls_client_param_t *e = *trie_it_val(it);
+//             if (kr_fails_assert(e))
+//                     lua_error_p(L, "internal problem - NULL entry for %s", str);
+//
+//             /* .hostname = */
+//             if (e->hostname) {
+//                     lua_pushstring(L, e->hostname);
+//                     lua_setfield(L, -2, "hostname");
+//             }
+//
+//             /* .ca_files = */
+//             if (e->ca_files.len) {
+//                     lua_createtable(L, e->ca_files.len, 0);
+//                     for (size_t i = 0; i < e->ca_files.len; ++i) {
+//                             lua_pushinteger(L, i + 1);
+//                             lua_pushstring(L, e->ca_files.at[i]);
+//                             lua_settable(L, -3);
+//                     }
+//                     lua_setfield(L, -2, "ca_files");
+//             }
+//
+//             /* .pin_sha256 = ... ; keep sane indentation via goto. */
+//             if (!e->pins.len) goto no_pins;
+//             lua_createtable(L, e->pins.len, 0);
+//             for (size_t i = 0; i < e->pins.len; ++i) {
+//                     uint8_t pin_base64[TLS_SHA256_BASE64_BUFLEN];
+//                     int err = kr_base64_encode(e->pins.at[i], TLS_SHA256_RAW_LEN,
+//                                             pin_base64, sizeof(pin_base64));
+//                     if (kr_fails_assert(err >= 0))
+//                             lua_error_p(L,
+//                                     "internal problem when converting pin_sha256: %s",
+//                                     kr_strerror(err));
+//                     lua_pushinteger(L, i + 1);
+//                     lua_pushlstring(L, (const char *)pin_base64, err);
+//                             /* pin_base64 isn't 0-terminated     ^^^ */
+//                     lua_settable(L, -3);
+//             }
+//             lua_setfield(L, -2, "pin_sha256");
+//
+//     no_pins:/* .insecure = */
+//             if (e->insecure) {
+//                     lua_pushboolean(L, true);
+//                     lua_setfield(L, -2, "insecure");
+//             }
+//             /* Now the whole table is pushed atop the returned list. */
+//             lua_settable(L, -3);
+//     }
+//     trie_it_free(it);
+//     return 1;
  }
  
  static inline int cmp_sha256(const void *p1, const void *p2)
@@ -706,224 +712,230 @@ static inline int cmp_sha256(const void *p1, const void *p2)
  }
  static int net_tls_client(lua_State *L)
  {
-       /* TODO idea: allow starting the lua table with *multiple* IP targets,
-        * meaning the authentication config should be applied to each.
-        */
-       if (lua_gettop(L) == 0)
-               return tls_params2lua(L, the_network->tls_client_params);
-       /* Various basic sanity-checking. */
-       if (lua_gettop(L) != 1 || !lua_istable(L, 1))
-               lua_error_maybe(L, EINVAL);
-       /* check that only allowed keys are present */
-       {
-               const char *bad_key = lua_table_checkindices(L, (const char *[])
-                       { "1", "hostname", "ca_file", "pin_sha256", "insecure", NULL });
-               if (bad_key)
-                       lua_error_p(L, "found unexpected key '%s'", bad_key);
-       }
-
-       /**** Phase 1: get the parameter into a C struct, incl. parse of CA files,
-        *       regardless of the address-pair having an entry already. */
-
-       tls_client_param_t *newcfg = tls_client_param_new();
-       if (!newcfg)
-               lua_error_p(L, "out of memory or something like that :-/");
-       /* Shortcut for cleanup actions needed from now on. */
-       #define ERROR(...) do { \
-               free(newcfg); \
-               lua_error_p(L, __VA_ARGS__); \
-       } while (false)
-
-       /* .hostname - always accepted. */
-       lua_getfield(L, 1, "hostname");
-       if (!lua_isnil(L, -1)) {
-               const char *hn_str = lua_tostring(L, -1);
-               /* Convert to lower-case dname and back, for checking etc. */
-               knot_dname_t dname[KNOT_DNAME_MAXLEN];
-               if (!hn_str || !knot_dname_from_str(dname, hn_str, sizeof(dname)))
-                       ERROR("invalid hostname");
-               knot_dname_to_lower(dname);
-               char *h = knot_dname_to_str_alloc(dname);
-               if (!h)
-                       ERROR("%s", kr_strerror(ENOMEM));
-               /* Strip the final dot produced by knot_dname_*() */
-               h[strlen(h) - 1] = '\0';
-               newcfg->hostname = h;
-       }
-       lua_pop(L, 1);
-
-       /* .ca_file - it can be a list of paths, contrary to the name. */
-       bool has_ca_file = false;
-       lua_getfield(L, 1, "ca_file");
-       if (!lua_isnil(L, -1)) {
-               if (!newcfg->hostname)
-                       ERROR("missing hostname but specifying ca_file");
-               lua_listify(L);
-               array_init(newcfg->ca_files); /*< placate apparently confused scan-build */
-               if (array_reserve(newcfg->ca_files, lua_objlen(L, -1)) != 0) /*< optim. */
-                       ERROR("%s", kr_strerror(ENOMEM));
-               /* Iterate over table at the top of the stack.
-                * http://www.lua.org/manual/5.1/manual.html#lua_next */
-               for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) {
-                       has_ca_file = true; /* deferred here so that {} -> false */
-                       const char *ca_file = lua_tostring(L, -1);
-                       if (!ca_file)
-                               ERROR("ca_file contains a non-string");
-                       /* Let gnutls process it immediately, so garbage gets detected. */
-                       int ret = gnutls_certificate_set_x509_trust_file(
-                                       newcfg->credentials, ca_file, GNUTLS_X509_FMT_PEM);
-                       if (ret < 0) {
-                               ERROR("failed to import certificate file '%s': %s - %s\n",
-                                       ca_file, gnutls_strerror_name(ret),
-                                       gnutls_strerror(ret));
-                       } else {
-                               kr_log_debug(TLSCLIENT, "imported %d certs from file '%s'\n",
-                                       ret, ca_file);
-                       }
-
-                       ca_file = strdup(ca_file);
-                       if (!ca_file || array_push(newcfg->ca_files, ca_file) < 0)
-                               ERROR("%s", kr_strerror(ENOMEM));
-               }
-               /* Sort the strings for easier comparison later. */
-               if (newcfg->ca_files.len) {
-                       qsort(&newcfg->ca_files.at[0], newcfg->ca_files.len,
-                               sizeof(newcfg->ca_files.at[0]), strcmp_p);
-               }
-       }
-       lua_pop(L, 1);
-
-       /* .pin_sha256 */
-       lua_getfield(L, 1, "pin_sha256");
-       if (!lua_isnil(L, -1)) {
-               if (has_ca_file)
-                       ERROR("mixing pin_sha256 with ca_file is not supported");
-               lua_listify(L);
-               array_init(newcfg->pins); /*< placate apparently confused scan-build */
-               if (array_reserve(newcfg->pins, lua_objlen(L, -1)) != 0) /*< optim. */
-                       ERROR("%s", kr_strerror(ENOMEM));
-               /* Iterate over table at the top of the stack. */
-               for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) {
-                       const char *pin = lua_tostring(L, -1);
-                       if (!pin)
-                               ERROR("pin_sha256 is not a string");
-                       uint8_t *pin_raw = malloc(TLS_SHA256_RAW_LEN);
-                       /* Push the string early to simplify error processing. */
-                       if (kr_fails_assert(pin_raw && array_push(newcfg->pins, pin_raw) >= 0)) {
-                               free(pin_raw);
-                               ERROR("%s", kr_strerror(ENOMEM));
-                       }
-                       int ret = kr_base64_decode((const uint8_t *)pin, strlen(pin),
-                                               pin_raw, TLS_SHA256_RAW_LEN + 8);
-                       if (ret < 0) {
-                               ERROR("not a valid pin_sha256: '%s' (length %d), %s\n",
-                                       pin, (int)strlen(pin), knot_strerror(ret));
-                       } else if (ret != TLS_SHA256_RAW_LEN) {
-                               ERROR("not a valid pin_sha256: '%s', "
-                                               "raw length %d instead of "
-                                               STR(TLS_SHA256_RAW_LEN)"\n",
-                                       pin, ret);
-                       }
-               }
-               /* Sort the raw strings for easier comparison later. */
-               if (newcfg->pins.len) {
-                       qsort(&newcfg->pins.at[0], newcfg->pins.len,
-                               sizeof(newcfg->pins.at[0]), cmp_sha256);
-               }
-       }
-       lua_pop(L, 1);
-
-       /* .insecure */
-       lua_getfield(L, 1, "insecure");
-       if (lua_isnil(L, -1)) {
-               if (!newcfg->hostname && !newcfg->pins.len)
-                       ERROR("no way to authenticate and not set as insecure");
-       } else if (lua_isboolean(L, -1) && lua_toboolean(L, -1)) {
-               newcfg->insecure = true;
-               if (has_ca_file || newcfg->pins.len)
-                       ERROR("set as insecure but provided authentication config");
-       } else {
-               ERROR("incorrect value in the 'insecure' field");
-       }
-       lua_pop(L, 1);
-
-       /* Init CAs from system trust store, if needed. */
-       if (!newcfg->insecure && !newcfg->pins.len && !has_ca_file) {
-               int ret = gnutls_certificate_set_x509_system_trust(newcfg->credentials);
-               if (ret <= 0) {
-                       ERROR("failed to use system CA certificate store: %s",
-                               ret ? gnutls_strerror(ret) : kr_strerror(ENOENT));
-               } else {
-                       kr_log_debug(TLSCLIENT, "imported %d certs from system store\n",
-                               ret);
-               }
-       }
-       #undef ERROR
-
-       /**** Phase 2: deal with the C authentication "table". */
-       /* Parse address and port. */
-       lua_pushinteger(L, 1);
-       lua_gettable(L, 1);
-       const char *addr_str = lua_tostring(L, -1);
-       if (!addr_str)
-               lua_error_p(L, "address is not a string");
-       char buf[INET6_ADDRSTRLEN + 1];
-       uint16_t port = 853;
-       const struct sockaddr *addr = NULL;
-       if (kr_straddr_split(addr_str, buf, &port) == kr_ok())
-               addr = kr_straddr_socket(buf, port, NULL);
-       /* Add newcfg into the C map, saving the original into oldcfg. */
-       if (!addr)
-               lua_error_p(L, "address '%s' could not be converted", addr_str);
-       tls_client_param_t **oldcfgp = tls_client_param_getptr(
-                       &the_network->tls_client_params, addr, true);
-       free_const(addr);
-       if (!oldcfgp)
-               lua_error_p(L, "internal error when extending tls_client_params map");
-       tls_client_param_t *oldcfg = *oldcfgp;
-       *oldcfgp = newcfg;  /* replace old config in trie with the new one */
-       /* If there was no original entry, it's easy! */
-       if (!oldcfg)
-               return 0;
-
-       /* Check for equality (newcfg vs. oldcfg), and print a warning if not equal.*/
-       const bool ok_h = (!newcfg->hostname && !oldcfg->hostname)
-               || (newcfg->hostname && oldcfg->hostname && strcmp(newcfg->hostname, oldcfg->hostname) == 0);
-       bool ok_ca = newcfg->ca_files.len == oldcfg->ca_files.len;
-       for (int i = 0; ok_ca && i < newcfg->ca_files.len; ++i)
-               ok_ca = strcmp(newcfg->ca_files.at[i], oldcfg->ca_files.at[i]) == 0;
-       bool ok_pins = newcfg->pins.len == oldcfg->pins.len;
-       for (int i = 0; ok_pins && i < newcfg->pins.len; ++i)
-               ok_ca = memcmp(newcfg->pins.at[i], oldcfg->pins.at[i], TLS_SHA256_RAW_LEN) == 0;
-       const bool ok_insecure = newcfg->insecure == oldcfg->insecure;
-       if (!(ok_h && ok_ca && ok_pins && ok_insecure)) {
-               kr_log_warning(TLSCLIENT,
-                       "warning: re-defining TLS authentication parameters for %s\n",
-                       addr_str);
-       }
-       tls_client_param_unref(oldcfg);
+       /* TODO: not implemented yet; the previous net_tls_client() body is kept commented out below. */
+       kr_assert(false && "Unimplemented");
         return 0;
+//     /* TODO idea: allow starting the lua table with *multiple* IP targets,
+//      * meaning the authentication config should be applied to each.
+//      */
+//     if (lua_gettop(L) == 0)
+//             return tls_params2lua(L, the_network->tls_client_params);
+//     /* Various basic sanity-checking. */
+//     if (lua_gettop(L) != 1 || !lua_istable(L, 1))
+//             lua_error_maybe(L, EINVAL);
+//     /* check that only allowed keys are present */
+//     {
+//             const char *bad_key = lua_table_checkindices(L, (const char *[])
+//                     { "1", "hostname", "ca_file", "pin_sha256", "insecure", NULL });
+//             if (bad_key)
+//                     lua_error_p(L, "found unexpected key '%s'", bad_key);
+//     }
+//
+//     /**** Phase 1: get the parameter into a C struct, incl. parse of CA files,
+//      *       regardless of the address-pair having an entry already. */
+//
+//     tls_client_param_t *newcfg = tls_client_param_new();
+//     if (!newcfg)
+//             lua_error_p(L, "out of memory or something like that :-/");
+//     /* Shortcut for cleanup actions needed from now on. */
+//     #define ERROR(...) do { \
+//             free(newcfg); \
+//             lua_error_p(L, __VA_ARGS__); \
+//     } while (false)
+//
+//     /* .hostname - always accepted. */
+//     lua_getfield(L, 1, "hostname");
+//     if (!lua_isnil(L, -1)) {
+//             const char *hn_str = lua_tostring(L, -1);
+//             /* Convert to lower-case dname and back, for checking etc. */
+//             knot_dname_t dname[KNOT_DNAME_MAXLEN];
+//             if (!hn_str || !knot_dname_from_str(dname, hn_str, sizeof(dname)))
+//                     ERROR("invalid hostname");
+//             knot_dname_to_lower(dname);
+//             char *h = knot_dname_to_str_alloc(dname);
+//             if (!h)
+//                     ERROR("%s", kr_strerror(ENOMEM));
+//             /* Strip the final dot produced by knot_dname_*() */
+//             h[strlen(h) - 1] = '\0';
+//             newcfg->hostname = h;
+//     }
+//     lua_pop(L, 1);
+//
+//     /* .ca_file - it can be a list of paths, contrary to the name. */
+//     bool has_ca_file = false;
+//     lua_getfield(L, 1, "ca_file");
+//     if (!lua_isnil(L, -1)) {
+//             if (!newcfg->hostname)
+//                     ERROR("missing hostname but specifying ca_file");
+//             lua_listify(L);
+//             array_init(newcfg->ca_files); /*< placate apparently confused scan-build */
+//             if (array_reserve(newcfg->ca_files, lua_objlen(L, -1)) != 0) /*< optim. */
+//                     ERROR("%s", kr_strerror(ENOMEM));
+//             /* Iterate over table at the top of the stack.
+//              * http://www.lua.org/manual/5.1/manual.html#lua_next */
+//             for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) {
+//                     has_ca_file = true; /* deferred here so that {} -> false */
+//                     const char *ca_file = lua_tostring(L, -1);
+//                     if (!ca_file)
+//                             ERROR("ca_file contains a non-string");
+//                     /* Let gnutls process it immediately, so garbage gets detected. */
+//                     int ret = gnutls_certificate_set_x509_trust_file(
+//                                     newcfg->credentials, ca_file, GNUTLS_X509_FMT_PEM);
+//                     if (ret < 0) {
+//                             ERROR("failed to import certificate file '%s': %s - %s\n",
+//                                     ca_file, gnutls_strerror_name(ret),
+//                                     gnutls_strerror(ret));
+//                     } else {
+//                             kr_log_debug(TLSCLIENT, "imported %d certs from file '%s'\n",
+//                                     ret, ca_file);
+//                     }
+//
+//                     ca_file = strdup(ca_file);
+//                     if (!ca_file || array_push(newcfg->ca_files, ca_file) < 0)
+//                             ERROR("%s", kr_strerror(ENOMEM));
+//             }
+//             /* Sort the strings for easier comparison later. */
+//             if (newcfg->ca_files.len) {
+//                     qsort(&newcfg->ca_files.at[0], newcfg->ca_files.len,
+//                             sizeof(newcfg->ca_files.at[0]), strcmp_p);
+//             }
+//     }
+//     lua_pop(L, 1);
+//
+//     /* .pin_sha256 */
+//     lua_getfield(L, 1, "pin_sha256");
+//     if (!lua_isnil(L, -1)) {
+//             if (has_ca_file)
+//                     ERROR("mixing pin_sha256 with ca_file is not supported");
+//             lua_listify(L);
+//             array_init(newcfg->pins); /*< placate apparently confused scan-build */
+//             if (array_reserve(newcfg->pins, lua_objlen(L, -1)) != 0) /*< optim. */
+//                     ERROR("%s", kr_strerror(ENOMEM));
+//             /* Iterate over table at the top of the stack. */
+//             for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) {
+//                     const char *pin = lua_tostring(L, -1);
+//                     if (!pin)
+//                             ERROR("pin_sha256 is not a string");
+//                     uint8_t *pin_raw = malloc(TLS_SHA256_RAW_LEN);
+//                     /* Push the string early to simplify error processing. */
+//                     if (kr_fails_assert(pin_raw && array_push(newcfg->pins, pin_raw) >= 0)) {
+//                             free(pin_raw);
+//                             ERROR("%s", kr_strerror(ENOMEM));
+//                     }
+//                     int ret = kr_base64_decode((const uint8_t *)pin, strlen(pin),
+//                                             pin_raw, TLS_SHA256_RAW_LEN + 8);
+//                     if (ret < 0) {
+//                             ERROR("not a valid pin_sha256: '%s' (length %d), %s\n",
+//                                     pin, (int)strlen(pin), knot_strerror(ret));
+//                     } else if (ret != TLS_SHA256_RAW_LEN) {
+//                             ERROR("not a valid pin_sha256: '%s', "
+//                                             "raw length %d instead of "
+//                                             STR(TLS_SHA256_RAW_LEN)"\n",
+//                                     pin, ret);
+//                     }
+//             }
+//             /* Sort the raw strings for easier comparison later. */
+//             if (newcfg->pins.len) {
+//                     qsort(&newcfg->pins.at[0], newcfg->pins.len,
+//                             sizeof(newcfg->pins.at[0]), cmp_sha256);
+//             }
+//     }
+//     lua_pop(L, 1);
+//
+//     /* .insecure */
+//     lua_getfield(L, 1, "insecure");
+//     if (lua_isnil(L, -1)) {
+//             if (!newcfg->hostname && !newcfg->pins.len)
+//                     ERROR("no way to authenticate and not set as insecure");
+//     } else if (lua_isboolean(L, -1) && lua_toboolean(L, -1)) {
+//             newcfg->insecure = true;
+//             if (has_ca_file || newcfg->pins.len)
+//                     ERROR("set as insecure but provided authentication config");
+//     } else {
+//             ERROR("incorrect value in the 'insecure' field");
+//     }
+//     lua_pop(L, 1);
+//
+//     /* Init CAs from system trust store, if needed. */
+//     if (!newcfg->insecure && !newcfg->pins.len && !has_ca_file) {
+//             int ret = gnutls_certificate_set_x509_system_trust(newcfg->credentials);
+//             if (ret <= 0) {
+//                     ERROR("failed to use system CA certificate store: %s",
+//                             ret ? gnutls_strerror(ret) : kr_strerror(ENOENT));
+//             } else {
+//                     kr_log_debug(TLSCLIENT, "imported %d certs from system store\n",
+//                             ret);
+//             }
+//     }
+//     #undef ERROR
+//
+//     /**** Phase 2: deal with the C authentication "table". */
+//     /* Parse address and port. */
+//     lua_pushinteger(L, 1);
+//     lua_gettable(L, 1);
+//     const char *addr_str = lua_tostring(L, -1);
+//     if (!addr_str)
+//             lua_error_p(L, "address is not a string");
+//     char buf[INET6_ADDRSTRLEN + 1];
+//     uint16_t port = 853;
+//     const struct sockaddr *addr = NULL;
+//     if (kr_straddr_split(addr_str, buf, &port) == kr_ok())
+//             addr = kr_straddr_socket(buf, port, NULL);
+//     /* Add newcfg into the C map, saving the original into oldcfg. */
+//     if (!addr)
+//             lua_error_p(L, "address '%s' could not be converted", addr_str);
+//     tls_client_param_t **oldcfgp = tls_client_param_getptr(
+//                     &the_network->tls_client_params, addr, true);
+//     free_const(addr);
+//     if (!oldcfgp)
+//             lua_error_p(L, "internal error when extending tls_client_params map");
+//     tls_client_param_t *oldcfg = *oldcfgp;
+//     *oldcfgp = newcfg;  /* replace old config in trie with the new one */
+//     /* If there was no original entry, it's easy! */
+//     if (!oldcfg)
+//             return 0;
+//
+//     /* Check for equality (newcfg vs. oldcfg), and print a warning if not equal.*/
+//     const bool ok_h = (!newcfg->hostname && !oldcfg->hostname)
+//             || (newcfg->hostname && oldcfg->hostname && strcmp(newcfg->hostname, oldcfg->hostname) == 0);
+//     bool ok_ca = newcfg->ca_files.len == oldcfg->ca_files.len;
+//     for (int i = 0; ok_ca && i < newcfg->ca_files.len; ++i)
+//             ok_ca = strcmp(newcfg->ca_files.at[i], oldcfg->ca_files.at[i]) == 0;
+//     bool ok_pins = newcfg->pins.len == oldcfg->pins.len;
+//     for (int i = 0; ok_pins && i < newcfg->pins.len; ++i)
+//             ok_pins = memcmp(newcfg->pins.at[i], oldcfg->pins.at[i], TLS_SHA256_RAW_LEN) == 0;
+//     const bool ok_insecure = newcfg->insecure == oldcfg->insecure;
+//     if (!(ok_h && ok_ca && ok_pins && ok_insecure)) {
+//             kr_log_warning(TLSCLIENT,
+//                     "warning: re-defining TLS authentication parameters for %s\n",
+//                     addr_str);
+//     }
+//     tls_client_param_unref(oldcfg);
+//     return 0;
  }
  
  int net_tls_client_clear(lua_State *L)
  {
-       /* One parameter: address -> convert it to a struct sockaddr. */
-       if (lua_gettop(L) != 1 || !lua_isstring(L, 1))
-               lua_error_p(L, "net.tls_client_clear() requires one parameter (\"address\")");
-       const char *addr_str = lua_tostring(L, 1);
-       char buf[INET6_ADDRSTRLEN + 1];
-       uint16_t port = 853;
-       const struct sockaddr *addr = NULL;
-       if (kr_straddr_split(addr_str, buf, &port) == kr_ok())
-               addr = kr_straddr_socket(buf, port, NULL);
-       if (!addr)
-               lua_error_p(L, "invalid IP address");
-       /* Do the actual removal. */
-       int r = tls_client_param_remove(the_network->tls_client_params, addr);
-       free_const(addr);
-       lua_error_maybe(L, r);
-       lua_pushboolean(L, true);
-       return 1;
+       /* TODO: not implemented yet; the previous net_tls_client_clear() body is kept commented out below. */
+       kr_assert(false && "Unimplemented");
+       return 0;
+//     /* One parameter: address -> convert it to a struct sockaddr. */
+//     if (lua_gettop(L) != 1 || !lua_isstring(L, 1))
+//             lua_error_p(L, "net.tls_client_clear() requires one parameter (\"address\")");
+//     const char *addr_str = lua_tostring(L, 1);
+//     char buf[INET6_ADDRSTRLEN + 1];
+//     uint16_t port = 853;
+//     const struct sockaddr *addr = NULL;
+//     if (kr_straddr_split(addr_str, buf, &port) == kr_ok())
+//             addr = kr_straddr_socket(buf, port, NULL);
+//     if (!addr)
+//             lua_error_p(L, "invalid IP address");
+//     /* Do the actual removal. */
+//     int r = tls_client_param_remove(the_network->tls_client_params, addr);
+//     free_const(addr);
+//     lua_error_maybe(L, r);
+//     lua_pushboolean(L, true);
+//     return 1;
  }
  
  static int net_tls_padding(lua_State *L)
@@ -969,79 +981,85 @@ static int net_tls_padding(lua_State *L)
  
  static int net_tls_sticket_secret_string(lua_State *L)
  {
-       size_t secret_len;
-       const char *secret;
-
-       if (lua_gettop(L) == 0) {
-               /* Zero-length secret, implying random key. */
-               secret_len = 0;
-               secret = NULL;
-       } else {
-               if (lua_gettop(L) != 1 || !lua_isstring(L, 1)) {
-                       lua_error_p(L,
-                               "net.tls_sticket_secret takes one parameter: (\"secret string\")");
-               }
-               secret = lua_tolstring(L, 1, &secret_len);
-               if (secret_len < net_tls_sticket_MIN_SECRET_LEN || !secret) {
-                       lua_error_p(L, "net.tls_sticket_secret - the secret is shorter than "
-                                       STR(net_tls_sticket_MIN_SECRET_LEN) " bytes");
-               }
-       }
-
-       tls_session_ticket_ctx_destroy(the_network->tls_session_ticket_ctx);
-       the_network->tls_session_ticket_ctx =
-               tls_session_ticket_ctx_create(the_network->loop, secret, secret_len);
-       if (the_network->tls_session_ticket_ctx == NULL) {
-               lua_error_p(L,
-                       "net.tls_sticket_secret_string - can't create session ticket context");
-       }
-
-       lua_pushboolean(L, true);
-       return 1;
+       /* TODO: not implemented in this revision; the former body is kept commented out below. */
+       kr_assert(false && "Unimplemented");
+       return 0; /* zero results are handed back to the Lua caller */
+//     size_t secret_len;
+//     const char *secret;
+//
+//     if (lua_gettop(L) == 0) {
+//             /* Zero-length secret, implying random key. */
+//             secret_len = 0;
+//             secret = NULL;
+//     } else {
+//             if (lua_gettop(L) != 1 || !lua_isstring(L, 1)) {
+//                     lua_error_p(L,
+//                             "net.tls_sticket_secret takes one parameter: (\"secret string\")");
+//             }
+//             secret = lua_tolstring(L, 1, &secret_len);
+//             if (secret_len < net_tls_sticket_MIN_SECRET_LEN || !secret) {
+//                     lua_error_p(L, "net.tls_sticket_secret - the secret is shorter than "
+//                                     STR(net_tls_sticket_MIN_SECRET_LEN) " bytes");
+//             }
+//     }
+//
+//     tls_session_ticket_ctx_destroy(the_network->tls_session_ticket_ctx);
+//     the_network->tls_session_ticket_ctx =
+//             tls_session_ticket_ctx_create(the_network->loop, secret, secret_len);
+//     if (the_network->tls_session_ticket_ctx == NULL) {
+//             lua_error_p(L,
+//                     "net.tls_sticket_secret_string - can't create session ticket context");
+//     }
+//
+//     lua_pushboolean(L, true);
+//     return 1;
  }
  
  static int net_tls_sticket_secret_file(lua_State *L)
  {
-       if (lua_gettop(L) != 1 || !lua_isstring(L, 1)) {
-               lua_error_p(L,
-                       "net.tls_sticket_secret_file takes one parameter: (\"file name\")");
-       }
-
-       const char *file_name = lua_tostring(L, 1);
-       if (strlen(file_name) == 0)
-               lua_error_p(L, "net.tls_sticket_secret_file - empty file name");
-
-       FILE *fp = fopen(file_name, "r");
-       if (fp == NULL) {
-               lua_error_p(L, "net.tls_sticket_secret_file - can't open file '%s': %s",
-                               file_name, strerror(errno));
-       }
-
-       char secret_buf[TLS_SESSION_TICKET_SECRET_MAX_LEN];
-       const size_t secret_len = fread(secret_buf, 1, sizeof(secret_buf), fp);
-       int err = ferror(fp);
-       if (err) {
-               lua_error_p(L,
-                       "net.tls_sticket_secret_file - error reading from file '%s': %s",
-                       file_name, strerror(err));
-       }
-       if (secret_len < net_tls_sticket_MIN_SECRET_LEN) {
-               lua_error_p(L,
-                       "net.tls_sticket_secret_file - file '%s' is shorter than "
-                               STR(net_tls_sticket_MIN_SECRET_LEN) " bytes",
-                       file_name);
-       }
-       fclose(fp);
-
-       tls_session_ticket_ctx_destroy(the_network->tls_session_ticket_ctx);
-       the_network->tls_session_ticket_ctx =
-               tls_session_ticket_ctx_create(the_network->loop, secret_buf, secret_len);
-       if (the_network->tls_session_ticket_ctx == NULL) {
-               lua_error_p(L,
-                       "net.tls_sticket_secret_file - can't create session ticket context");
-       }
-       lua_pushboolean(L, true);
-       return 1;
+       /* TODO: not implemented in this revision; the former body is kept commented out below. */
+       kr_assert(false && "Unimplemented");
+       return 0; /* zero results are handed back to the Lua caller */
+//     if (lua_gettop(L) != 1 || !lua_isstring(L, 1)) {
+//             lua_error_p(L,
+//                     "net.tls_sticket_secret_file takes one parameter: (\"file name\")");
+//     }
+//
+//     const char *file_name = lua_tostring(L, 1);
+//     if (strlen(file_name) == 0)
+//             lua_error_p(L, "net.tls_sticket_secret_file - empty file name");
+//
+//     FILE *fp = fopen(file_name, "r");
+//     if (fp == NULL) {
+//             lua_error_p(L, "net.tls_sticket_secret_file - can't open file '%s': %s",
+//                             file_name, strerror(errno));
+//     }
+//
+//     char secret_buf[TLS_SESSION_TICKET_SECRET_MAX_LEN];
+//     const size_t secret_len = fread(secret_buf, 1, sizeof(secret_buf), fp);
+//     int err = ferror(fp);
+//     if (err) {
+//             lua_error_p(L,
+//                     "net.tls_sticket_secret_file - error reading from file '%s': %s",
+//                     file_name, strerror(err));
+//     }
+//     if (secret_len < net_tls_sticket_MIN_SECRET_LEN) {
+//             lua_error_p(L,
+//                     "net.tls_sticket_secret_file - file '%s' is shorter than "
+//                             STR(net_tls_sticket_MIN_SECRET_LEN) " bytes",
+//                     file_name);
+//     }
+//     fclose(fp);
+//
+//     tls_session_ticket_ctx_destroy(the_network->tls_session_ticket_ctx);
+//     the_network->tls_session_ticket_ctx =
+//             tls_session_ticket_ctx_create(the_network->loop, secret_buf, secret_len);
+//     if (the_network->tls_session_ticket_ctx == NULL) {
+//             lua_error_p(L,
+//                     "net.tls_sticket_secret_file - can't create session ticket context");
+//     }
+//     lua_pushboolean(L, true);
+//     return 1;
  }
  
  static int net_outgoing(lua_State *L, int family)
diff --git a/daemon/io.c b/daemon/io.c

index 66ad03dae11a5ec10c9b47cf96eaf8164af8fc1b..c9fcc0eb2b2ab47c15a0c193d451ef0cd8bede0e 100644 (file)
--- a/daemon/io.c
+++ b/daemon/io.c
@@ -21,7 +21,7 @@
  #include "daemon/worker.h"
  #include "daemon/tls.h"
  #include "daemon/http.h"
-#include "daemon/session.h"
+#include "daemon/session2.h"
  #include "contrib/cleanup.h"
  #include "lib/utils.h"
  
@@ -40,9 +40,9 @@ static void check_bufsize(uv_handle_t* handle)
          * This is magic presuming we can pull in a whole recvmmsg width in one wave.
          * Linux will double this the bufsize wanted.
          */
-       const int bufsize_want = 2 * sizeof(the_worker->wire_buf) ;
-       negotiate_bufsize(uv_recv_buffer_size, handle, bufsize_want);
-       negotiate_bufsize(uv_send_buffer_size, handle, bufsize_want);
+       const int BUF_SIZE = 2 * RECVMMSG_BATCH * KNOT_WIRE_MAX_PKTSIZE; /* bytes for two recvmmsg waves; sizeof() of the product would only be the size of an integer */
+       negotiate_bufsize(uv_recv_buffer_size, handle, BUF_SIZE);
+       negotiate_bufsize(uv_send_buffer_size, handle, BUF_SIZE);
  }
  
  #undef negotiate_bufsize
@@ -57,26 +57,26 @@ static void handle_getbuf(uv_handle_t* handle, size_t suggested_size, uv_buf_t*
          * guaranteed to be unchanged only for the duration of
          * udp_read() and tcp_read().
          */
-       struct session *s = handle->data;
-       if (!session_flags(s)->has_tls) {
-               buf->base = (char *) session_wirebuf_get_free_start(s);
-               buf->len = session_wirebuf_get_free_size(s);
-       } else {
-               struct tls_common_ctx *ctx = session_tls_get_common_ctx(s);
-               buf->base = (char *) ctx->recv_buf;
-               buf->len = sizeof(ctx->recv_buf);
-       }
+       struct session2 *s = handle->data;
+       buf->base = wire_buf_free_space(&s->wire_buf);
+       buf->len = wire_buf_free_space_length(&s->wire_buf);
+}
+
+static void udp_on_unwrapped(int status, struct session2 *session,
+                             const void *target, void *baton)
+{ /* Callback passed to session2_unwrap() by udp_recv(); status, target and baton are unused. */
+       wire_buf_reset(&session->wire_buf); /* reset the session's wire buffer once unwrapping is done */
  }
  
  void udp_recv(uv_udp_t *handle, ssize_t nread, const uv_buf_t *buf,
         const struct sockaddr *comm_addr, unsigned flags)
  {
-       struct session *s = handle->data;
-       if (session_flags(s)->closing || nread <= 0 || comm_addr->sa_family == AF_UNSPEC)
+       struct session2 *s = handle->data;
+       if (s->closing || nread <= 0 || comm_addr->sa_family == AF_UNSPEC)
                 return;
  
-       if (session_flags(s)->outgoing) {
-               const struct sockaddr *peer = session_get_peer(s);
+       if (s->outgoing) {
+               const struct sockaddr *peer = session2_get_peer(s);
                 if (kr_fails_assert(peer->sa_family != AF_UNSPEC))
                         return;
                 if (kr_sockaddr_cmp(peer, comm_addr) != 0) {
@@ -86,64 +86,16 @@ void udp_recv(uv_udp_t *handle, ssize_t nread, const uv_buf_t *buf,
                 }
         }
  
-       const uint8_t *data = (const uint8_t *)buf->base;
-       ssize_t data_len = nread;
-       const struct sockaddr *src_addr = comm_addr;
-       const struct sockaddr *dst_addr = NULL;
-       struct proxy_result proxy;
-       bool has_proxy = false;
-       if (!session_flags(s)->outgoing && proxy_header_present(data, data_len)) {
-               if (!proxy_allowed(comm_addr)) {
-                       kr_log_debug(IO, "<= ignoring PROXYv2 UDP from disallowed address '%s'\n",
-                                       kr_straddr(comm_addr));
-                       return;
-               }
-
-               ssize_t trimmed = proxy_process_header(&proxy, s, data, data_len);
-               if (trimmed == KNOT_EMALF) {
-                       if (kr_log_is_debug(IO, NULL)) {
-                               kr_log_debug(IO, "<= ignoring malformed PROXYv2 UDP "
-                                               "from address '%s'\n",
-                                               kr_straddr(comm_addr));
-                       }
-                       return;
-               } else if (trimmed < 0) {
-                       if (kr_log_is_debug(IO, NULL)) {
-                               kr_log_debug(IO, "<= error processing PROXYv2 UDP "
-                                               "from address '%s', ignoring\n",
-                                               kr_straddr(comm_addr));
-                       }
-                       return;
-               }
-
-               if (proxy.command == PROXY2_CMD_PROXY && proxy.family != AF_UNSPEC) {
-                       has_proxy = true;
-                       src_addr = &proxy.src_addr.ip;
-                       dst_addr = &proxy.dst_addr.ip;
-
-                       if (kr_log_is_debug(IO, NULL)) {
-                               kr_log_debug(IO, "<= UDP query from '%s'\n",
-                                               kr_straddr(src_addr));
-                               kr_log_debug(IO, "<= proxied through '%s'\n",
-                                               kr_straddr(comm_addr));
-                       }
-               }
-               data = session_wirebuf_get_free_start(s);
-               data_len = nread - trimmed;
+       int ret = wire_buf_consume(&s->wire_buf, nread);
+       if (ret) {
+               wire_buf_reset(&s->wire_buf);
+               return;
         }
  
-       ssize_t consumed = session_wirebuf_consume(s, data, data_len);
-       kr_assert(consumed == data_len);
-
-       struct io_comm_data comm = {
-               .src_addr = src_addr,
-               .comm_addr = comm_addr,
-               .dst_addr = dst_addr,
-               .proxy = (has_proxy) ? &proxy : NULL
-       };
-       session_wirebuf_process(s, &comm);
-       session_wirebuf_discard(s);
-       mp_flush(the_worker->pkt_pool.ctx);
+       ret = session2_unwrap(s, protolayer_wire_buf(&s->wire_buf), comm_addr,
+                       udp_on_unwrapped, NULL);
+       if (ret)
+               wire_buf_reset(&s->wire_buf);
  }
  
  static int family_to_freebind_option(sa_family_t sa_family, int *level, int *name)
@@ -180,6 +132,304 @@ static int family_to_freebind_option(sa_family_t sa_family, int *level, int *nam
         return kr_ok();
  }
  
+
+struct pl_udp_iter_data { /* Per-iteration data of the UDP protocol layer (registered via iter_size). */
+       struct proxy_result proxy; /* filled by proxy_process_header() in pl_udp_unwrap() */
+       bool has_proxy; /* set when a PROXY command with a known address family was parsed */
+};
+
+static int pl_udp_iter_init(struct protolayer_manager *manager, struct protolayer_data *layer)
+{ /* iter_init callback of the UDP layer; `manager` is unused. */
+       struct pl_udp_iter_data *udp = protolayer_iter_data(layer);
+       *udp = (struct pl_udp_iter_data){0}; /* start with no PROXYv2 information */
+       return 0; /* always reports success */
+}
+
+static enum protolayer_cb_result pl_udp_unwrap(struct protolayer_data *layer, struct protolayer_cb_ctx *ctx)
+{ /* Unwrap callback of the UDP layer: fills ctx->comm with peer addresses and handles a leading PROXYv2 header. */
+       if (ctx->payload.type == PROTOLAYER_PAYLOAD_EVENT) {
+               /* events should not happen in UDP (currently) */
+               return protolayer_continue(ctx);
+       }
+
+       ctx->payload = protolayer_as_buffer(&ctx->payload); /* result must be a plain buffer, checked right below */
+       if (kr_fails_assert(ctx->payload.type == PROTOLAYER_PAYLOAD_BUFFER)) {
+               /* unsupported payload */
+               return protolayer_break(ctx, kr_error(EINVAL));
+       }
+
+       struct session2 *s = ctx->manager->session;
+       struct pl_udp_iter_data *udp = protolayer_iter_data(layer);
+
+       char *data = ctx->payload.buffer.buf;
+       ssize_t data_len = ctx->payload.buffer.len;
+       struct comm_info *comm = &ctx->comm;
+       comm->comm_addr = ctx->target; /* address the datagram actually came from */
+       comm->src_addr = ctx->target; /* NOTE(review): dst_addr/proxy are not reset here - presumably ctx->comm starts zeroed; confirm */
+       if (!s->outgoing && proxy_header_present(data, data_len)) { /* PROXYv2 is only considered on non-outgoing sessions */
+               if (!proxy_allowed(comm->comm_addr)) {
+                       kr_log_debug(IO, "<= ignoring PROXYv2 UDP from disallowed address '%s'\n",
+                                       kr_straddr(comm->comm_addr));
+                       return protolayer_break(ctx, kr_error(EPERM));
+               }
+
+               ssize_t trimmed = proxy_process_header(&udp->proxy, data, data_len);
+               if (trimmed == KNOT_EMALF) {
+                       if (kr_log_is_debug(IO, NULL)) {
+                               kr_log_debug(IO, "<= ignoring malformed PROXYv2 UDP "
+                                               "from address '%s'\n",
+                                               kr_straddr(comm->comm_addr));
+                       }
+                       return protolayer_break(ctx, kr_error(EINVAL));
+               } else if (trimmed < 0) {
+                       if (kr_log_is_debug(IO, NULL)) {
+                               kr_log_debug(IO, "<= error processing PROXYv2 UDP "
+                                               "from address '%s', ignoring\n",
+                                               kr_straddr(comm->comm_addr));
+                       }
+                       return protolayer_break(ctx, kr_error(EINVAL));
+               }
+
+               if (udp->proxy.command == PROXY2_CMD_PROXY && udp->proxy.family != AF_UNSPEC) {
+                       udp->has_proxy = true;
+
+                       comm->src_addr = &udp->proxy.src_addr.ip; /* use the addresses carried in the header */
+                       comm->dst_addr = &udp->proxy.dst_addr.ip;
+                       comm->proxy = &udp->proxy;
+
+                       if (kr_log_is_debug(IO, NULL)) {
+                               kr_log_debug(IO, "<= UDP query from '%s'\n",
+                                               kr_straddr(comm->src_addr));
+                               kr_log_debug(IO, "<= proxied through '%s'\n",
+                                               kr_straddr(comm->comm_addr));
+                       }
+               }
+
+               ctx->payload = protolayer_buffer(data + trimmed, data_len - trimmed); /* hand on only the bytes after the first `trimmed` ones */
+       }
+
+       return protolayer_continue(ctx);
+}
+
+static enum protolayer_cb_result pl_udp_wrap(struct protolayer_data *layer, struct protolayer_cb_ctx *ctx)
+{ /* Wrap callback of the UDP layer: performs no transformation; `layer` is unused. */
+       return protolayer_push(ctx);
+}
+
+
+struct pl_tcp_sess_data { /* Per-session data of the TCP protocol layer (registered via sess_size). */
+       struct proxy_result proxy; /* filled by proxy_process_header() in pl_tcp_unwrap() */
+       struct wire_buf wire_buf; /* reserved on first use in pl_tcp_unwrap(); receives copies of plain-buffer payloads */
+       bool had_data : 1; /* set once a payload has passed pl_tcp_unwrap(); disables further PROXYv2 checks */
+       bool has_proxy : 1; /* NOTE(review): not written anywhere in the visible code - confirm it is still needed */
+};
+
+static int pl_tcp_sess_init(struct protolayer_manager *manager, struct protolayer_data *layer)
+{ /* sess_init callback of the TCP layer; `manager` is unused. */
+       struct pl_tcp_sess_data *tcp = protolayer_sess_data(layer);
+       *tcp = (struct pl_tcp_sess_data){0}; /* no buffer reserved yet, no data seen, no PROXYv2 info */
+       return 0; /* always reports success */
+}
+
+static int pl_tcp_sess_deinit(struct protolayer_manager *manager, struct protolayer_data *layer)
+{ /* sess_deinit callback of the TCP layer; `manager` is unused. */
+       struct pl_tcp_sess_data *tcp = protolayer_sess_data(layer);
+       wire_buf_deinit(&tcp->wire_buf); /* called even if the buffer was never reserved - presumably safe on a zeroed wire_buf; confirm */
+       return 0; /* always reports success */
+}
+
+static enum protolayer_cb_result pl_tcp_unwrap_timeout(
+               struct protolayer_data *layer, struct protolayer_cb_ctx *ctx)
+{ /* Handles PROTOLAYER_EVENT_TIMEOUT for pl_tcp_unwrap(); every path ends in protolayer_continue(). */
+       /* TODO - connecting timeout? */
+       struct session2 *s = ctx->manager->session;
+
+       if (kr_fails_assert(!s->closing))
+               return protolayer_continue(ctx);
+
+       if (!session2_tasklist_is_empty(s)) {
+               int finalized = session2_tasklist_finalize_expired(s);
+               the_worker->stats.timeout += finalized;
+               /* session2_tasklist_finalize_expired() may call worker_task_finalize().
+                * If session is a source session and there were IO errors,
+                * worker_task_finalize() can finalize all tasks and close session. */
+               if (s->closing)
+                       return protolayer_continue(ctx);
+       }
+
+       if (!session2_tasklist_is_empty(s)) { /* tasks remain: restart the timer with half the resolve time limit */
+               session2_timer_stop(s);
+               session2_timer_start(s,
+                               KR_RESOLVE_TIME_LIMIT / 2,
+                               KR_RESOLVE_TIME_LIMIT / 2,
+                               PROTOLAYER_UNWRAP);
+       } else {
+               /* Normally this should not happen,
+                * but it is better to check whether there is anything in this list. */
+               while (!session2_waitinglist_is_empty(s)) {
+                       struct qr_task *t = session2_waitinglist_pop(s, false);
+                       worker_task_finalize(t, KR_STATE_FAIL);
+                       worker_task_unref(t);
+                       the_worker->stats.timeout += 1;
+                       if (s->closing)
+                               return protolayer_continue(ctx);
+               }
+               uint64_t idle_in_timeout = the_network->tcp.in_idle_timeout;
+               uint64_t idle_time = kr_now() - s->last_activity;
+               if (idle_time < idle_in_timeout) { /* not idle for the full timeout yet: restart the timer with the remainder */
+                       idle_in_timeout -= idle_time;
+                       session2_timer_stop(s);
+                       session2_timer_start(s,
+                                       idle_in_timeout, idle_in_timeout,
+                                       PROTOLAYER_UNWRAP);
+               } else {
+                       struct sockaddr *peer = session2_get_peer(s);
+                       char *peer_str = kr_straddr(peer);
+                       kr_log_debug(IO, "=> closing connection to '%s'\n",
+                                      peer_str ? peer_str : "");
+                       if (s->outgoing) {
+                               worker_del_tcp_waiting(peer);
+                               worker_del_tcp_connected(peer);
+                       }
+                       session2_unwrap(s, protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), NULL, NULL, NULL); /* idle timeout reached: send a CLOSE event through the session */
+               }
+       }
+
+       return protolayer_continue(ctx);
+}
+
+static enum protolayer_cb_result pl_tcp_unwrap(struct protolayer_data *layer, struct protolayer_cb_ctx *ctx)
+{ /* Unwrap callback of the TCP layer: dispatches timeout events, buffers plain payloads, handles a leading PROXYv2 header. */
+       struct session2 *s = ctx->manager->session;
+       struct pl_tcp_sess_data *tcp = protolayer_sess_data(layer);
+       struct sockaddr *peer = session2_get_peer(s);
+
+       if (ctx->payload.type == PROTOLAYER_PAYLOAD_EVENT) {
+               if (ctx->payload.event.type == PROTOLAYER_EVENT_TIMEOUT)
+                       return pl_tcp_unwrap_timeout(layer, ctx);
+
+               /* pass thru */
+               return protolayer_continue(ctx);
+       }
+
+       if (ctx->payload.type == PROTOLAYER_PAYLOAD_BUFFER) {
+               const char *buf = ctx->payload.buffer.buf;
+               const size_t len = ctx->payload.buffer.len;
+
+               /* Copy a simple buffer into internal wirebuffer. */
+               if (len > KNOT_WIRE_MAX_PKTSIZE)
+                       return protolayer_break(ctx, kr_error(EMSGSIZE));
+
+               if (!tcp->wire_buf.buf) { /* first use: reserve the layer's own buffer */
+                       int ret = wire_buf_reserve(&tcp->wire_buf,
+                                       KNOT_WIRE_MAX_PKTSIZE);
+                       if (ret)
+                               return protolayer_break(ctx, ret);
+               }
+
+               /* Try to make space */
+               while (len > wire_buf_free_space_length(&tcp->wire_buf)) {
+                       if (wire_buf_data_length(&tcp->wire_buf) > 0 ||
+                                       tcp->wire_buf.start == 0)
+                               return protolayer_break(ctx, kr_error(EMSGSIZE)); /* gives up while data is still buffered or the start is already 0 */
+
+                       wire_buf_movestart(&tcp->wire_buf);
+               }
+
+               memcpy(wire_buf_free_space(&tcp->wire_buf), buf, len);
+               wire_buf_consume(&tcp->wire_buf, ctx->payload.buffer.len); /* NOTE(review): result ignored - presumably cannot fail after the space check above; confirm */
+               ctx->payload = protolayer_wire_buf(&tcp->wire_buf); /* continue with the layer's wire buffer as payload */
+       }
+
+       if (kr_fails_assert(ctx->payload.type == PROTOLAYER_PAYLOAD_WIRE_BUF)) {
+               /* TODO: iovec support unimplemented */
+               return protolayer_break(ctx, kr_error(EINVAL));
+       }
+
+       char *data = wire_buf_data(ctx->payload.wire_buf); /* layer's or session's wirebuf */
+       ssize_t data_len = wire_buf_data_length(ctx->payload.wire_buf);
+       struct comm_info *comm = &ctx->comm;
+       comm->src_addr = peer; /* defaults: the peer is both source and communicating address */
+       comm->comm_addr = peer;
+       comm->dst_addr = NULL;
+       if (!s->outgoing && !tcp->had_data && proxy_header_present(data, data_len)) { /* only the first data of a non-outgoing session is checked */
+               if (!proxy_allowed(comm->src_addr)) {
+                       if (kr_log_is_debug(IO, NULL)) {
+                               kr_log_debug(IO, "<= connection to '%s': PROXYv2 not allowed "
+                                               "for this peer, close\n",
+                                               kr_straddr(peer));
+                       }
+                       worker_end_tcp(s);
+                       ctx->payload = protolayer_event_nd(PROTOLAYER_EVENT_CLOSE);
+                       return protolayer_push(ctx);
+               }
+
+               ssize_t trimmed = proxy_process_header(&tcp->proxy, data, data_len);
+               if (trimmed < 0) {
+                       if (kr_log_is_debug(IO, NULL)) {
+                               if (trimmed == KNOT_EMALF) {
+                                       kr_log_debug(IO, "<= connection to '%s': "
+                                                       "malformed PROXYv2 header, close\n",
+                                                       kr_straddr(comm->src_addr));
+                               } else {
+                                       kr_log_debug(IO, "<= connection to '%s': "
+                                                       "error processing PROXYv2 header, close\n",
+                                                       kr_straddr(comm->src_addr));
+                               }
+                       }
+                       worker_end_tcp(s);
+                       ctx->payload = protolayer_event_nd(PROTOLAYER_EVENT_CLOSE);
+                       return protolayer_push(ctx);
+               } else if (trimmed == 0) { /* NOTE(review): closes without worker_end_tcp() or a log line, unlike the branches above - confirm intended */
+                       ctx->payload = protolayer_event_nd(PROTOLAYER_EVENT_CLOSE);
+                       return protolayer_push(ctx);
+               }
+
+               if (tcp->proxy.command != PROXY2_CMD_LOCAL && tcp->proxy.family != AF_UNSPEC) {
+                       comm->src_addr = &tcp->proxy.src_addr.ip;
+                       comm->dst_addr = &tcp->proxy.dst_addr.ip; /* NOTE(review): tcp->has_proxy and comm->proxy are not set here, unlike pl_udp_unwrap() - confirm */
+
+                       if (kr_log_is_debug(IO, NULL)) {
+                               kr_log_debug(IO, "<= TCP stream from '%s'\n",
+                                               kr_straddr(comm->src_addr));
+                               kr_log_debug(IO, "<= proxied through '%s'\n",
+                                               kr_straddr(comm->comm_addr));
+                       }
+               }
+
+               wire_buf_trim(ctx->payload.wire_buf, trimmed); /* drop the first `trimmed` bytes from the wire buffer */
+       }
+
+       tcp->had_data = true; /* later payloads skip the PROXYv2 check above */
+
+       return protolayer_continue(ctx);
+}
+
+static enum protolayer_cb_result pl_tcp_wrap(struct protolayer_data *layer, struct protolayer_cb_ctx *ctx)
+{ /* Wrap callback of the TCP layer: performs no transformation; `layer` is unused. */
+       return protolayer_push(ctx);
+}
+
+
+void io_protolayers_init(void) /* Registers the UDP and TCP layer callbacks in protolayer_globals[]. */
+{
+       protolayer_globals[PROTOLAYER_UDP] = (struct protolayer_globals){ /* UDP keeps per-iteration data only */
+               .iter_size = sizeof(struct pl_udp_iter_data),
+               .iter_init = pl_udp_iter_init,
+               .unwrap = pl_udp_unwrap,
+               .wrap = pl_udp_wrap
+       };
+
+       protolayer_globals[PROTOLAYER_TCP] = (struct protolayer_globals){ /* TCP keeps per-session data with init/deinit hooks */
+               .sess_size = sizeof(struct pl_tcp_sess_data),
+               .sess_init = pl_tcp_sess_init,
+               .sess_deinit = pl_tcp_sess_deinit,
+               .unwrap = pl_tcp_unwrap,
+               .wrap = pl_tcp_wrap
+       };
+}
+
+
  int io_bind(const struct sockaddr *addr, int type, const endpoint_flags_t *flags)
  {
         const int fd = socket(addr->sa_family, type, 0);
@@ -265,12 +515,11 @@ int io_listen_udp(uv_loop_t *loop, uv_udp_t *handle, int fd)
         uv_handle_t *h = (uv_handle_t *)handle;
         check_bufsize(h);
         /* Handle is already created, just create context. */
-       struct session *s = session_new(h, false, false);
+       struct session2 *s = session2_new_io(h, PROTOLAYER_GRP_DOUDP, false);
         kr_require(s);
-       session_flags(s)->outgoing = false;
  
         int socklen = sizeof(union kr_sockaddr);
-       ret = uv_udp_getsockname(handle, session_get_sockname(s), &socklen);
+       ret = uv_udp_getsockname(handle, &s->transport.io.sockname.ip, &socklen);
         if (ret) {
                 kr_log_error(IO, "ERROR: getsockname failed: %s\n", uv_strerror(ret));
                 abort(); /* It might be nontrivial not to leak something here. */
@@ -279,70 +528,13 @@ int io_listen_udp(uv_loop_t *loop, uv_udp_t *handle, int fd)
         return io_start_read(h);
  }
  
-void tcp_timeout_trigger(uv_timer_t *timer)
-{
-       struct session *s = timer->data;
-
-       if (kr_fails_assert(!session_flags(s)->closing))
-               return;
-
-       if (!session_tasklist_is_empty(s)) {
-               int finalized = session_tasklist_finalize_expired(s);
-               the_worker->stats.timeout += finalized;
-               /* session_tasklist_finalize_expired() may call worker_task_finalize().
-                * If session is a source session and there were IO errors,
-                * worker_task_finalize() can finalize all tasks and close session. */
-               if (session_flags(s)->closing) {
-                       return;
-               }
-
-       }
-       if (!session_tasklist_is_empty(s)) {
-               uv_timer_stop(timer);
-               session_timer_start(s, tcp_timeout_trigger,
-                                   KR_RESOLVE_TIME_LIMIT / 2,
-                                   KR_RESOLVE_TIME_LIMIT / 2);
-       } else {
-               /* Normally it should not happen,
-                * but better to check if there anything in this list. */
-               while (!session_waitinglist_is_empty(s)) {
-                       struct qr_task *t = session_waitinglist_pop(s, false);
-                       worker_task_finalize(t, KR_STATE_FAIL);
-                       worker_task_unref(t);
-                       the_worker->stats.timeout += 1;
-                       if (session_flags(s)->closing) {
-                               return;
-                       }
-               }
-               uint64_t idle_in_timeout = the_network->tcp.in_idle_timeout;
-               uint64_t last_activity = session_last_activity(s);
-               uint64_t idle_time = kr_now() - last_activity;
-               if (idle_time < idle_in_timeout) {
-                       idle_in_timeout -= idle_time;
-                       uv_timer_stop(timer);
-                       session_timer_start(s, tcp_timeout_trigger,
-                                           idle_in_timeout, idle_in_timeout);
-               } else {
-                       struct sockaddr *peer = session_get_peer(s);
-                       char *peer_str = kr_straddr(peer);
-                       kr_log_debug(IO, "=> closing connection to '%s'\n",
-                                      peer_str ? peer_str : "");
-                       if (session_flags(s)->outgoing) {
-                               worker_del_tcp_waiting(peer);
-                               worker_del_tcp_connected(peer);
-                       }
-                       session_close(s);
-               }
-       }
-}
-
  static void tcp_recv(uv_stream_t *handle, ssize_t nread, const uv_buf_t *buf)
  {
-       struct session *s = handle->data;
-       if (kr_fails_assert(s && session_get_handle(s) == (uv_handle_t *)handle && handle->type == UV_TCP))
+       struct session2 *s = handle->data;
+       if (kr_fails_assert(s && session2_get_handle(s) == (uv_handle_t *)handle && handle->type == UV_TCP))
                 return;
  
-       if (session_flags(s)->closing) {
+       if (s->closing) {
                 return;
         }
  
@@ -354,160 +546,117 @@ static void tcp_recv(uv_stream_t *handle, ssize_t nread, const uv_buf_t *buf)
  
         if (nread < 0 || !buf->base) {
                 if (kr_log_is_debug(IO, NULL)) {
-                       struct sockaddr *peer = session_get_peer(s);
+                       struct sockaddr *peer = session2_get_peer(s);
                         char *peer_str = kr_straddr(peer);
                         kr_log_debug(IO, "=> connection to '%s' closed by peer (%s)\n",
                                        peer_str ? peer_str : "",
                                        uv_strerror(nread));
                 }
                 worker_end_tcp(s);
+               session2_unwrap(s, protolayer_event_nd(PROTOLAYER_EVENT_FORCE_CLOSE),
+                               NULL, NULL, NULL);
                 return;
         }
  
-       const uint8_t *data = (const uint8_t *)buf->base;
-       ssize_t data_len = nread;
-       const struct sockaddr *src_addr = session_get_peer(s);
-       const struct sockaddr *dst_addr = NULL;
-       if (!session_flags(s)->outgoing && !session_flags(s)->no_proxy &&
-                       proxy_header_present(data, data_len)) {
-               if (!proxy_allowed(src_addr)) {
-                       if (kr_log_is_debug(IO, NULL)) {
-                               kr_log_debug(IO, "<= connection to '%s': PROXYv2 not allowed "
-                                               "for this peer, close\n",
-                                               kr_straddr(src_addr));
-                       }
-                       worker_end_tcp(s);
-                       return;
-               }
-
-               struct proxy_result *proxy = session_proxy_create(s);
-               ssize_t trimmed = proxy_process_header(proxy, s, data, data_len);
-               if (trimmed < 0) {
-                       if (kr_log_is_debug(IO, NULL)) {
-                               if (trimmed == KNOT_EMALF) {
-                                       kr_log_debug(IO, "<= connection to '%s': "
-                                                       "malformed PROXYv2 header, close\n",
-                                                       kr_straddr(src_addr));
-                               } else {
-                                       kr_log_debug(IO, "<= connection to '%s': "
-                                                       "error processing PROXYv2 header, close\n",
-                                                       kr_straddr(src_addr));
-                               }
-                       }
-                       worker_end_tcp(s);
-                       return;
-               } else if (trimmed == 0) {
-                       return;
-               }
-
-               if (proxy->command != PROXY2_CMD_LOCAL && proxy->family != AF_UNSPEC) {
-                       src_addr = &proxy->src_addr.ip;
-                       dst_addr = &proxy->dst_addr.ip;
-
-                       if (kr_log_is_debug(IO, NULL)) {
-                               kr_log_debug(IO, "<= TCP stream from '%s'\n",
-                                               kr_straddr(src_addr));
-                               kr_log_debug(IO, "<= proxied through '%s'\n",
-                                               kr_straddr(session_get_peer(s)));
-                       }
-               }
-
-               data = session_wirebuf_get_free_start(s);
-               data_len = nread - trimmed;
-       }
-
-       session_flags(s)->no_proxy = true;
-
-       ssize_t consumed = 0;
-       if (session_flags(s)->has_tls) {
-               /* buf->base points to start of the tls receive buffer.
-                  Decode data free space in session wire buffer. */
-               consumed = tls_process_input_data(s, data, data_len);
-               if (consumed < 0) {
-                       if (kr_log_is_debug(IO, NULL)) {
-                               char *peer_str = kr_straddr(src_addr);
-                               kr_log_debug(IO, "=> connection to '%s': "
-                                              "error processing TLS data, close\n",
-                                              peer_str ? peer_str : "");
-                       }
-                       worker_end_tcp(s);
-                       return;
-               } else if (consumed == 0) {
-                       return;
-               }
-               data = session_wirebuf_get_free_start(s);
-               data_len = consumed;
-       }
-#if ENABLE_DOH2
-       int streaming = 1;
-       if (session_flags(s)->has_http) {
-               streaming = http_process_input_data(s, data, data_len,
-                               &consumed);
-               if (streaming < 0) {
-                       if (kr_log_is_debug(IO, NULL)) {
-                               char *peer_str = kr_straddr(src_addr);
-                               kr_log_debug(IO, "=> connection to '%s': "
-                                      "error processing HTTP data, close\n",
-                                      peer_str ? peer_str : "");
-                       }
-                       worker_end_tcp(s);
-                       return;
-               }
-               if (consumed == 0) {
-                       return;
-               }
-               data = session_wirebuf_get_free_start(s);
-               data_len = consumed;
-       }
-#endif
-
-       /* data points to start of the free space in session wire buffer.
-          Simple increase internal counter. */
-       consumed = session_wirebuf_consume(s, data, data_len);
-       kr_assert(consumed == data_len);
-
-       struct io_comm_data comm = {
-               .src_addr = src_addr,
-               .comm_addr = session_get_peer(s),
-               .dst_addr = dst_addr,
-               .proxy = session_proxy_get(s)
-       };
-       int ret = session_wirebuf_process(s, &comm);
-       if (ret < 0) {
-               /* An error has occurred, close the session. */
-               worker_end_tcp(s);
-       }
-       session_wirebuf_compress(s);
-       mp_flush(the_worker->pkt_pool.ctx);
-#if ENABLE_DOH2
-       if (session_flags(s)->has_http && streaming == 0 && ret == 0) {
-               ret = http_send_status(s, HTTP_STATUS_BAD_REQUEST);
-               if (ret) {
-                       /* An error has occurred, close the session. */
-                       worker_end_tcp(s);
-               }
+       int ret = wire_buf_consume(&s->wire_buf, nread);
+       if (ret) {
+               wire_buf_reset(&s->wire_buf);
+               return;
         }
-#endif
-}
  
-#if ENABLE_DOH2
-static ssize_t tls_send(const uint8_t *buf, const size_t len, struct session *session)
-{
-       struct tls_ctx *ctx = session_tls_get_server_ctx(session);
-       ssize_t sent = 0;
-       kr_require(ctx);
-
-       sent = gnutls_record_send(ctx->c.tls_session, buf, len);
-       if (sent < 0) {
-               kr_log_debug(DOH, "gnutls_record_send failed: %s (%zd)\n",
-                              gnutls_strerror_name(sent), sent);
-               return kr_error(EIO);
-       }
-       return sent;
+       session2_unwrap(s, protolayer_wire_buf(&s->wire_buf), NULL, NULL, NULL);
+
+//     ssize_t consumed = 0;
+//     if (session_flags(s)->has_tls) {
+//             /* buf->base points to start of the tls receive buffer.
+//                Decode data free space in session wire buffer. */
+//             consumed = tls_process_input_data(s, data, data_len);
+//             if (consumed < 0) {
+//                     if (kr_log_is_debug(IO, NULL)) {
+//                             char *peer_str = kr_straddr(src_addr);
+//                             kr_log_debug(IO, "=> connection to '%s': "
+//                                            "error processing TLS data, close\n",
+//                                            peer_str ? peer_str : "");
+//                     }
+//                     worker_end_tcp(s);
+//                     return;
+//             } else if (consumed == 0) {
+//                     return;
+//             }
+//             data = session_wirebuf_get_free_start(s);
+//             data_len = consumed;
+//     }
+//#if ENABLE_DOH2
+//     int streaming = 1;
+//     if (session_flags(s)->has_http) {
+//             streaming = http_process_input_data(s, data, data_len,
+//                             &consumed);
+//             if (streaming < 0) {
+//                     if (kr_log_is_debug(IO, NULL)) {
+//                             char *peer_str = kr_straddr(src_addr);
+//                             kr_log_debug(IO, "=> connection to '%s': "
+//                                    "error processing HTTP data, close\n",
+//                                    peer_str ? peer_str : "");
+//                     }
+//                     worker_end_tcp(s);
+//                     return;
+//             }
+//             if (consumed == 0) {
+//                     return;
+//             }
+//             data = session_wirebuf_get_free_start(s);
+//             data_len = consumed;
+//     }
+//#endif
+//
+//     /* data points to start of the free space in session wire buffer.
+//        Simple increase internal counter. */
+//     consumed = session_wirebuf_consume(s, data, data_len);
+//     kr_assert(consumed == data_len);
+//
+//     struct io_comm_data comm = {
+//             .src_addr = src_addr,
+//             .comm_addr = session_get_peer(s),
+//             .dst_addr = dst_addr,
+//             .proxy = session_proxy_get(s)
+//     };
+//     int ret = session_wirebuf_process(s, &comm);
+//     if (ret < 0) {
+//             /* An error has occurred, close the session. */
+//             worker_end_tcp(s);
+//     }
+//     session_wirebuf_compress(s);
+//     mp_flush(the_worker->pkt_pool.ctx);
+//#if ENABLE_DOH2
+//     if (session_flags(s)->has_http && streaming == 0 && ret == 0) {
+//             ret = http_send_status(s, HTTP_STATUS_BAD_REQUEST);
+//             if (ret) {
+//                     /* An error has occurred, close the session. */
+//                     worker_end_tcp(s);
+//             }
+//     }
+//#endif
  }
-#endif
  
-static void _tcp_accept(uv_stream_t *master, int status, bool tls, bool http)
+/* TODO: http */
+//#if ENABLE_DOH2
+//static ssize_t tls_send(const uint8_t *buf, const size_t len, struct session *session)
+//{
+//     struct tls_ctx *ctx = session_tls_get_server_ctx(session);
+//     ssize_t sent = 0;
+//     kr_require(ctx);
+//
+//     sent = gnutls_record_send(ctx->c.tls_session, buf, len);
+//     if (sent < 0) {
+//             kr_log_debug(DOH, "gnutls_record_send failed: %s (%zd)\n",
+//                            gnutls_strerror_name(sent), sent);
+//             return kr_error(EIO);
+//     }
+//     return sent;
+//}
+//#endif
+
+static void _tcp_accept(uv_stream_t *master, int status, enum protolayer_grp grp)
  {
         if (status != 0) {
                 return;
@@ -518,7 +667,7 @@ static void _tcp_accept(uv_stream_t *master, int status, bool tls, bool http)
                 return;
         }
         int res = io_create(master->loop, (uv_handle_t *)client,
-                           SOCK_STREAM, AF_UNSPEC, tls, http);
+                           SOCK_STREAM, AF_UNSPEC, grp, false);
         if (res) {
                 if (res == UV_EMFILE) {
                         the_worker->too_many_open = true;
@@ -532,31 +681,37 @@ static void _tcp_accept(uv_stream_t *master, int status, bool tls, bool http)
         }
  
         /* struct session was allocated \ borrowed from memory pool. */
-       struct session *s = client->data;
-       kr_require(session_flags(s)->outgoing == false);
-       kr_require(session_flags(s)->has_tls == tls);
+       struct session2 *s = client->data;
+       kr_require(s->outgoing == false);
+//     kr_require(s->secure == tls); /* TODO */
  
         if (uv_accept(master, (uv_stream_t *)client) != 0) {
                 /* close session, close underlying uv handles and
                  * deallocate (or return to memory pool) memory. */
-               session_close(s);
+               session2_unwrap(s,
+                               protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                               NULL, NULL, NULL);
                 return;
         }
  
         /* Get peer's and our address.  We apparently get specific sockname here
          * even if we listened on a wildcard address. */
-       struct sockaddr *sa = session_get_peer(s);
+       struct sockaddr *sa = session2_get_peer(s);
         int sa_len = sizeof(struct sockaddr_in6);
         int ret = uv_tcp_getpeername(client, sa, &sa_len);
         if (ret || sa->sa_family == AF_UNSPEC) {
-               session_close(s);
+               session2_unwrap(s,
+                               protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                               NULL, NULL, NULL);
                 return;
         }
-       sa = session_get_sockname(s);
+       sa = session2_get_sockname(s);
         sa_len = sizeof(struct sockaddr_in6);
         ret = uv_tcp_getsockname(client, sa, &sa_len);
         if (ret || sa->sa_family == AF_UNSPEC) {
-               session_close(s);
+               session2_unwrap(s,
+                               protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                               NULL, NULL, NULL);
                 return;
         }
  
@@ -567,77 +722,78 @@ static void _tcp_accept(uv_stream_t *master, int status, bool tls, bool http)
         uint64_t idle_in_timeout = the_network->tcp.in_idle_timeout;
  
         uint64_t timeout = KR_CONN_RTT_MAX / 2;
-       if (tls) {
-               timeout += TLS_MAX_HANDSHAKE_TIME;
-               struct tls_ctx *ctx = session_tls_get_server_ctx(s);
-               if (!ctx) {
-                       ctx = tls_new();
-                       if (!ctx) {
-                               session_close(s);
-                               return;
-                       }
-                       ctx->c.session = s;
-                       ctx->c.handshake_state = TLS_HS_IN_PROGRESS;
-
-                       /* Configure ALPN. */
-                       gnutls_datum_t proto;
-                       if (!http) {
-                               proto.data = (unsigned char *)"dot";
-                               proto.size = 3;
-                       } else {
-                               proto.data = (unsigned char *)"h2";
-                               proto.size = 2;
-                       }
-                       unsigned int flags = 0;
-#if GNUTLS_VERSION_NUMBER >= 0x030500
-                       /* Mandatory ALPN means the protocol must match if and
-                        * only if ALPN extension is used by the client. */
-                       flags |= GNUTLS_ALPN_MANDATORY;
-#endif
-                       ret = gnutls_alpn_set_protocols(ctx->c.tls_session, &proto, 1, flags);
-                       if (ret != GNUTLS_E_SUCCESS) {
-                               session_close(s);
-                               return;
-                       }
-
-                       session_tls_set_server_ctx(s, ctx);
-               }
-       }
-#if ENABLE_DOH2
-       if (http) {
-               struct http_ctx *ctx = session_http_get_server_ctx(s);
-               if (!ctx) {
-                       if (!tls) {  /* Plain HTTP is not supported. */
-                               session_close(s);
-                               return;
-                       }
-                       ctx = http_new(s, tls_send);
-                       if (!ctx) {
-                               session_close(s);
-                               return;
-                       }
-                       session_http_set_server_ctx(s, ctx);
-               }
-       }
-#endif
-       session_timer_start(s, tcp_timeout_trigger, timeout, idle_in_timeout);
+       /* TODO: tls, http */
+//     if (tls) {
+//             timeout += TLS_MAX_HANDSHAKE_TIME;
+//             struct tls_ctx *ctx = session_tls_get_server_ctx(s);
+//             if (!ctx) {
+//                     ctx = tls_new();
+//                     if (!ctx) {
+//                             session_close(s);
+//                             return;
+//                     }
+//                     ctx->c.session = s;
+//                     ctx->c.handshake_state = TLS_HS_IN_PROGRESS;
+//
+//                     /* Configure ALPN. */
+//                     gnutls_datum_t proto;
+//                     if (!http) {
+//                             proto.data = (unsigned char *)"dot";
+//                             proto.size = 3;
+//                     } else {
+//                             proto.data = (unsigned char *)"h2";
+//                             proto.size = 2;
+//                     }
+//                     unsigned int flags = 0;
+//#if GNUTLS_VERSION_NUMBER >= 0x030500
+//                     /* Mandatory ALPN means the protocol must match if and
+//                      * only if ALPN extension is used by the client. */
+//                     flags |= GNUTLS_ALPN_MANDATORY;
+//#endif
+//                     ret = gnutls_alpn_set_protocols(ctx->c.tls_session, &proto, 1, flags);
+//                     if (ret != GNUTLS_E_SUCCESS) {
+//                             session_close(s);
+//                             return;
+//                     }
+//
+//                     session_tls_set_server_ctx(s, ctx);
+//             }
+//     }
+//#if ENABLE_DOH2
+//     if (http) {
+//             struct http_ctx *ctx = session_http_get_server_ctx(s);
+//             if (!ctx) {
+//                     if (!tls) {  /* Plain HTTP is not supported. */
+//                             session_close(s);
+//                             return;
+//                     }
+//                     ctx = http_new(s, tls_send);
+//                     if (!ctx) {
+//                             session_close(s);
+//                             return;
+//                     }
+//                     session_http_set_server_ctx(s, ctx);
+//             }
+//     }
+//#endif
+       session2_timer_start(s, timeout, idle_in_timeout, PROTOLAYER_UNWRAP);
         io_start_read((uv_handle_t *)client);
  }
  
  static void tcp_accept(uv_stream_t *master, int status)
  {
-       _tcp_accept(master, status, false, false);
+       _tcp_accept(master, status, PROTOLAYER_GRP_DOTCP);
  }
  
  static void tls_accept(uv_stream_t *master, int status)
  {
-       _tcp_accept(master, status, true, false);
+       _tcp_accept(master, status, PROTOLAYER_GRP_DOTLS);
  }
  
  #if ENABLE_DOH2
  static void https_accept(uv_stream_t *master, int status)
  {
-       _tcp_accept(master, status, true, true);
+       _tcp_accept(master, status, PROTOLAYER_GRP_DOHTTPS);
  }
  #endif
  
@@ -933,151 +1089,152 @@ int io_listen_pipe(uv_loop_t *loop, uv_pipe_t *handle, int fd)
         return 0;
  }
  
-#if ENABLE_XDP
-static void xdp_rx(uv_poll_t* handle, int status, int events)
-{
-       const int XDP_RX_BATCH_SIZE = 64;
-       if (status < 0) {
-               kr_log_error(XDP, "poll status %d: %s\n", status, uv_strerror(status));
-               return;
-       }
-       if (events != UV_READABLE) {
-               kr_log_error(XDP, "poll unexpected events: %d\n", events);
-               return;
-       }
-
-       xdp_handle_data_t *xhd = handle->data;
-       kr_require(xhd && xhd->session && xhd->socket);
-       uint32_t rcvd;
-       knot_xdp_msg_t msgs[XDP_RX_BATCH_SIZE];
-       int ret = knot_xdp_recv(xhd->socket, msgs, XDP_RX_BATCH_SIZE, &rcvd
-                       #if KNOT_VERSION_HEX >= 0x030100
-                       , NULL
-                       #endif
-                       );
-
-       if (kr_fails_assert(ret == KNOT_EOK)) {
-               /* ATM other error codes can only be returned when called incorrectly */
-               kr_log_error(XDP, "knot_xdp_recv(): %d, %s\n", ret, knot_strerror(ret));
-               return;
-       }
-       kr_log_debug(XDP, "poll triggered, processing a batch of %d packets\n", (int)rcvd);
-       kr_require(rcvd <= XDP_RX_BATCH_SIZE);
-       for (int i = 0; i < rcvd; ++i) {
-               const knot_xdp_msg_t *msg = &msgs[i];
-               kr_require(msg->payload.iov_len <= KNOT_WIRE_MAX_PKTSIZE);
-               knot_pkt_t *kpkt = knot_pkt_new(msg->payload.iov_base, msg->payload.iov_len,
-                                               &the_worker->pkt_pool);
-               if (kpkt == NULL) {
-                       ret = kr_error(ENOMEM);
-               } else {
-                       struct io_comm_data comm = {
-                               .src_addr = (const struct sockaddr *)&msg->ip_from,
-                               .comm_addr = (const struct sockaddr *)&msg->ip_from,
-                               .dst_addr = (const struct sockaddr *)&msg->ip_to
-                       };
-                       ret = worker_submit(xhd->session, &comm,
-                                       msg->eth_from, msg->eth_to, kpkt);
-               }
-               if (ret)
-                       kr_log_debug(XDP, "worker_submit() == %d: %s\n", ret, kr_strerror(ret));
-               mp_flush(the_worker->pkt_pool.ctx);
-       }
-       knot_xdp_recv_finish(xhd->socket, msgs, rcvd);
-}
-/// Warn if the XDP program is running in emulated mode (XDP_SKB)
-static void xdp_warn_mode(const char *ifname)
-{
-       if (kr_fails_assert(ifname))
-               return;
-
-       const unsigned if_index = if_nametoindex(ifname);
-       if (!if_index) {
-               kr_log_warning(XDP, "warning: interface %s, unexpected error when converting its name: %s\n",
-                               ifname, strerror(errno));
-               return;
-       }
-
-       const knot_xdp_mode_t mode = knot_eth_xdp_mode(if_index);
-       switch (mode) {
-       case KNOT_XDP_MODE_FULL:
-               return;
-       case KNOT_XDP_MODE_EMUL:
-               kr_log_warning(XDP, "warning: interface %s running only with XDP emulation\n",
-                               ifname);
-               return;
-       case KNOT_XDP_MODE_NONE: // enum warnings from compiler
-               break;
-       }
-       kr_log_warning(XDP, "warning: interface %s running in unexpected XDP mode %d\n",
-                       ifname, (int)mode);
-}
-int io_listen_xdp(uv_loop_t *loop, struct endpoint *ep, const char *ifname)
-{
-       if (!ep || !ep->handle) {
-               return kr_error(EINVAL);
-       }
-
-       // RLIMIT_MEMLOCK often needs raising when operating on BPF
-       static int ret_limit = 1;
-       if (ret_limit == 1) {
-               struct rlimit no_limit = { RLIM_INFINITY, RLIM_INFINITY };
-               ret_limit = setrlimit(RLIMIT_MEMLOCK, &no_limit)
-                       ? kr_error(errno) : 0;
-       }
-       if (ret_limit) return ret_limit;
-
-       xdp_handle_data_t *xhd = malloc(sizeof(*xhd));
-       if (!xhd) return kr_error(ENOMEM);
-
-       xhd->socket = NULL; // needed for some reason
-
-       // This call is a libknot version hell, unfortunately.
-       int ret = knot_xdp_init(&xhd->socket, ifname, ep->nic_queue,
-               #if KNOT_VERSION_HEX < 0x030100
-                       ep->port ? ep->port : KNOT_XDP_LISTEN_PORT_ALL,
-                       KNOT_XDP_LOAD_BPF_MAYBE
-               #elif KNOT_VERSION_HEX < 0x030200
-                       ep->port ? ep->port : (KNOT_XDP_LISTEN_PORT_PASS | 0),
-                       KNOT_XDP_LOAD_BPF_MAYBE
-               #else
-                       KNOT_XDP_FILTER_UDP | (ep->port ? 0 : KNOT_XDP_FILTER_PASS),
-                       ep->port, 0/*quic_port*/,
-                       KNOT_XDP_LOAD_BPF_MAYBE,
-                       NULL/*xdp_config*/
-               #endif
-               );
-
-       if (!ret) xdp_warn_mode(ifname);
-
-       if (!ret) ret = uv_idle_init(loop, &xhd->tx_waker);
-       if (ret || kr_fails_assert(xhd->socket)) {
-               free(xhd);
-               return ret == 0 ? kr_error(EINVAL) : kr_error(ret);
-       }
-       xhd->tx_waker.data = xhd->socket;
-
-       ep->fd = knot_xdp_socket_fd(xhd->socket); // probably not useful
-       ret = uv_poll_init(loop, (uv_poll_t *)ep->handle, ep->fd);
-       if (ret) {
-               knot_xdp_deinit(xhd->socket);
-               free(xhd);
-               return kr_error(ret);
-       }
-
-       // beware: this sets poll_handle->data
-       xhd->session = session_new(ep->handle, false, false);
-       kr_require(!session_flags(xhd->session)->outgoing);
-       session_get_sockname(xhd->session)->sa_family = AF_XDP; // to have something in there
-
-       ep->handle->data = xhd;
-       ret = uv_poll_start((uv_poll_t *)ep->handle, UV_READABLE, xdp_rx);
-       return ret;
-}
-#endif
-
-
-int io_create(uv_loop_t *loop, uv_handle_t *handle, int type, unsigned family, bool has_tls, bool has_http)
+/* TODO: xdp */
+//#if ENABLE_XDP
+//static void xdp_rx(uv_poll_t* handle, int status, int events)
+//{
+//     const int XDP_RX_BATCH_SIZE = 64;
+//     if (status < 0) {
+//             kr_log_error(XDP, "poll status %d: %s\n", status, uv_strerror(status));
+//             return;
+//     }
+//     if (events != UV_READABLE) {
+//             kr_log_error(XDP, "poll unexpected events: %d\n", events);
+//             return;
+//     }
+//
+//     xdp_handle_data_t *xhd = handle->data;
+//     kr_require(xhd && xhd->session && xhd->socket);
+//     uint32_t rcvd;
+//     knot_xdp_msg_t msgs[XDP_RX_BATCH_SIZE];
+//     int ret = knot_xdp_recv(xhd->socket, msgs, XDP_RX_BATCH_SIZE, &rcvd
+//                     #if KNOT_VERSION_HEX >= 0x030100
+//                     , NULL
+//                     #endif
+//                     );
+//
+//     if (kr_fails_assert(ret == KNOT_EOK)) {
+//             /* ATM other error codes can only be returned when called incorrectly */
+//             kr_log_error(XDP, "knot_xdp_recv(): %d, %s\n", ret, knot_strerror(ret));
+//             return;
+//     }
+//     kr_log_debug(XDP, "poll triggered, processing a batch of %d packets\n", (int)rcvd);
+//     kr_require(rcvd <= XDP_RX_BATCH_SIZE);
+//     for (int i = 0; i < rcvd; ++i) {
+//             const knot_xdp_msg_t *msg = &msgs[i];
+//             kr_require(msg->payload.iov_len <= KNOT_WIRE_MAX_PKTSIZE);
+//             knot_pkt_t *kpkt = knot_pkt_new(msg->payload.iov_base, msg->payload.iov_len,
+//                                             &the_worker->pkt_pool);
+//             if (kpkt == NULL) {
+//                     ret = kr_error(ENOMEM);
+//             } else {
+//                     struct io_comm_data comm = {
+//                             .src_addr = (const struct sockaddr *)&msg->ip_from,
+//                             .comm_addr = (const struct sockaddr *)&msg->ip_from,
+//                             .dst_addr = (const struct sockaddr *)&msg->ip_to
+//                     };
+//                     ret = worker_submit(xhd->session, &comm,
+//                                     msg->eth_from, msg->eth_to, kpkt);
+//             }
+//             if (ret)
+//                     kr_log_debug(XDP, "worker_submit() == %d: %s\n", ret, kr_strerror(ret));
+//             mp_flush(the_worker->pkt_pool.ctx);
+//     }
+//     knot_xdp_recv_finish(xhd->socket, msgs, rcvd);
+//}
+///// Warn if the XDP program is running in emulated mode (XDP_SKB)
+//static void xdp_warn_mode(const char *ifname)
+//{
+//     if (kr_fails_assert(ifname))
+//             return;
+//
+//     const unsigned if_index = if_nametoindex(ifname);
+//     if (!if_index) {
+//             kr_log_warning(XDP, "warning: interface %s, unexpected error when converting its name: %s\n",
+//                             ifname, strerror(errno));
+//             return;
+//     }
+//
+//     const knot_xdp_mode_t mode = knot_eth_xdp_mode(if_index);
+//     switch (mode) {
+//     case KNOT_XDP_MODE_FULL:
+//             return;
+//     case KNOT_XDP_MODE_EMUL:
+//             kr_log_warning(XDP, "warning: interface %s running only with XDP emulation\n",
+//                             ifname);
+//             return;
+//     case KNOT_XDP_MODE_NONE: // enum warnings from compiler
+//             break;
+//     }
+//     kr_log_warning(XDP, "warning: interface %s running in unexpected XDP mode %d\n",
+//                     ifname, (int)mode);
+//}
+//int io_listen_xdp(uv_loop_t *loop, struct endpoint *ep, const char *ifname)
+//{
+//     if (!ep || !ep->handle) {
+//             return kr_error(EINVAL);
+//     }
+//
+//     // RLIMIT_MEMLOCK often needs raising when operating on BPF
+//     static int ret_limit = 1;
+//     if (ret_limit == 1) {
+//             struct rlimit no_limit = { RLIM_INFINITY, RLIM_INFINITY };
+//             ret_limit = setrlimit(RLIMIT_MEMLOCK, &no_limit)
+//                     ? kr_error(errno) : 0;
+//     }
+//     if (ret_limit) return ret_limit;
+//
+//     xdp_handle_data_t *xhd = malloc(sizeof(*xhd));
+//     if (!xhd) return kr_error(ENOMEM);
+//
+//     xhd->socket = NULL; // needed for some reason
+//
+//     // This call is a libknot version hell, unfortunately.
+//     int ret = knot_xdp_init(&xhd->socket, ifname, ep->nic_queue,
+//             #if KNOT_VERSION_HEX < 0x030100
+//                     ep->port ? ep->port : KNOT_XDP_LISTEN_PORT_ALL,
+//                     KNOT_XDP_LOAD_BPF_MAYBE
+//             #elif KNOT_VERSION_HEX < 0x030200
+//                     ep->port ? ep->port : (KNOT_XDP_LISTEN_PORT_PASS | 0),
+//                     KNOT_XDP_LOAD_BPF_MAYBE
+//             #else
+//                     KNOT_XDP_FILTER_UDP | (ep->port ? 0 : KNOT_XDP_FILTER_PASS),
+//                     ep->port, 0/*quic_port*/,
+//                     KNOT_XDP_LOAD_BPF_MAYBE,
+//                     NULL/*xdp_config*/
+//             #endif
+//             );
+//
+//     if (!ret) xdp_warn_mode(ifname);
+//
+//     if (!ret) ret = uv_idle_init(loop, &xhd->tx_waker);
+//     if (ret || kr_fails_assert(xhd->socket)) {
+//             free(xhd);
+//             return ret == 0 ? kr_error(EINVAL) : kr_error(ret);
+//     }
+//     xhd->tx_waker.data = xhd->socket;
+//
+//     ep->fd = knot_xdp_socket_fd(xhd->socket); // probably not useful
+//     ret = uv_poll_init(loop, (uv_poll_t *)ep->handle, ep->fd);
+//     if (ret) {
+//             knot_xdp_deinit(xhd->socket);
+//             free(xhd);
+//             return kr_error(ret);
+//     }
+//
+//     // beware: this sets poll_handle->data
+//     xhd->session = session_new(ep->handle, false, false);
+//     kr_require(!session_flags(xhd->session)->outgoing);
+//     session_get_sockname(xhd->session)->sa_family = AF_XDP; // to have something in there
+//
+//     ep->handle->data = xhd;
+//     ret = uv_poll_start((uv_poll_t *)ep->handle, UV_READABLE, xdp_rx);
+//     return ret;
+//}
+//#endif
+
+int io_create(uv_loop_t *loop, uv_handle_t *handle, int type, unsigned family,
+               enum protolayer_grp grp, bool outgoing)
  {
         int ret = -1;
         if (type == SOCK_DGRAM) {
@@ -1089,7 +1246,7 @@ int io_create(uv_loop_t *loop, uv_handle_t *handle, int type, unsigned family, b
         if (ret != 0) {
                 return ret;
         }
-       struct session *s = session_new(handle, has_tls, has_http);
+       struct session2 *s = session2_new_io(handle, grp, outgoing);
         if (s == NULL) {
                 ret = -1;
         }
@@ -1102,13 +1259,13 @@ static void io_deinit(uv_handle_t *handle)
                 return;
         }
         if (handle->type != UV_POLL) {
-               session_free(handle->data);
+               session2_free(handle->data);
         } else {
         #if ENABLE_XDP
                 xdp_handle_data_t *xhd = handle->data;
                 uv_idle_stop(&xhd->tx_waker);
                 uv_close((uv_handle_t *)&xhd->tx_waker, NULL);
-               session_free(xhd->session);
+               session2_free(xhd->session);
                 knot_xdp_deinit(xhd->socket);
                 free(xhd);
         #else
diff --git a/daemon/io.h b/daemon/io.h

index 0e88dc189f404bba23f4a216c6338ca0e3fe340c..e589231dcda3278978c0252da55ee1315cf9bd43 100644 (file)
--- a/daemon/io.h
+++ b/daemon/io.h
@@ -11,33 +11,14 @@
  #include "lib/generic/array.h"
  #include "daemon/worker.h"
  #include "daemon/engine.h"
+#include "daemon/session2.h"
  
  struct tls_ctx;
  struct tls_client_ctx;
  struct io_stream_data;
  
-/** Communication data. */
-struct io_comm_data {
-       /** The original address the data came from. May be that of a proxied
-        * client, if they came through a proxy. May be `NULL` if
-        * the communication did not come from network. */
-       const struct sockaddr *src_addr;
-
-       /** The actual address the resolver is communicating with. May be
-        * the address of a proxy if the communication came through one,
-        * otherwise it will be the same as `src_addr`. May be `NULL` if
-        * the communication did not come from network. */
-       const struct sockaddr *comm_addr;
-
-       /** The original destination address. May be the resolver's address, or
-        * the address of a proxy if the communication came through one. May be
-        * `NULL` if the communication did not come from network. */
-       const struct sockaddr *dst_addr;
-
-       /** Data parsed from a PROXY header. May be `NULL` if the communication
-        * did not come through a proxy, or if the PROXYv2 protocol was not used. */
-       const struct proxy_result *proxy;
-};
+/** Initializes the protocol layers managed by io. Takes no arguments. */
+void io_protolayers_init(void);
  
  /** Bind address into a file-descriptor (only, no libuv).  type is e.g. SOCK_DGRAM */
  int io_bind(const struct sockaddr *addr, int type, const endpoint_flags_t *flags);
@@ -64,7 +45,7 @@ void tcp_timeout_trigger(uv_timer_t *timer);
   * \param family = AF_*
   * \param has_tls has meanings only when type is SOCK_STREAM */
  int io_create(uv_loop_t *loop, uv_handle_t *handle, int type,
-             unsigned family, bool has_tls, bool has_http);
+             unsigned family, enum protolayer_grp grp, bool outgoing);
  void io_free(uv_handle_t *handle);
  
  int io_start_read(uv_handle_t *handle);
@@ -74,7 +55,7 @@ int io_stop_read(uv_handle_t *handle);
   * (Other cases store a direct struct session pointer in ::data.) */
  typedef struct {
         struct knot_xdp_socket *socket;
-       struct session *session;
+       struct session2 *session;
         uv_idle_t tx_waker;
  } xdp_handle_data_t;
  
diff --git a/daemon/main.c b/daemon/main.c

index 5f63ee6df2634321faca65ec8cd93b1e194c0c96..9df6dc79f78084ea011b625c9928965cb8d5f837 100644 (file)
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -569,6 +569,8 @@ int main(int argc, char **argv)
                                 uv_strerror(ret));
         }
  
+       io_protolayers_init();
+
         /* Start listening, in the sense of network_listen_fd(). */
         if (start_listening(&the_args->fds) != 0) {
                 ret = EXIT_FAILURE;
diff --git a/daemon/meson.build b/daemon/meson.build

index 1ff28ec031d26ab7a4d81e82e8dffb5aff416585..b0b119f0dd97e8e2041add0e3c743631791221af 100644 (file)
--- a/daemon/meson.build
+++ b/daemon/meson.build
@@ -14,18 +14,17 @@ kresd_src = files([
    'main.c',
    'network.c',
    'proxyv2.c',
-  'session.c',
    'session2.c',
-  'tls.c',
-  'tls_ephemeral_credentials.c',
-  'tls_session_ticket-srv.c',
+#  'tls.c',
+#  'tls_ephemeral_credentials.c',
+#  'tls_session_ticket-srv.c',
    'udp_queue.c',
    'worker.c',
    'zimport.c',
  ])
-if nghttp2.found()
-  kresd_src += files(['http.c'])
-endif
+#if nghttp2.found()
+#  kresd_src += files(['http.c'])
+#endif
  
  c_src_lint += kresd_src
  
@@ -51,7 +50,7 @@ kresd_deps = [
    gnutls,
    libsystemd,
    capng,
-  nghttp2,
+#  nghttp2,
    malloc,
  ]
  
diff --git a/daemon/network.c b/daemon/network.c

index 737a3b3e2923c9972c977cfb6811e4134a4bb460..706523bc33d6981746b05cd4be4ab7e340b8fbec 100644 (file)
--- a/daemon/network.c
+++ b/daemon/network.c
@@ -72,8 +72,9 @@ void network_init(uv_loop_t *loop, int tcp_backlog)
         the_network->proxy_addrs4 = trie_create(NULL);
         the_network->proxy_addrs6 = trie_create(NULL);
         the_network->tls_client_params = NULL;
-       the_network->tls_session_ticket_ctx = /* unsync. random, by default */
-       tls_session_ticket_ctx_create(loop, NULL, 0);
+       /* TODO: tls */
+//     the_network->tls_session_ticket_ctx = /* unsync. random, by default */
+//                     tls_session_ticket_ctx_create(loop, NULL, 0);
         the_network->tcp.in_idle_timeout = 10000;
         the_network->tcp.tls_handshake_timeout = TLS_MAX_HANDSHAKE_TIME;
         the_network->tcp_backlog = tcp_backlog;
@@ -299,9 +300,10 @@ void network_deinit(void)
         network_proxy_free_addr_data(the_network->proxy_addrs6);
         trie_free(the_network->proxy_addrs6);
  
-       tls_credentials_free(the_network->tls_credentials);
-       tls_client_params_free(the_network->tls_client_params);
-       tls_session_ticket_ctx_destroy(the_network->tls_session_ticket_ctx);
+       /* TODO: tls */
+//     tls_credentials_free(the_network->tls_credentials);
+//     tls_client_params_free(the_network->tls_client_params);
+//     tls_session_ticket_ctx_destroy(the_network->tls_session_ticket_ctx);
  #ifndef NDEBUG
         memset(the_network, 0, sizeof(*the_network));
  #endif
@@ -417,14 +419,14 @@ static int open_endpoint(const char *addr_str,
         }
  
         if (is_xdp) {
-       #if ENABLE_XDP
-               uv_poll_t *ep_handle = malloc(sizeof(uv_poll_t));
-               ep->handle = (uv_handle_t *)ep_handle;
-               ret = !ep->handle ? ENOMEM
-                       : io_listen_xdp(the_network->loop, ep, addr_str);
-       #else
+//     #if ENABLE_XDP
+//             uv_poll_t *ep_handle = malloc(sizeof(uv_poll_t));
+//             ep->handle = (uv_handle_t *)ep_handle;
+//             ret = !ep->handle ? ENOMEM
+//                     : io_listen_xdp(the_network->loop, ep, addr_str);
+//     #else
                 ret = ESOCKTNOSUPPORT;
-       #endif
+//     #endif
                 goto finish_ret;
         } /* else */
  
@@ -842,18 +844,19 @@ int network_close(const char *addr_str, int port)
  
  void network_new_hostname(void)
  {
-       if (the_network->tls_credentials &&
-           the_network->tls_credentials->ephemeral_servicename) {
-               struct tls_credentials *newcreds;
-               newcreds = tls_get_ephemeral_credentials();
-               if (newcreds) {
-                       tls_credentials_release(the_network->tls_credentials);
-                       the_network->tls_credentials = newcreds;
-                       kr_log_info(TLS, "Updated ephemeral X.509 cert with new hostname\n");
-               } else {
-                       kr_log_error(TLS, "Failed to update ephemeral X.509 cert with new hostname, using existing one\n");
-               }
-       }
+       /* TODO: tls */
+//     if (the_network->tls_credentials &&
+//         the_network->tls_credentials->ephemeral_servicename) {
+//             struct tls_credentials *newcreds;
+//             newcreds = tls_get_ephemeral_credentials();
+//             if (newcreds) {
+//                     tls_credentials_release(the_network->tls_credentials);
+//                     the_network->tls_credentials = newcreds;
+//                     kr_log_info(TLS, "Updated ephemeral X.509 cert with new hostname\n");
+//             } else {
+//                     kr_log_error(TLS, "Failed to update ephemeral X.509 cert with new hostname, using existing one\n");
+//             }
+//     }
  }
  
  #ifdef SO_ATTACH_BPF
diff --git a/daemon/proxyv2.c b/daemon/proxyv2.c

index c293a62360e5a48167bfccd23ab857263f0d3cdd..f796aad80bc1841e7ceac9083fe51cb402eefeea 100644 (file)
--- a/daemon/proxyv2.c
+++ b/daemon/proxyv2.c
@@ -167,7 +167,7 @@ bool proxy_allowed(const struct sockaddr *saddr)
         return kr_bitcmp((char *) &addr, (char *) &found->addr, found->netmask) == 0;
  }
  
-ssize_t proxy_process_header(struct proxy_result *out, struct session *s,
+ssize_t proxy_process_header(struct proxy_result *out,
                 const void *buf, const ssize_t nread)
  {
         if (!buf)
@@ -287,5 +287,5 @@ ssize_t proxy_process_header(struct proxy_result *out, struct session *s,
         }
  
  fill_wirebuf:
-       return session_wirebuf_trim(s, hdr_len);
+       return hdr_len;
  }
diff --git a/daemon/proxyv2.h b/daemon/proxyv2.h

index 3cbb5213a77b2c1f2efdedef3d87f191e7f0c7b7..c18e71ce35b90fc29d34534f68031e671ffab6be 100644 (file)
--- a/daemon/proxyv2.h
+++ b/daemon/proxyv2.h
@@ -6,7 +6,7 @@
  
  #include <stdint.h>
  
-#include "daemon/session.h"
+#include "daemon/session2.h"
  #include "lib/utils.h"
  
  extern const char PROXY2_SIGNATURE[12];
@@ -42,8 +42,7 @@ static inline bool proxy_header_present(const void* buf, const ssize_t nread)
  bool proxy_allowed(const struct sockaddr *saddr);
  
  /** Parses the PROXYv2 header from buf of size nread and writes the result into
- * out. The rest of the buffer is moved to free bytes of the specified session's
- * wire buffer. The function assumes that the PROXYv2 signature is present
+ * out. The function assumes that the PROXYv2 signature is present
   * and has been already checked by the caller (like `udp_recv` or `tcp_recv`). */
-ssize_t proxy_process_header(struct proxy_result *out, struct session *s,
+ssize_t proxy_process_header(struct proxy_result *out,
                               const void *buf, ssize_t nread);
diff --git a/daemon/session.c b/daemon/session.c

index 97256be24374e4247a98d24c12acb72d0f3f00a7..2f7ce601221ccd167c9df6b0cf8c1d29c095b926 100644 (file)
--- a/daemon/session.c
+++ b/daemon/session.c
@@ -767,7 +767,7 @@ void session_unpoison(struct session *session)
         kr_asan_unpoison(session, sizeof(*session));
  }
  
-int session_wirebuf_process(struct session *session, struct io_comm_data *comm)
+int session_wirebuf_process(struct session *session, struct comm_info *comm)
  {
         int ret = 0;
         if (session->wire_buf_start_idx == session->wire_buf_end_idx)
diff --git a/daemon/session.h b/daemon/session.h

index eccf45b5f0e98cd8d5841cb61858351a7182147e..66e86fbd11e7c407e27caca0e328e58b29dd34a3 100644 (file)
--- a/daemon/session.h
+++ b/daemon/session.h
@@ -13,7 +13,7 @@
  struct qr_task;
  struct worker_ctx;
  struct session;
-struct io_comm_data;
+struct comm_info;
  struct proxy_result;
  
  struct session_flags {
@@ -140,7 +140,7 @@ size_t session_wirebuf_get_free_size(struct session *session);
  void session_wirebuf_discard(struct session *session);
  /** Move all data to the beginning of the buffer. */
  void session_wirebuf_compress(struct session *session);
-int session_wirebuf_process(struct session *session, struct io_comm_data *comm);
+int session_wirebuf_process(struct session *session, struct comm_info *comm);
  ssize_t session_wirebuf_consume(struct session *session,
                                 const uint8_t *data, ssize_t len);
  /** Trims `len` bytes from the start of the session's wire buffer.
diff --git a/daemon/session2.c b/daemon/session2.c

index 0dcb0134b8f058aa17990864d67ce703073b4785..49dcecbda8aac4ad744124579eea84c2c695940f 100644 (file)
--- a/daemon/session2.c
+++ b/daemon/session2.c
@@ -7,21 +7,24 @@
  
  #include "lib/log.h"
  #include "lib/utils.h"
+#include "daemon/io.h"
+#include "daemon/worker.h"
  
  #include "daemon/session2.h"
  
  
-typedef void (*session2_push_cb)(struct session2 *s, int status,
-                                 void *target, void *baton);
-
  static int session2_transport_pushv(struct session2 *s,
                                      const struct iovec *iov, int iovcnt,
-                                    void *target,
-                                    session2_push_cb cb, void *baton);
+                                    const void *target,
+                                    protolayer_finished_cb cb, void *baton);
  static inline int session2_transport_push(struct session2 *s,
                                            char *buf, size_t buf_len,
-                                          void *target,
-                                          session2_push_cb cb, void *baton);
+                                          const void *target,
+                                          protolayer_finished_cb cb, void *baton);
+static int session2_transport_event(struct session2 *s,
+                                    struct protolayer_event event,
+                                    const void *target,
+                                    protolayer_finished_cb cb, void *baton);
  
  struct protolayer_globals protolayer_globals[PROTOLAYER_PROTOCOL_COUNT] = {0};
  
@@ -55,17 +58,53 @@ enum protolayer_protocol protolayer_grp_doh[] = {
  
  
  enum protolayer_protocol *protolayer_grps[PROTOLAYER_GRP_COUNT] = {
-#define XX(id, name, desc) [PROTOLAYER_GRP_##id] = protolayer_grp_##name,
+#define XX(cid, vid, name) [PROTOLAYER_GRP_##cid] = protolayer_grp_##vid,
         PROTOLAYER_GRP_MAP(XX)
  #undef XX
  };
  
-char *protolayer_grp_descs[PROTOLAYER_GRP_COUNT] = {
-#define XX(id, name, desc) [PROTOLAYER_GRP_##id] = desc,
+char *protolayer_grp_names[PROTOLAYER_GRP_COUNT] = {
+       [PROTOLAYER_GRP_NULL] = "(null)",
+#define XX(cid, vid, name) [PROTOLAYER_GRP_##cid] = name,
         PROTOLAYER_GRP_MAP(XX)
  #undef XX
  };
  
+char *protolayer_event_names[PROTOLAYER_EVENT_COUNT] = { /* printable names indexed by event type; read by the debug log in protolayer_manager_submit() */
+       [PROTOLAYER_EVENT_NULL] = "(null)",
+#define XX(cid) [PROTOLAYER_EVENT_##cid] = #cid,
+       PROTOLAYER_EVENT_MAP(XX) /* one entry per mapped event; the name is the stringified identifier */
+#undef XX
+};
+
+char *protolayer_payload_names[PROTOLAYER_PAYLOAD_COUNT] = { /* printable names indexed by payload type; used in debug logs */
+       [PROTOLAYER_PAYLOAD_NULL] = "(null)",
+#define XX(cid, name) [PROTOLAYER_PAYLOAD_##cid] = name,
+       PROTOLAYER_PAYLOAD_MAP(XX) /* one entry per mapped payload type, using the name given in the map */
+#undef XX
+};
+
+
+struct protolayer_payload protolayer_as_buffer(const struct protolayer_payload *payload)
+{ /* Returns, by value, a PROTOLAYER_PAYLOAD_BUFFER description of `payload` where one can be made. */
+       if (payload->type == PROTOLAYER_PAYLOAD_BUFFER)
+               return *payload; /* already a buffer: copy of the descriptor as-is */
+       /* A wire buffer is described by wire_buf_data() and wire_buf_data_length(). */
+       if (payload->type == PROTOLAYER_PAYLOAD_WIRE_BUF)
+               return (struct protolayer_payload){
+                       .type = PROTOLAYER_PAYLOAD_BUFFER,
+                       .buffer = {
+                               .buf = wire_buf_data(payload->wire_buf),
+                               .len = wire_buf_data_length(payload->wire_buf)
+                       }
+               };
+       /* Every other payload type trips the assertion and yields a NULL-typed payload. */
+       kr_assert(false && "Unsupported payload type.");
+       return (struct protolayer_payload){
+               .type = PROTOLAYER_PAYLOAD_NULL
+       };
+}
+
  
  /** Gets context for the layer with the specified index from the manager. */
  static inline struct protolayer_data *protolayer_manager_get(
@@ -79,6 +118,14 @@ static inline struct protolayer_data *protolayer_manager_get(
         return (struct protolayer_data *)(pl_data_beg + offsets[layer_ix]);
  }
  
+static inline bool protolayer_cb_ctx_is_last(struct protolayer_cb_ctx *ctx)
+{ /* True when ctx->layer_ix is the final layer index for the iteration's direction. */
+       unsigned int last_ix = (ctx->direction == PROTOLAYER_UNWRAP)
+               ? ctx->manager->num_layers - 1 /* unwrap ends at the highest index */
+               : 0; /* any other direction ends at index 0 */
+       return ctx->layer_ix == last_ix;
+}
+
  static inline void protolayer_cb_ctx_next(struct protolayer_cb_ctx *ctx)
  {
         if (ctx->direction == PROTOLAYER_UNWRAP)
@@ -88,26 +135,43 @@ static inline void protolayer_cb_ctx_next(struct protolayer_cb_ctx *ctx)
  }
  
  static int protolayer_cb_ctx_finish(struct protolayer_cb_ctx *ctx, int ret,
-                                    bool reset_layers)
+                                    bool deinit_iter_data)
  {
-       if (reset_layers) {
+       struct session2 *session = ctx->manager->session;
+
+       if (deinit_iter_data) {
                 struct protolayer_manager *m = ctx->manager;
                 struct protolayer_globals *globals = &protolayer_globals[m->grp];
                 for (size_t i = 0; i < m->num_layers; i++) {
                         struct protolayer_data *d = protolayer_manager_get(m, i);
-                       if (globals->reset)
-                               globals->reset(m, d);
+                       if (globals->iter_deinit)
+                               globals->iter_deinit(m, d);
                 }
+               m->iter_data_inited = false;
         }
  
+       if (ret)
+               kr_log_debug(PROTOLAYER, "layer context of group '%s' ended with return code %d\n",
+                               protolayer_grp_names[ctx->manager->grp], ret);
+
         if (ctx->status)
-               kr_log_debug(PROTOLAYER, "layer iteration of group '%s' ended with status %d",
-                               protolayer_grp_descs[ctx->manager->grp], ret);
+               kr_log_debug(PROTOLAYER, "layer %u iteration of group '%s' ended with status %d\n",
+                               ctx->layer_ix, protolayer_grp_names[ctx->manager->grp], ctx->status);
  
         if (ctx->finished_cb)
-               ctx->finished_cb(ret, ctx->finished_cb_target,
+               ctx->finished_cb(ret, session, ctx->finished_cb_target,
                                 ctx->finished_cb_baton);
+
+       /* events bounce back from unwrap to wrap */
+       bool bounce_back = (ctx->direction == PROTOLAYER_UNWRAP
+                       && ret == PROTOLAYER_RET_NORMAL
+                       && !ctx->status
+                       && ctx->payload.type == PROTOLAYER_PAYLOAD_EVENT);
+       if (bounce_back)
+               session2_wrap(session, ctx->payload, NULL, NULL, NULL);
+
         free(ctx);
+
         return ret;
  }
  
@@ -129,31 +193,42 @@ static int protolayer_step(struct protolayer_cb_ctx *ctx)
                 enum protolayer_protocol protocol = ldata->protocol;
                 struct protolayer_globals *globals = &protolayer_globals[protocol];
  
+               enum protolayer_cb_result result = PROTOLAYER_CB_RESULT_MAGIC;
                 if (!ldata->processed) { /* Avoid repetition */
                         ctx->async_mode = false;
                         ctx->status = 0;
-                       ctx->result = PROTOLAYER_CB_NULL;
+                       ctx->action = PROTOLAYER_CB_ACTION_NULL;
  
                         protolayer_cb cb = (ctx->direction == PROTOLAYER_UNWRAP)
                                 ? globals->unwrap : globals->wrap;
  
-                       cb(ldata, ctx);
+                       if (cb)
+                               result = cb(ldata, ctx);
+                       else
+                               ctx->action = PROTOLAYER_CB_ACTION_CONTINUE;
                         ldata->processed = true;
+               } else {
+                       kr_assert(false && "Repeated protocol layer step");
                 }
  
-               if (!ctx->result) {
+               if (kr_fails_assert(result == PROTOLAYER_CB_RESULT_MAGIC)) {
+                       /* Callback did not use a continuation function to return. */
+                       return kr_error(EINVAL);
+               }
+
+               if (!ctx->action) {
+                       /* Next step is from a callback */
                         ctx->async_mode = true;
-                       return PROTOLAYER_RET_ASYNC; /* Next step is callback */
+                       return PROTOLAYER_RET_ASYNC;
                 }
  
-               if (ctx->result == PROTOLAYER_CB_WAIT) {
+               if (ctx->action == PROTOLAYER_CB_ACTION_WAIT) {
                         kr_assert(ctx->status == 0);
                         return protolayer_cb_ctx_finish(
                                         ctx, PROTOLAYER_RET_WAITING, false);
                 }
  
-               if (ctx->result == PROTOLAYER_CB_BREAK) {
-                       kr_assert(ctx->status <= 0);
+               if (ctx->action == PROTOLAYER_CB_ACTION_BREAK) {
                         return protolayer_cb_ctx_finish(
                                         ctx, PROTOLAYER_RET_NORMAL, true);
                 }
@@ -164,13 +239,17 @@ static int protolayer_step(struct protolayer_cb_ctx *ctx)
                                         ctx, kr_error(ECANCELED), true);
                 }
  
-               if (ctx->result == PROTOLAYER_CB_CONTINUE) {
+               if (ctx->action == PROTOLAYER_CB_ACTION_CONTINUE) {
+                       if (protolayer_cb_ctx_is_last(ctx))
+                               return protolayer_cb_ctx_finish(
+                                               ctx, PROTOLAYER_RET_NORMAL, true);
+
                         protolayer_cb_ctx_next(ctx);
                         continue;
                 }
  
                 /* Should never get here */
-               kr_assert(false);
+               kr_assert(false && "Invalid layer callback action");
                 return protolayer_cb_ctx_finish(ctx, kr_error(EINVAL), true);
         }
  }
@@ -185,7 +264,7 @@ static int protolayer_step(struct protolayer_cb_ctx *ctx)
  static int protolayer_manager_submit(
                 struct protolayer_manager *manager,
                 enum protolayer_direction direction,
-               char *buf, size_t buf_len, void *target,
+               struct protolayer_payload payload, const void *target,
                 protolayer_finished_cb cb, void *baton)
  {
         size_t layer_ix = (direction == PROTOLAYER_UNWRAP)
@@ -194,8 +273,31 @@ static int protolayer_manager_submit(
         struct protolayer_cb_ctx *ctx = malloc(sizeof(*ctx)); // TODO - mempool?
         kr_require(ctx);
  
+       if (kr_log_is_debug(PROTOLAYER, NULL)) {
+               const char *sess_dir = manager->session->outgoing ? "out" : "in";
+               const char *event_name = (payload.type == PROTOLAYER_PAYLOAD_EVENT)
+                       ? protolayer_event_names[payload.event.type]
+                       : "";
+               const char *event_space = (payload.type == PROTOLAYER_PAYLOAD_EVENT) ? " " : "";
+               kr_log_debug(PROTOLAYER, "[%s] %s%s%s submitted to grp '%s' in %s direction\n",
+                               sess_dir,
+                               protolayer_payload_names[payload.type],
+                               event_space, event_name,
+                               protolayer_grp_names[manager->grp],
+                               (direction == PROTOLAYER_UNWRAP) ? "unwrap" : "wrap");
+       }
+
+       for (size_t i = 0; i < manager->num_layers; i++) {
+               struct protolayer_data *data = protolayer_manager_get(manager, i);
+               data->processed = false;
+               struct protolayer_globals *globals = &protolayer_globals[data->protocol];
+               if (globals->iter_init)
+                       globals->iter_init(manager, data);
+       }
+
         *ctx = (struct protolayer_cb_ctx) {
-               .data = { .target = target },
+               .payload = payload,
+               .target = target,
                 .direction = direction,
                 .layer_ix = layer_ix,
                 .manager = manager,
@@ -203,7 +305,6 @@ static int protolayer_manager_submit(
                 .finished_cb_target = target,
                 .finished_cb_baton = baton
         };
-       protolayer_set_buffer(ctx, buf, buf_len);
  
         return protolayer_step(ctx);
  }
@@ -234,13 +335,17 @@ struct protolayer_manager *protolayer_manager_new(struct session2 *s,
         size_t total_data_size = 0;
         for (size_t i = 0; i < num_layers; i++) {
                 offsets[i] = total_data_size;
-               size_t d = protolayer_globals[protocols[i]].data_size;
-               size += ALIGN_TO(d, CPU_STRUCT_ALIGN);
+               total_data_size += ALIGN_TO(sizeof(struct protolayer_data),
+                               CPU_STRUCT_ALIGN);
+               total_data_size += ALIGN_TO(protolayer_globals[protocols[i]].sess_size,
+                               CPU_STRUCT_ALIGN);
+               total_data_size += ALIGN_TO(protolayer_globals[protocols[i]].iter_size,
+                               CPU_STRUCT_ALIGN);
         }
         size += total_data_size;
  
         /* Allocate and initialize manager */
-       struct protolayer_manager *m = malloc(size);
+       struct protolayer_manager *m = calloc(1, size);
         kr_require(m);
         m->grp = grp;
         m->session = s;
@@ -252,8 +357,9 @@ struct protolayer_manager *protolayer_manager_new(struct session2 *s,
                 struct protolayer_globals *globals = &protolayer_globals[protocols[i]];
                 struct protolayer_data *data = protolayer_manager_get(m, i);
                 data->protocol = protocols[i];
-               data->size = globals->data_size;
-               globals->init(m, data);
+               data->sess_size = ALIGN_TO(globals->sess_size, CPU_STRUCT_ALIGN);
+               if (globals->sess_init)
+                       globals->sess_init(m, data);
         }
  
         return m;
@@ -265,128 +371,542 @@ void protolayer_manager_free(struct protolayer_manager *m)
  
         for (size_t i = 0; i < m->num_layers; i++) {
                 struct protolayer_data *data = protolayer_manager_get(m, i);
-               protolayer_globals[data->protocol].deinit(m, data);
+               struct protolayer_globals *globals = &protolayer_globals[data->protocol];
+               if (globals->sess_deinit)
+                       globals->sess_deinit(m, data);
         }
  
         free(m);
  }
  
-void protolayer_continue(struct protolayer_cb_ctx *ctx)
+enum protolayer_cb_result protolayer_continue(struct protolayer_cb_ctx *ctx) /* NOTE(review): the async branch advances without the protolayer_cb_ctx_is_last() check used on protolayer_step()'s CONTINUE path — confirm */
  {
         if (ctx->async_mode) {
                 protolayer_cb_ctx_next(ctx);
                 protolayer_step(ctx);
         } else {
-               ctx->result = PROTOLAYER_CB_CONTINUE;
+               ctx->action = PROTOLAYER_CB_ACTION_CONTINUE; /* synchronous case: only record the action for protolayer_step() */
         }
+       return PROTOLAYER_CB_RESULT_MAGIC; /* protolayer_step() checks the layer callback's result against this value */
  }
  
-void protolayer_wait(struct protolayer_cb_ctx *ctx)
+enum protolayer_cb_result protolayer_wait(struct protolayer_cb_ctx *ctx) /* async: finish with PROTOLAYER_RET_WAITING without iter-data teardown; sync: record WAIT */
  {
         if (ctx->async_mode) {
                 protolayer_cb_ctx_finish(ctx, PROTOLAYER_RET_WAITING, false);
         } else {
-               ctx->result = PROTOLAYER_CB_WAIT;
+               ctx->action = PROTOLAYER_CB_ACTION_WAIT; /* acted upon by protolayer_step() */
         }
+       return PROTOLAYER_CB_RESULT_MAGIC; /* protolayer_step() checks the layer callback's result against this value */
  }
  
-void protolayer_break(struct protolayer_cb_ctx *ctx, int status)
+enum protolayer_cb_result protolayer_break(struct protolayer_cb_ctx *ctx, int status) /* stores `status`; async: finish with PROTOLAYER_RET_NORMAL and iter-data teardown; sync: record BREAK */
  {
         ctx->status = status;
         if (ctx->async_mode) {
                 protolayer_cb_ctx_finish(ctx, PROTOLAYER_RET_NORMAL, true);
         } else {
-               ctx->result = PROTOLAYER_CB_BREAK;
+               ctx->action = PROTOLAYER_CB_ACTION_BREAK; /* acted upon by protolayer_step() */
         }
+       return PROTOLAYER_CB_RESULT_MAGIC; /* protolayer_step() checks the layer callback's result against this value */
  }
  
-static void protolayer_push_finished(struct session2 *s, int status, void *target, void *baton)
+static void protolayer_push_finished(int status, struct session2 *s, const void *target, void *baton) /* completion callback passed to the transport push functions by protolayer_push() */
  {
-       protolayer_break(baton, status);
+       struct protolayer_cb_ctx *ctx = baton; /* protolayer_push() passes its ctx as the baton */
+       if (ctx->converted_wire_buf) { /* set by protolayer_push() when it converted a wire-buffer payload */
+               wire_buf_reset(ctx->converted_wire_buf);
+               ctx->converted_wire_buf = NULL;
+       }
+       protolayer_break(ctx, status); /* ends the iteration, carrying the push status */
  }
  
-void protolayer_pushv(struct protolayer_cb_ctx *ctx,
-                      struct iovec *iov, int iovcnt,
-                      void *target)
+enum protolayer_cb_result protolayer_push(struct protolayer_cb_ctx *ctx) /* hands ctx->payload to the session's transport, dispatching on the payload type */
  {
-       int ret = session2_transport_pushv(ctx->manager->session, iov, iovcnt,
-                       target, protolayer_push_finished, ctx);
+       int ret;
+       struct session2 *session = ctx->manager->session;
+       /* Wire buffers are pushed as plain buffers; the original is remembered so protolayer_push_finished() can reset it. */
+       if (ctx->payload.type == PROTOLAYER_PAYLOAD_WIRE_BUF) {
+               ctx->converted_wire_buf = ctx->payload.wire_buf;
+               ctx->payload = protolayer_as_buffer(&ctx->payload);
+       }
+
+       if (kr_log_is_debug(PROTOLAYER, NULL)) {
+               kr_log_debug(PROTOLAYER, "Pushing %s\n",
+                               protolayer_payload_names[ctx->payload.type]);
+       }
+
+       if (ctx->payload.type == PROTOLAYER_PAYLOAD_BUFFER) {
+               ret = session2_transport_push(session,
+                               ctx->payload.buffer.buf, ctx->payload.buffer.len,
+                               ctx->target, protolayer_push_finished, ctx);
+       } else if (ctx->payload.type == PROTOLAYER_PAYLOAD_IOVEC) {
+               ret = session2_transport_pushv(session,
+                               ctx->payload.iovec.iov, ctx->payload.iovec.cnt,
+                               ctx->target, protolayer_push_finished, ctx);
+       } else if (ctx->payload.type == PROTOLAYER_PAYLOAD_EVENT) {
+               ret = session2_transport_event(session,
+                               ctx->payload.event,
+                               ctx->target, protolayer_push_finished, ctx);
+       } else {
+               kr_assert(false && "Invalid payload type");
+               ret = kr_error(EINVAL);
+       }
+       /* NOTE(review): on a push error ctx is neither finished nor freed here, and converted_wire_buf is not reset — confirm this is intended. */
+       /* Push error - otherwise the callback will be called by a push
+        * function called above. */
         if (ret && ctx->finished_cb)
-               ctx->finished_cb(ret, ctx->finished_cb_target,
+               ctx->finished_cb(ret, session, ctx->finished_cb_target,
                                 ctx->finished_cb_baton);
+
+       return PROTOLAYER_CB_RESULT_MAGIC; /* protolayer_step() checks the layer callback's result against this value */
  }
  
-void protolayer_push(struct protolayer_cb_ctx *ctx, char *buf, size_t buf_len,
-                     void *target)
+
+int wire_buf_init(struct wire_buf *wb, size_t initial_size) /* allocates `initial_size` bytes and overwrites *wb; always returns kr_ok() */
  {
-       int ret = session2_transport_push(ctx->manager->session, buf, buf_len,
-                       target, protolayer_push_finished, ctx);
-       if (ret && ctx->finished_cb)
-               ctx->finished_cb(ret, ctx->finished_cb_target,
-                               ctx->finished_cb_baton);
+       char *buf = malloc(initial_size);
+       kr_require(buf); /* the allocation result is checked with kr_require() rather than reported to the caller */
+       /* Members not named in the initializer below are zero-initialized. */
+       *wb = (struct wire_buf){
+               .buf = buf,
+               .size = initial_size
+       };
+
+       return kr_ok();
+}
+
+void wire_buf_deinit(struct wire_buf *wb)
+{ /* Frees the backing storage only; *wb itself is not freed and none of its fields are reset. */
+       free(wb->buf); /* wb->buf keeps the stale pointer afterwards */
+}
+
+int wire_buf_reserve(struct wire_buf *wb, size_t size)
+{ /* Ensures the buffer holds at least `size` bytes; never shrinks; start/end are left untouched. Always returns kr_ok(). */
+       if (wb->buf && wb->size >= size)
+               return kr_ok(); /* already large enough */
+       /* The realloc() result is checked with kr_require() rather than reported to the caller. */
+       wb->buf = realloc(wb->buf, size);
+       kr_require(wb->buf);
+       wb->size = size;
+       return kr_ok();
+}
+
+int wire_buf_consume(struct wire_buf *wb, size_t length)
+{
+       /* Advances `end` by `length`, or returns kr_error(EINVAL); the check is written so `wb->end + length` cannot wrap around. */
+       if (kr_fails_assert(wb->buf && wb->end <= wb->size && length <= wb->size - wb->end))
+               return kr_error(EINVAL);
+
+       wb->end += length;
+       return kr_ok();
+}
+
+int wire_buf_trim(struct wire_buf *wb, size_t length)
+{
+       /* Advances `start` by `length`, or returns kr_error(EINVAL); the check is written so `wb->start + length` cannot wrap around. */
+       if (kr_fails_assert(wb->start <= wb->end && length <= wb->end - wb->start))
+               return kr_error(EINVAL);
+
+       wb->start += length;
+       return kr_ok();
+}
+
+int wire_buf_movestart(struct wire_buf *wb)
+{ /* Moves the data to the beginning of the buffer so that `start` becomes 0; kr_error(EINVAL) if there is no buffer. */
+       if (kr_fails_assert(wb->buf))
+               return kr_error(EINVAL);
+       if (wb->start == 0)
+               return kr_ok(); /* nothing to move */
+       /* memmove() is used because source and destination lie in the same buffer and may overlap. */
+       size_t len = wire_buf_data_length(wb);
+       if (len)
+               memmove(wb->buf, wire_buf_data(wb), len);
+       wb->end -= wb->start; /* shift both indices down by the old start */
+       wb->start = 0;
+       return kr_ok();
+}
+
+int wire_buf_reset(struct wire_buf *wb)
+{ /* Clears start, end and the error flag; buf and size are not touched. Always returns kr_ok(). */
+       wb->start = 0;
+       wb->end = 0;
+       wb->error = false;
+       return kr_ok();
  }
  
  
  struct session2 *session2_new(enum session2_transport_type transport_type,
-                              void *transport_ctx,
                                enum protolayer_grp layer_grp,
                                bool outgoing)
  {
-       kr_require(transport_type && transport_ctx && layer_grp);
+       kr_require(transport_type && layer_grp); /* both arguments must be non-zero */
  
         struct session2 *s = malloc(sizeof(*s));
         kr_require(s);
  
-       s->transport.type = transport_type;
-       s->transport.ctx = transport_ctx;
-
-       s->layers = protolayer_manager_new(s, layer_grp);
-       if (!s->layers) {
+       struct protolayer_manager *layers = protolayer_manager_new(s, layer_grp); /* NOTE(review): `s` is still uninitialized here — confirm nothing reads it during layer init */
+       if (!layers) {
                 free(s);
                 return NULL;
         }
  
-       s->outgoing = outgoing;
+       *s = (struct session2) { /* members not named below are zero-initialized */
+               .transport = {
+                       .type = transport_type,
+               },
+               .layers = layers,
+               .outgoing = outgoing,
+               .tasks = trie_create(NULL),
+       };
+
+       mm_ctx_mempool(&s->pool, 4 * CPU_PAGE_SIZE);
+       queue_init(s->waiting);
+
+       int ret = wire_buf_init(&s->wire_buf, KNOT_WIRE_MAX_PKTSIZE);
+       kr_require(!ret);
+
+       ret = uv_timer_init(uv_default_loop(), &s->timer);
+       kr_require(!ret);
+       s->timer.data = s; /* lets session2_timer_on_close() recover the session from the handle */
+
+       session2_touch(s);
  
         return s;
  }
  
-void session2_free(struct session2 *s)
+/** Close callback of the session timer (passed to `uv_close()` by
+ * `session2_free()`). Releases the layer manager and the session itself.
+ * NOTE(review): `s->pool` and `s->wire_buf`, both set up in `session2_new()`,
+ * do not appear to be released here - confirm whether they leak. */
+static void session2_timer_on_close(uv_handle_t *handle)
  {
+       struct session2 *s = handle->data;
         protolayer_manager_free(s->layers);
         free(s);
  }
  
-int session2_unwrap(struct session2 *s, char *buf, size_t buf_len, void *target,
-                    protolayer_finished_cb cb, void *baton)
+/** Starts destruction of a session: frees the task list and the waiting
+ * queue, then closes the session timer. The session memory itself is
+ * released later, in the timer's close callback. */
+void session2_free(struct session2 *s)
+{
+       trie_free(s->tasks);
+       queue_deinit(s->waiting);
+
+       uv_handle_t *timer_handle = (uv_handle_t *)&s->timer;
+       uv_close(timer_handle, session2_timer_on_close);
+}
+
+/** Starts reading on the IO handle of the session. Only sessions with the
+ * `SESSION2_TRANSPORT_IO` transport are supported; any other transport
+ * trips an assertion and yields `kr_error(EINVAL)`. */
+int session2_start_read(struct session2 *session)
+{
+       if (session->transport.type != SESSION2_TRANSPORT_IO) {
+               /* TODO - probably just some event for this */
+               kr_assert(false && "Parent start_read unsupported");
+               return kr_error(EINVAL);
+       }
+
+       return io_start_read(session->transport.io.handle);
+}
+
+/** Stops reading on the IO handle of the session. Only sessions with the
+ * `SESSION2_TRANSPORT_IO` transport are supported; any other transport
+ * trips an assertion and yields `kr_error(EINVAL)`. */
+int session2_stop_read(struct session2 *session)
+{
+       if (session->transport.type != SESSION2_TRANSPORT_IO) {
+               /* TODO - probably just some event for this */
+               kr_assert(false && "Parent stop_read unsupported");
+               return kr_error(EINVAL);
+       }
+
+       return io_stop_read(session->transport.io.handle);
+}
+
+/** Returns the peer address of the session. Parent links are followed until
+ * a session with a non-parent transport is reached; the result is NULL
+ * unless that session uses the IO transport. */
+struct sockaddr *session2_get_peer(struct session2 *s)
+{
+       for (; s; s = s->transport.parent) {
+               if (s->transport.type != SESSION2_TRANSPORT_PARENT)
+                       break;
+       }
+
+       if (!s || s->transport.type != SESSION2_TRANSPORT_IO)
+               return NULL;
+       return &s->transport.io.peer.ip;
+}
+
+/** Returns the local socket address of the session. Parent links are
+ * followed until a session with a non-parent transport is reached; the
+ * result is NULL unless that session uses the IO transport. */
+struct sockaddr *session2_get_sockname(struct session2 *s)
+{
+       for (; s; s = s->transport.parent) {
+               if (s->transport.type != SESSION2_TRANSPORT_PARENT)
+                       break;
+       }
+
+       if (!s || s->transport.type != SESSION2_TRANSPORT_IO)
+               return NULL;
+       return &s->transport.io.sockname.ip;
+}
+
+/** Returns the libuv handle of the session. Parent links are followed until
+ * a session with a non-parent transport is reached; the result is NULL
+ * unless that session uses the IO transport. */
+uv_handle_t *session2_get_handle(struct session2 *s)
+{
+       for (; s; s = s->transport.parent) {
+               if (s->transport.type != SESSION2_TRANSPORT_PARENT)
+                       break;
+       }
+
+       if (!s || s->transport.type != SESSION2_TRANSPORT_IO)
+               return NULL;
+       return s->transport.io.handle;
+}
+
+/** Timer callback: submits a `PROTOLAYER_EVENT_TIMEOUT` event to the layers
+ * of the owning session, in the direction stored by
+ * `session2_timer_start()`. No finished-callback is registered. */
+static void session2_on_timeout(uv_timer_t *timer)
+{
+       struct session2 *session = timer->data;
+       struct protolayer_payload timeout_event =
+               protolayer_event_nd(PROTOLAYER_EVENT_TIMEOUT);
+
+       protolayer_manager_submit(session->layers, session->timer_direction,
+                       timeout_event, NULL, NULL, NULL);
+}
+
+/** Starts the session timer. `direction` is remembered and later used as the
+ * direction of the timeout event; `timeout` and `repeat` are passed to
+ * `uv_timer_start()` unchanged. Returns the result of `uv_timer_start()`. */
+int session2_timer_start(struct session2 *s, uint64_t timeout, uint64_t repeat,
+                          enum protolayer_direction direction)
+{
+       s->timer_direction = direction;
+
+       const int rc = uv_timer_start(&s->timer, session2_on_timeout,
+                       timeout, repeat);
+       return rc;
+}
+
+/** Restarts the session timer via `uv_timer_again()` and returns its
+ * result. */
+int session2_timer_restart(struct session2 *s)
+{
+       uv_timer_t *timer = &s->timer;
+       return uv_timer_again(timer);
+}
+
+/** Stops the session timer via `uv_timer_stop()` and returns its result. */
+int session2_timer_stop(struct session2 *s)
+{
+       uv_timer_t *timer = &s->timer;
+       return uv_timer_stop(timer);
+}
+
+/** Adds `task` to the task list of the session, taking a reference to it.
+ *
+ * Outgoing sessions key the list by the message ID found in the task's
+ * packet buffer; other sessions key it by the task pointer value. Adding a
+ * task that is already stored under its key is a no-op. Returns
+ * `kr_error(ENOMEM)` when the trie slot cannot be obtained and
+ * `kr_error(EINVAL)` when the key is occupied by a different task. */
+int session2_tasklist_add(struct session2 *session, struct qr_task *task)
+{
+       uint16_t msg_id = 0;
+       const char *key = (const char *)&task;
+       size_t key_len = sizeof(char *);
+       if (session->outgoing) {
+               knot_pkt_t *pkt = worker_task_get_pktbuf(task);
+               msg_id = knot_wire_get_id(pkt->wire);
+               key = (const char *)&msg_id;
+               key_len = sizeof(msg_id);
+       }
+
+       trie_val_t *v = trie_get_ins(session->tasks, key, key_len);
+       if (kr_fails_assert(v))
+               return kr_error(ENOMEM);
+
+       if (*v != NULL) {
+               /* The key is taken; that is fine only for the same task. */
+               if (kr_fails_assert(*v == task))
+                       return kr_error(EINVAL);
+               return kr_ok();
+       }
+
+       *v = task;
+       worker_task_ref(task);
+       return kr_ok();
+}
+
+/** Removes `task` from the task list of the session and drops the reference
+ * held by the list.
+ *
+ * The key is built the same way as in `session2_tasklist_add()`. Returns the
+ * result of `trie_del()`; the reference is dropped only on `KNOT_EOK`, in
+ * which case the removed value is required to be `task`. */
+int session2_tasklist_del(struct session2 *session, struct qr_task *task)
+{
+       uint16_t msg_id = 0;
+       const char *key = (const char *)&task;
+       size_t key_len = sizeof(char *);
+       if (session->outgoing) {
+               knot_pkt_t *pkt = worker_task_get_pktbuf(task);
+               msg_id = knot_wire_get_id(pkt->wire);
+               key = (const char *)&msg_id;
+               key_len = sizeof(msg_id);
+       }
+
+       trie_val_t val;
+       const int ret = trie_del(session->tasks, key, key_len, &val);
+       if (ret != KNOT_EOK)
+               return ret;
+
+       kr_require(val == task);
+       worker_task_unref(val);
+       return ret;
+}
+
+/** Returns the first task of the task list as reported by
+ * `trie_get_first()`, or NULL when there is none. The list is not
+ * modified. */
+struct qr_task *session2_tasklist_get_first(struct session2 *session)
+{
+       trie_val_t *first = trie_get_first(session->tasks, NULL, NULL);
+       if (!first)
+               return NULL;
+
+       return (struct qr_task *) *first;
+}
+
+/** Removes the first task from the task list and returns it, or NULL when
+ * `trie_del_first()` does not succeed. With `deref` set, the reference held
+ * by the list is dropped before returning; otherwise it passes to the
+ * caller. */
+struct qr_task *session2_tasklist_del_first(struct session2 *session, bool deref)
+{
+       trie_val_t removed = NULL;
+       if (trie_del_first(session->tasks, NULL, NULL, &removed) != KNOT_EOK)
+               return NULL;
+
+       if (deref)
+               worker_task_unref(removed);
+       return (struct qr_task *)removed;
+}
+
+/** Looks up the task stored under message ID `msg_id` in the task list of an
+ * outgoing session.
+ *
+ * This is a pure lookup: the list and the reference count of the task are
+ * left untouched (the previous body removed the entry and dropped its
+ * reference, which is the job of `session2_tasklist_del_msgid()`).
+ * Returns NULL when no such task exists or when the session is not
+ * outgoing. */
+struct qr_task *session2_tasklist_find_msgid(const struct session2 *session, uint16_t msg_id)
+{
+       if (kr_fails_assert(session->outgoing))
+               return NULL;
+
+       trie_val_t *val = trie_get_try(session->tasks, (char *)&msg_id,
+                       sizeof(msg_id));
+       return val ? (struct qr_task *) *val : NULL;
+}
+
+/** Removes the task stored under message ID `msg_id` from the task list of
+ * an outgoing session and drops the reference held by the list.
+ *
+ * The previous body only looked the task up and never removed it. The task
+ * is returned only when somebody else still holds a reference to it (more
+ * than one reference before the list's own is dropped); otherwise NULL is
+ * returned, as the task may no longer exist after the unref. NULL is also
+ * returned when no such task exists or when the session is not outgoing. */
+struct qr_task *session2_tasklist_del_msgid(const struct session2 *session, uint16_t msg_id)
+{
+       if (kr_fails_assert(session->outgoing))
+               return NULL;
+
+       trie_val_t val;
+       int res = trie_del(session->tasks, (const char *)&msg_id,
+                       sizeof(msg_id), &val);
+       if (res != KNOT_EOK)
+               return NULL;
+
+       struct qr_task *ret = NULL;
+       if (worker_task_numrefs(val) > 1)
+               ret = val;
+       worker_task_unref(val);
+       return ret;
+}
+
+/** Empties the task list: every task is removed, finalized with `status`,
+ * and only then is the reference formerly held by the list dropped. */
+void session2_tasklist_finalize(struct session2 *session, int status)
+{
+       for (;;) {
+               if (session2_tasklist_get_len(session) <= 0)
+                       break;
+
+               /* Keep the list's reference until finalization is done. */
+               struct qr_task *t = session2_tasklist_del_first(session, false);
+               kr_require(worker_task_numrefs(t) > 0);
+               worker_task_finalize(t, status);
+               worker_task_unref(t);
+       }
+}
+
+/** Finalizes all tasks of the session whose age has reached
+ * `KR_RESOLVE_TIME_LIMIT` and removes them from the task list.
+ *
+ * Works in two passes so that the trie is not modified while it is being
+ * iterated: expired tasks are first collected in a local queue, then removed
+ * and finalized. Returns the number of tasks processed. */
+int session2_tasklist_finalize_expired(struct session2 *session)
+{
+       int ret = 0;
+       queue_t(struct qr_task *) q;
+       uint64_t now = kr_now();
+       trie_t *t = session->tasks;
+       trie_it_t *it;
+       queue_init(q);
+       /* Pass 1: collect expired tasks; each gets an extra reference that
+        * belongs to the local queue. */
+       for (it = trie_it_begin(t); !trie_it_finished(it); trie_it_next(it)) {
+               trie_val_t *v = trie_it_val(it);
+               struct qr_task *task = (struct qr_task *)*v;
+               if ((now - worker_task_creation_time(task)) >= KR_RESOLVE_TIME_LIMIT) {
+                       struct kr_request *req = worker_task_request(task);
+                       if (!kr_fails_assert(req))
+                               kr_query_inform_timeout(req, req->current_query);
+                       queue_push(q, task);
+                       worker_task_ref(task);
+               }
+       }
+       trie_it_free(it);
+
+       /* `key` points at a local variable (`task` or `msg_id`) that is
+        * updated on each iteration of the loop below, so the same key
+        * pointer addresses the current task every time. */
+       struct qr_task *task = NULL;
+       uint16_t msg_id = 0;
+       char *key = (char *)&task;
+       int32_t keylen = sizeof(struct qr_task *);
+       if (session->outgoing) {
+               key = (char *)&msg_id;
+               keylen = sizeof(msg_id);
+       }
+       /* Pass 2: remove each collected task from the trie and finalize it. */
+       while (queue_len(q) > 0) {
+               task = queue_head(q);
+               if (session->outgoing) {
+                       knot_pkt_t *pktbuf = worker_task_get_pktbuf(task);
+                       msg_id = knot_wire_get_id(pktbuf->wire);
+               }
+               int res = trie_del(t, key, keylen, NULL);
+               if (!worker_task_finished(task)) {
+                       /* task->pending_count must be zero,
+                        * but there can be followers,
+                        * so run worker_task_subreq_finalize() to ensure retrying
+                        * for all the followers. */
+                       worker_task_subreq_finalize(task);
+                       worker_task_finalize(task, KR_STATE_FAIL);
+               }
+               /* Drop the trie's reference only if the entry was really
+                * removed from it. */
+               if (res == KNOT_EOK) {
+                       worker_task_unref(task);
+               }
+               queue_pop(q);
+               /* Drop the reference taken for the local queue in pass 1. */
+               worker_task_unref(task);
+               ++ret;
+       }
+
+       queue_deinit(q);
+       return ret;
+}
+
+/** Appends `task` to the waiting queue of the session and takes a reference
+ * to it on behalf of the queue. Always returns `kr_ok()`. */
+int session2_waitinglist_push(struct session2 *session, struct qr_task *task)
+{
+       queue_push(session->waiting, task);
+       worker_task_ref(task);
+
+       return kr_ok();
+}
+
+/** Returns the task at the head of the waiting queue without removing it,
+ * or NULL when the queue is empty. */
+struct qr_task *session2_waitinglist_get(const struct session2 *session)
+{
+       if (queue_len(session->waiting) > 0)
+               return queue_head(session->waiting);
+
+       return NULL;
+}
+
+/** Removes the task at the head of the waiting queue and returns it.
+ *
+ * With `deref` set, the reference held by the queue is dropped before
+ * returning; otherwise it passes to the caller. Returns NULL when the queue
+ * is empty. In that case nothing is popped and nothing is unreferenced;
+ * previously an empty queue was still popped and, with `deref`, a NULL task
+ * was handed to `worker_task_unref()`. */
+struct qr_task *session2_waitinglist_pop(struct session2 *session, bool deref)
+{
+       if (queue_len(session->waiting) == 0)
+               return NULL;
+
+       struct qr_task *t = queue_head(session->waiting);
+       queue_pop(session->waiting);
+       if (deref) {
+               worker_task_unref(t);
+       }
+       return t;
+}
+
+/** Drains the waiting queue, stepping every task again with the peer
+ * address of the session. With `increase_timeout_cnt` set, the timeout
+ * counter of each task is incremented first. The reference held by the
+ * queue is dropped after the step. */
+void session2_waitinglist_retry(struct session2 *session, bool increase_timeout_cnt)
+{
+       for (;;) {
+               if (session2_waitinglist_is_empty(session))
+                       break;
+
+               struct qr_task *waiting = session2_waitinglist_pop(session, false);
+               if (increase_timeout_cnt)
+                       worker_task_timeout_inc(waiting);
+
+               worker_task_step(waiting, session2_get_peer(session), NULL);
+               worker_task_unref(waiting);
+       }
+}
+
+/** Drains the waiting queue, finalizing every task with `status` and then
+ * dropping the reference formerly held by the queue. */
+void session2_waitinglist_finalize(struct session2 *session, int status)
+{
+       for (;;) {
+               if (session2_waitinglist_is_empty(session))
+                       break;
+
+               struct qr_task *waiting = session2_waitinglist_pop(session, false);
+               worker_task_finalize(waiting, status);
+               worker_task_unref(waiting);
+       }
+}
+
+/** Submits `payload` to the layers of session `s` in the
+ * `PROTOLAYER_UNWRAP` direction. `target`, `cb` and `baton` are handed to
+ * the layer manager unchanged; the manager's result is returned. */
+int session2_unwrap(struct session2 *s, struct protolayer_payload payload,
+                    const void *target, protolayer_finished_cb cb, void *baton)
  {
         return protolayer_manager_submit(s->layers, PROTOLAYER_UNWRAP,
-                       buf, buf_len, target, cb, baton);
+                       payload, target, cb, baton);
  }
  
-int session2_wrap(struct session2 *s, char *buf, size_t buf_len, void *target,
-                  protolayer_finished_cb cb, void *baton)
+int session2_wrap(struct session2 *s, struct protolayer_payload payload,
+                  const void *target, protolayer_finished_cb cb, void *baton)
  {
+       /* Submits `payload` to the layers of session `s` in the
+        * PROTOLAYER_WRAP direction; `target`, `cb` and `baton` are handed
+        * to the layer manager unchanged and its result is returned. */
         return protolayer_manager_submit(s->layers, PROTOLAYER_WRAP,
-                       buf, buf_len, target, cb, baton);
+                       payload, target, cb, baton);
  }
  
  
  struct parent_pushv_ctx {
+       /* Context carried through an asynchronous push; released by the
+        * `*_pushv_finished` callbacks together with `buf`. */
         struct session2 *session;
-       session2_push_cb cb;
-       void *target;
+       /* Callback invoked (if non-NULL) once the push has finished. */
+       protolayer_finished_cb cb;
+       const void *target;
         void *baton;
  
+       /* Buffer passed to free() when the push finishes. */
         char *buf;
         size_t buf_len;
  };
  
-static void session2_transport_parent_pushv_finished(int status, void *target, void *baton)
+/** Finished-callback used when a push was forwarded to a parent session.
+ * `baton` is the `struct parent_pushv_ctx` of the push. The user callback
+ * is invoked with the pushing session stored in the context (not with the
+ * `session` argument), then the context and its buffer are freed. */
+static void session2_transport_parent_pushv_finished(int status,
+                                                     struct session2 *session,
+                                                     const void *target,
+                                                     void *baton)
  {
         struct parent_pushv_ctx *ctx = baton;
         if (ctx->cb)
-               ctx->cb(ctx->session, status, target, ctx->baton);
+               ctx->cb(status, ctx->session, target, ctx->baton);
         free(ctx->buf);
         free(ctx);
  }
@@ -395,7 +915,7 @@ static void session2_transport_udp_pushv_finished(uv_udp_send_t *req, int status
  {
         struct parent_pushv_ctx *ctx = req->data;
         if (ctx->cb)
-               ctx->cb(ctx->session, status, ctx->target, ctx->baton);
+               ctx->cb(status, ctx->session, ctx->target, ctx->baton);
         free(ctx->buf);
         free(ctx);
         free(req);
@@ -405,7 +925,7 @@ static void session2_transport_stream_pushv_finished(uv_write_t *req, int status
  {
         struct parent_pushv_ctx *ctx = req->data;
         if (ctx->cb)
-               ctx->cb(ctx->session, status, ctx->target, ctx->baton);
+               ctx->cb(status, ctx->session, ctx->target, ctx->baton);
         free(ctx->buf);
         free(ctx);
         free(req);
@@ -448,15 +968,15 @@ static int concat_iovs(const struct iovec *iov, int iovcnt, char **buf, size_t *
  
  static int session2_transport_pushv(struct session2 *s,
                                      const struct iovec *iov, int iovcnt,
-                                    void *target,
-                                    session2_push_cb cb, void *baton)
+                                    const void *target,
+                                    protolayer_finished_cb cb, void *baton)
  {
         if (kr_fails_assert(s))
                 return kr_error(EINVAL);
  
         struct parent_pushv_ctx *ctx = malloc(sizeof(*ctx));
         kr_require(ctx);
-       *ctx = (struct parent_pushv_ctx) {
+       *ctx = (struct parent_pushv_ctx){
                 .session = s,
                 .cb = cb,
                 .baton = baton,
@@ -464,8 +984,8 @@ static int session2_transport_pushv(struct session2 *s,
         };
  
         switch (s->transport.type) {
-       case SESSION2_TRANSPORT_HANDLE:;
-               uv_handle_t *handle = s->transport.handle;
+       case SESSION2_TRANSPORT_IO:;
+               uv_handle_t *handle = s->transport.io.handle;
                 if (kr_fails_assert(handle)) {
                         free(ctx);
                         return kr_error(EINVAL);
@@ -484,12 +1004,12 @@ static int session2_transport_pushv(struct session2 *s,
                         uv_write(req, (uv_stream_t *)handle, (uv_buf_t *)iov, iovcnt,
                                         session2_transport_stream_pushv_finished);
                         return kr_ok();
+               } else {
+                       kr_assert(false && "Unsupported handle");
+                       free(ctx);
+                       return kr_error(EINVAL);
                 }
  
-               kr_assert(false && "Unsupported handle");
-               free(ctx);
-               return kr_error(EINVAL);
-
         case SESSION2_TRANSPORT_PARENT:;
                 struct session2 *parent = s->transport.parent;
                 if (kr_fails_assert(parent)) {
@@ -501,8 +1021,9 @@ static int session2_transport_pushv(struct session2 *s,
                         free(ctx);
                         return ret;
                 }
-               session2_wrap(parent, ctx->buf, ctx->buf_len, target,
-                               session2_transport_parent_pushv_finished, ctx);
+               session2_wrap(parent, protolayer_buffer(ctx->buf, ctx->buf_len),
+                               target, session2_transport_parent_pushv_finished,
+                               ctx);
                 return kr_ok();
  
         default:
@@ -514,28 +1035,29 @@ static int session2_transport_pushv(struct session2 *s,
  
  struct push_ctx {
+       /* Context of a single-buffer push: the buffer wrapped into one
+        * iovec plus the user callback and its baton. */
         struct iovec iov;
-       session2_push_cb cb;
+       protolayer_finished_cb cb;
         void *baton;
  };
  
-static void session2_transport_single_push_finished(struct session2 *s,
-                                                    int status,
-                                                    void *target, void *baton)
+/** Finished-callback of a single-buffer push. `baton` is the
+ * `struct push_ctx` of the push: the user callback stored in it is invoked
+ * (if set) with the user's own baton, then the context is freed. */
+static void session2_transport_single_push_finished(int status,
+                                                    struct session2 *s,
+                                                    const void *target,
+                                                    void *baton)
  {
         struct push_ctx *ctx = baton;
         if (ctx->cb)
-               ctx->cb(s, status, target, ctx->baton);
+               ctx->cb(status, s, target, ctx->baton);
         free(ctx);
  }
  
  static inline int session2_transport_push(struct session2 *s,
                                            char *buf, size_t buf_len,
-                                          void *target,
-                                          session2_push_cb cb, void *baton)
+                                          const void *target,
+                                          protolayer_finished_cb cb, void *baton)
  {
         struct push_ctx *ctx = malloc(sizeof(*ctx));
         kr_require(ctx);
-       *ctx = (struct push_ctx) {
+       *ctx = (struct push_ctx){
                 .iov = {
                         .iov_base = buf,
                         .iov_len = buf_len
@@ -547,3 +1069,107 @@ static inline int session2_transport_push(struct session2 *s,
         return session2_transport_pushv(s, &ctx->iov, 1, target,
                         session2_transport_single_push_finished, ctx);
  }
+
+/** Context of an event handled at the transport level. It is heap-allocated
+ * in `session2_transport_event()` and freed by the `*_event_finished`
+ * callbacks. */
+struct event_ctx {
+       /** Session the event was issued on. */
+       struct session2 *session;
+       /** User callback to invoke once the event is finished; may be NULL. */
+       protolayer_finished_cb cb;
+       /** Opaque pointer passed to `cb`. */
+       void *baton;
+       /** Target pointer passed to `cb`. */
+       const void *target;
+};
+
+/** Close callback of an IO handle. The handle's `data` holds the
+ * `struct event_ctx` of the close event: the user callback (if any) is
+ * invoked with `kr_ok()` and the context is freed. */
+static void session2_transport_io_event_finished(uv_handle_t *handle)
+{
+       struct event_ctx *ev = handle->data;
+
+       if (ev->cb != NULL)
+               ev->cb(kr_ok(), ev->session, ev->target, ev->baton);
+
+       free(ev);
+}
+
+/** Finished-callback used when an event was forwarded through
+ * `session2_wrap()`. `baton` is the `struct event_ctx` of the event. The
+ * user callback (if any) receives the session stored in the context (not
+ * the `session` argument) together with `status` and `target`; the context
+ * is freed afterwards. */
+static void session2_transport_parent_event_finished(int status,
+                                                     struct session2 *session,
+                                                     const void *target,
+                                                     void *baton)
+{
+       struct event_ctx *ev = baton;
+
+       if (ev->cb != NULL)
+               ev->cb(status, ev->session, target, ev->baton);
+
+       free(ev);
+}
+
+/** Closes the IO handle of a session: reading is stopped, `ctx` is attached
+ * to the handle as its `data`, and the handle is closed with
+ * `session2_transport_io_event_finished()` as the close callback.
+ * Always returns `kr_ok()`. The `s` parameter is not used. */
+static int session2_handle_close(struct session2 *s, uv_handle_t *handle,
+                                 struct event_ctx *ctx)
+{
+       io_stop_read(handle);
+
+       /* The close callback retrieves the context from the handle. */
+       handle->data = ctx;
+       uv_close(handle, session2_transport_io_event_finished);
+
+       return kr_ok();
+}
+
+/** Handles `event` at the transport level of session `s`.
+ *
+ * - A session that is already closing reports `kr_error(ESTALE)` to `cb`
+ *   and does nothing else.
+ * - A close event (`CLOSE` or `FORCE_CLOSE`) requires the session to be
+ *   empty, stops its timer and marks it as closing.
+ * - On the IO transport, a close event closes the handle and `cb` is
+ *   invoked from the close callback. Any other event needs no transport
+ *   action, so `cb` is invoked right away with `kr_ok()`. Previously this
+ *   path returned without invoking `cb` and leaked the context.
+ * - On the parent transport, the event is forwarded via `session2_wrap()`.
+ *
+ * `cb` may be NULL. Returns `kr_ok()`, or `kr_error(EINVAL)` for a missing
+ * IO handle or an invalid transport type. */
+static int session2_transport_event(struct session2 *s,
+                                    struct protolayer_event event,
+                                    const void *target,
+                                    protolayer_finished_cb cb, void *baton)
+{
+       if (s->closing) {
+               if (cb)
+                       cb(kr_error(ESTALE), s, target, baton);
+               return kr_ok();
+       }
+
+       bool is_close_event = (event.type == PROTOLAYER_EVENT_CLOSE ||
+                       event.type == PROTOLAYER_EVENT_FORCE_CLOSE);
+       if (is_close_event) {
+               kr_require(session2_is_empty(s));
+               session2_timer_stop(s);
+               s->closing = true;
+       }
+
+       struct event_ctx *ctx = malloc(sizeof(*ctx));
+       kr_require(ctx);
+       *ctx = (struct event_ctx){
+               .session = s,
+               .cb = cb,
+               .baton = baton,
+               .target = target
+       };
+
+       switch (s->transport.type) {
+       case SESSION2_TRANSPORT_IO:;
+               uv_handle_t *handle = s->transport.io.handle;
+               if (kr_fails_assert(handle)) {
+                       free(ctx);
+                       return kr_error(EINVAL);
+               }
+
+               if (is_close_event)
+                       return session2_handle_close(s, handle, ctx);
+
+               /* Nothing asynchronous to wait for: release the context
+                * and report completion immediately. */
+               free(ctx);
+               if (cb)
+                       cb(kr_ok(), s, target, baton);
+               return kr_ok();
+
+       case SESSION2_TRANSPORT_PARENT:
+               /* NOTE(review): the event is wrapped on `s` itself, whereas
+                * session2_transport_pushv() forwards to
+                * `s->transport.parent` - confirm which one is intended. */
+               session2_wrap(s, protolayer_event(event), target,
+                               session2_transport_parent_event_finished, ctx);
+               return kr_ok();
+
+       default:
+               kr_assert(false && "Invalid transport");
+               free(ctx);
+               return kr_error(EINVAL);
+       }
+}
+
+/** Removes `task` from the task list of an outgoing IO session and, when the
+ * session's handle is a UDP one, submits a `PROTOLAYER_EVENT_CLOSE` event in
+ * the unwrap direction. Does nothing for a NULL or already closing session;
+ * a session that is not an outgoing IO session with a handle trips an
+ * assertion and is left untouched. */
+void session2_kill_ioreq(struct session2 *session, struct qr_task *task)
+{
+       if (!session || session->closing)
+               return;
+       if (kr_fails_assert(session->outgoing
+                               && session->transport.type == SESSION2_TRANSPORT_IO
+                               && session->transport.io.handle))
+               return;
+
+       session2_tasklist_del(session, task);
+
+       uv_handle_t *handle = session->transport.io.handle;
+       if (handle->type != UV_UDP)
+               return;
+
+       session2_unwrap(session, protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                       NULL, NULL, NULL);
+}
diff --git a/daemon/session2.h b/daemon/session2.h

index 8cf1c3439b7847d8ffc445104f73069fb3b617a8..efb40897af441c782f4522407d63333e2c37dd3a 100644 (file)
--- a/daemon/session2.h
+++ b/daemon/session2.h
@@ -9,16 +9,42 @@
  #include <uv.h>
  
  #include "contrib/mempattern.h"
+#include "lib/generic/queue.h"
+#include "lib/generic/trie.h"
+#include "lib/utils.h"
  
  /* Forward declarations */
  struct session2;
  struct protolayer_cb_ctx;
  
+/** Information about the transport - addresses and proxy. */
+struct comm_info {
+       /** The original address the data came from. May be that of a proxied
+        * client, if they came through a proxy. May be `NULL` if
+        * the communication did not come from network. */
+       const struct sockaddr *src_addr;
+
+       /** The actual address the resolver is communicating with. May be
+        * the address of a proxy if the communication came through one,
+        * otherwise it will be the same as `src_addr`. May be `NULL` if
+        * the communication did not come from network. */
+       const struct sockaddr *comm_addr;
+
+       /** The original destination address. May be the resolver's address, or
+        * the address of a proxy if the communication came through one. May be
+        * `NULL` if the communication did not come from network. */
+       const struct sockaddr *dst_addr;
+
+       /** Data parsed from a PROXY header. May be `NULL` if the communication
+        * did not come through a proxy, or if the PROXYv2 protocol was not used. */
+       const struct proxy_result *proxy;
+};
+
  /** Protocol types - individual implementations of protocol layers. */
  enum protolayer_protocol {
         PROTOLAYER_NULL = 0,
-       PROTOLAYER_TCP,
         PROTOLAYER_UDP,
+       PROTOLAYER_TCP,
         PROTOLAYER_TLS,
         PROTOLAYER_HTTP,
  
@@ -32,16 +58,24 @@ enum protolayer_protocol {
         PROTOLAYER_PROTOCOL_COUNT
  };
  
+/** Protocol layer groups. Each of these represents a sequence of layers in
+ * the unwrap direction. This macro is used to generate `enum protolayer_grp`
+ * and `protolayer_grp_descs[]`.
+ *
+ * Parameters are:
+ *   1. Constant name (for e.g. PROTOLAYER_GRP_* constants)
+ *   2. Variable name (for e.g. protolayer_grp_* arrays)
+ *   3. Human-readable name for logging */
  #define PROTOLAYER_GRP_MAP(XX) \
         XX(DOUDP, doudp, "DNS UDP") \
         XX(DOTCP, dotcp, "DNS TCP") \
-       XX(DOT, dot, "DNS-over-TLS") \
-       XX(DOH, doh, "DNS-over-HTTPS")
+       XX(DOTLS, dot, "DNS-over-TLS") \
+       XX(DOHTTPS, doh, "DNS-over-HTTPS")
  
  /** Pre-defined sequences of protocol layers. */
  enum protolayer_grp {
         PROTOLAYER_GRP_NULL = 0,
-#define XX(id, name, desc) PROTOLAYER_GRP_##id,
+#define XX(cid, vid, name) PROTOLAYER_GRP_##cid,
         PROTOLAYER_GRP_MAP(XX)
  #undef XX
         PROTOLAYER_GRP_COUNT
@@ -49,23 +83,29 @@ enum protolayer_grp {
  
  /** Maps protocol layer group IDs to human-readable descriptions.
   * E.g. PROTOLAYER_GRP_DOH has description 'DNS-over-HTTPS'. */
-extern char *protolayer_grp_descs[];
+extern char *protolayer_grp_names[];
  
  /** Flow control indicators for protocol layer `wrap` and `unwrap` callbacks.
- * Use with `protolayer_continue`, `protolayer_wait` and `protolayer_break`
- * functions. */
-enum protolayer_cb_result {
-       PROTOLAYER_CB_NULL = 0,
-
-       PROTOLAYER_CB_CONTINUE,
-       PROTOLAYER_CB_WAIT,
-       PROTOLAYER_CB_BREAK,
-       PROTOLAYER_CB_PUSH,
+ * Use via `protolayer_continue`, `protolayer_wait`, `protolayer_break`, and
+ * `protolayer_push` functions. */
+enum protolayer_cb_action {
+       PROTOLAYER_CB_ACTION_NULL = 0,
+
+       PROTOLAYER_CB_ACTION_CONTINUE,
+       PROTOLAYER_CB_ACTION_WAIT,
+       PROTOLAYER_CB_ACTION_BREAK,
  };
  
+/** Direction of layer sequence processing. */
  enum protolayer_direction {
-       PROTOLAYER_WRAP,
+       /** Processes buffers in order of layers as defined in the layer group.
+        * In this direction, protocol data should be removed from the buffer,
+        * parsing additional data provided by the protocol. */
         PROTOLAYER_UNWRAP,
+
+       /** Processes buffers in reverse order of layers as defined in the layer
+        * group. In this direction, protocol data should be added. */
+       PROTOLAYER_WRAP,
  };
  
  enum protolayer_ret {
@@ -94,104 +134,215 @@ enum protolayer_ret {
   * function.
   * `baton` is the `baton` parameter passed to the
   * `session2_(un)wrap` function. */
-typedef void (*protolayer_finished_cb)(int status, void *target, void *baton);
+typedef void (*protolayer_finished_cb)(int status, struct session2 *session,
+                                       const void *target, void *baton);
+
+#define PROTOLAYER_EVENT_MAP(XX) \
+       XX(CLOSE) /**< Signal to gracefully close the session -
+                  * i.e. layers add their standard disconnection
+                  * ceremony (e.g. `gnutls_bye()`). */\
+       XX(FORCE_CLOSE) /**< Signal to forcefully close the
+                        * session - i.e. layers SHOULD NOT add
+                        * any disconnection ceremony, if
+                        * avoidable. */\
+       XX(TIMEOUT) /**< Signal that the session has timed out. */
+
+/** Event type, to be interpreted by a layer. */
+enum protolayer_event_type {
+       PROTOLAYER_EVENT_NULL = 0,
+#define XX(cid) PROTOLAYER_EVENT_##cid,
+       PROTOLAYER_EVENT_MAP(XX)
+#undef XX
+       PROTOLAYER_EVENT_COUNT
+};
  
-enum protolayer_cb_data_type {
-       PROTOLAYER_CB_DATA_NULL = 0,
-       PROTOLAYER_CB_DATA_BUFFER,
-       PROTOLAYER_CB_DATA_IOVEC,
+extern char *protolayer_event_names[];
+
+/** Event, with optional auxiliary data. */
+struct protolayer_event {
+       enum protolayer_event_type type;
+       union {
+               void *ptr;
+               char raw[sizeof(void *)];
+       } data; /**< Optional data supplied with the event.
+                * May be used by a layer. */
+};
+
+#define PROTOLAYER_PAYLOAD_MAP(XX) \
+       XX(BUFFER, "Buffer") \
+       XX(IOVEC, "IOVec") \
+       XX(EVENT, "Event") \
+       XX(WIRE_BUF, "Wire buffer")
+
+/** Defines whether the data for a `struct protolayer_cb_ctx` is represented
+ * by a single buffer, an array of `struct iovec`, a `struct protolayer_event`,
+ * or a `struct wire_buf`. The constants are generated from
+ * `PROTOLAYER_PAYLOAD_MAP`. */
+enum protolayer_payload_type {
+       PROTOLAYER_PAYLOAD_NULL = 0,
+#define XX(cid, name) PROTOLAYER_PAYLOAD_##cid,
+       PROTOLAYER_PAYLOAD_MAP(XX)
+#undef XX
+       PROTOLAYER_PAYLOAD_COUNT
+};
+
+extern char *protolayer_payload_names[];
+
+/** Data processed by the sequence of layers. All pointed-to memory is always
+ * owned by its creator. It is also the layer (group) implementor's
+ * responsibility to keep data compatible in between layers. No payload memory
+ * is ever (de-)allocated by the protolayer manager! */
+struct protolayer_payload {
+       enum protolayer_payload_type type;
+       union {
+               /** Only valid if `type` is `_BUFFER`. */
+               struct {
+                       char *buf;
+                       size_t len;
+               } buffer;
+
+               /** Only valid if `type` is `_IOVEC`. */
+               struct {
+                       struct iovec *iov;
+                       int cnt;
+               } iovec;
+
+               /** Only valid if `type` is `_EVENT`. */
+               struct protolayer_event event;
+
+               /** Only valid if `type` is `_WIRE_BUF`. */
+               struct wire_buf *wire_buf;
+       };
  };
  
  /** Context for protocol layer callbacks, containing buffer data and internal
   * information for protocol layer manager. */
  struct protolayer_cb_ctx {
         /* read-write */
+       /** The payload */
+       struct protolayer_payload payload;
+       /** Transport information (e.g. UDP sender address). May be `NULL`. */
+       const void *target;
+       /** Communication information. Typically written into by one of the
+        * first layers facilitating transport protocol processing.
+        * Zero-initialized in the beginning. */
+       struct comm_info comm;
  
-       /** Data processed by the sequence of layers. All the data is always
-        * owned by its creator. It is also the layer (group) implementor's
-        * responsibility to keep data compatible in between layers. No data is
-        * ever (de-)allocated by the protolayer manager! */
-       struct {
-               enum protolayer_cb_data_type type;
-               union {
-                       /** Only valid if `type` is `_BUFFER`. */
-                       struct {
-                               char *buf;
-                               size_t len;
-                       } buffer;
-
-                       /** Only valid if `type` is `_IOVEC`. */
-                       struct {
-                               struct iovec *iov;
-                               int cnt;
-                       } iovec;
-               };
-               /** Always valid; may be `NULL`. */
-               void *target;
-       } data;
+       /* callback for when the layer iteration has ended - read-only */
+       protolayer_finished_cb finished_cb;
+       const void *finished_cb_target;
+       void *finished_cb_baton;
+       struct wire_buf *converted_wire_buf;
  
-       /* internal manager information - private */
+       /* internal information for the manager - private */
         enum protolayer_direction direction;
         bool async_mode;
         unsigned int layer_ix;
         struct protolayer_manager *manager;
         int status;
-       enum protolayer_cb_result result;
-
-       /* callback for when the layer iteration has ended - read-only */
-       protolayer_finished_cb finished_cb;
-       void *finished_cb_target;
-       void *finished_cb_baton;
+       enum protolayer_cb_action action;
  };
  
-/** Convenience function to put a buffer pointer to the specified context. */
-static inline void protolayer_set_buffer(struct protolayer_cb_ctx *ctx,
-                                         char *buf, size_t len)
+/** Convenience function to get a buffer-type payload. */
+static inline struct protolayer_payload protolayer_buffer(char *buf, size_t len)
+{
+       return (struct protolayer_payload){
+               .type = PROTOLAYER_PAYLOAD_BUFFER,
+               .buffer = {
+                       .buf = buf,
+                       .len = len
+               }
+       };
+}
+
+/** Convenience function to get an iovec-type payload. */
+static inline struct protolayer_payload protolayer_iovec(
+               struct iovec *iov, int iovcnt)
  {
-       ctx->data.type = PROTOLAYER_CB_DATA_BUFFER;
-       ctx->data.buffer.buf = buf;
-       ctx->data.buffer.len = len;
+       return (struct protolayer_payload){
+               .type = PROTOLAYER_PAYLOAD_IOVEC,
+               .iovec = {
+                       .iov = iov,
+                       .cnt = iovcnt
+               }
+       };
  }
  
-/** Convenience function to put an iovec pointer to the specified context. */
-static inline void protolayer_set_iovec(struct protolayer_cb_ctx *ctx,
-                                        struct iovec *iov, int iovcnt)
+/** Convenience function to get an event-type payload. */
+static inline struct protolayer_payload protolayer_event(struct protolayer_event event)
  {
-       ctx->data.type = PROTOLAYER_CB_DATA_IOVEC;
-       ctx->data.iovec.iov = iov;
-       ctx->data.iovec.cnt = iovcnt;
+       return (struct protolayer_payload){
+               .type = PROTOLAYER_PAYLOAD_EVENT,
+               .event = event
+       };
  }
  
+/** Convenience function to get an event-type payload without auxiliary data. */
+static inline struct protolayer_payload protolayer_event_nd(enum protolayer_event_type event)
+{
+       return (struct protolayer_payload){
+               .type = PROTOLAYER_PAYLOAD_EVENT,
+               .event = {
+                       .type = event
+               }
+       };
+}
  
-/** Common header for per-session layer-specific data. When implementing
- * a new layer, this is to be put at the beginning of the struct. */
-#define PROTOLAYER_DATA_HEADER struct {\
-       enum protolayer_protocol protocol;\
-       size_t size; /**< Size of the entire struct (incl. header) */\
-       bool processed; /**< Safeguard so that the layer does not get executed
-                        * multiple times. */\
+/** Convenience function to get a wire-buf-type payload. */
+static inline struct protolayer_payload protolayer_wire_buf(struct wire_buf *wire_buf)
+{
+       return (struct protolayer_payload){
+               .type = PROTOLAYER_PAYLOAD_WIRE_BUF,
+               .wire_buf = wire_buf
+       };
  }
  
+/** Convenience function to represent the specified payload as a buffer-type.
+ * Supports only `_BUFFER` and `_WIRE_BUF` on the input, otherwise returns
+ * `_NULL` type or aborts on assertion if allowed. */
+struct protolayer_payload protolayer_as_buffer(const struct protolayer_payload *payload);
+
+
  /** Per-session layer-specific data - generic struct. */
  struct protolayer_data {
-       PROTOLAYER_DATA_HEADER;
-       uint8_t data[];
+       enum protolayer_protocol protocol;
+       bool processed : 1; /**< Internal safeguard so that the layer does not
+                            * get executed multiple times on the same buffer. */
+       size_t sess_size; /**< Size of the session data (aligned). */
+       size_t iter_size; /**< Size of the iteration data (aligned). */
+       uint8_t data[]; /**< Memory for the layer-specific structs. */
  };
  
-typedef void (*protolayer_cb)(struct protolayer_data *layer,
-                              struct protolayer_cb_ctx *ctx);
-typedef int (*protolayer_data_cb)(struct protolayer_manager *manager,
-                                  struct protolayer_data *layer);
+/** Get a pointer to the session data of the layer. This data shares
+ * its lifetime with a session. */
+static inline void *protolayer_sess_data(struct protolayer_data *d)
+{
+       return d->data;
+}
  
-/** The default implementation for the `struct protolayer_globals::reset`
- * callback. Simply calls the `deinit` and `init` callbacks. */
-int protolayer_data_reset_default(struct protolayer_manager *manager,
-                                  struct protolayer_data *layer);
+/** Gets a pointer to the iteration data of the layer. This data shares its
+ * lifetime with an iteration through layers; it is also kept intact when
+ * an iteration ends with a `_WAIT` action. */
+static inline void *protolayer_iter_data(struct protolayer_data *d)
+{
+       return d->data + d->sess_size;
+}
  
+/** Return value of `protolayer_cb` callbacks. To be generated by continuation
+ * functions, never returned directly. */
+enum protolayer_cb_result {
+       PROTOLAYER_CB_RESULT_MAGIC = 0x364F392E,
+};
+
+typedef enum protolayer_cb_result (*protolayer_cb)(
+               struct protolayer_data *layer, struct protolayer_cb_ctx *ctx);
+typedef int (*protolayer_data_cb)(struct protolayer_manager *manager,
+                                  struct protolayer_data *layer);
  
  /** A collection of protocol layers and their layer-specific data. */
  struct protolayer_manager {
         enum protolayer_grp grp;
+       bool iter_data_inited : 1; /**< True: layers' iteration data is
+                                   * initialized (e.g. from a previous
+                                   * iteration). */
         struct session2 *session;
         size_t num_layers;
         char data[];
@@ -207,19 +358,26 @@ void protolayer_manager_free(struct protolayer_manager *m);
  
  /** Global data for a specific layered protocol. */
  struct protolayer_globals {
-       size_t data_size;          /**< Size of the layer-specific data struct. */
-       protolayer_data_cb init;   /**< Initializes the layer-specific data struct. */
-       protolayer_data_cb deinit; /**< De-initializes the layer-specific data struct. */
-       protolayer_data_cb reset;  /**< Resets the layer-specific data struct
-                                   * after finishing a sequence. Default
-                                   * implementation is available as
-                                   * `protolayer_data_reset_default`. */
-       protolayer_cb unwrap;      /**< Strips the buffer of protocol-specific
-                                   * data. E.g. a HTTP layer removes HTTP
-                                   * status and headers. */
-       protolayer_cb wrap;        /**< Wraps the buffer into protocol-specific
-                                   * data. E.g. a HTTP layer adds HTTP status
-                                   * and headers. */
+       size_t sess_size; /**< Size of the layer-specific session data struct. */
+       size_t iter_size; /**< Size of the layer-specific iteration data struct. */
+       protolayer_data_cb sess_init;   /**< Called upon session creation to
+                                        * initialize layer-specific session
+                                        * data. */
+       protolayer_data_cb sess_deinit; /**< Called upon session destruction to
+                                        * deinitialize layer-specific session
+                                        * data. */
+       protolayer_data_cb iter_init;   /**< Called at the beginning of a layer
+                                        * sequence to initialize layer-specific
+                                        * iteration data. */
+       protolayer_data_cb iter_deinit; /**< Called at the end of a layer
+                                        * sequence to deinitialize
+                                        * layer-specific iteration data. */
+       protolayer_cb unwrap; /**< Strips the buffer of protocol-specific
+                              * data. E.g. a HTTP layer removes HTTP
+                              * status and headers. */
+       protolayer_cb wrap;   /**< Wraps the buffer into protocol-specific
+                              * data. E.g. a HTTP layer adds HTTP status
+                              * and headers. */
  };
  
  /** Global data about layered protocols. Indexed by `enum protolayer_protocol`. */
@@ -227,81 +385,162 @@ extern struct protolayer_globals protolayer_globals[PROTOLAYER_PROTOCOL_COUNT];
  
  /** *Continuation function* - signals the protolayer manager to continue
   * processing the next layer. */
-void protolayer_continue(struct protolayer_cb_ctx *ctx);
+enum protolayer_cb_result protolayer_continue(struct protolayer_cb_ctx *ctx);
  
  /** *Continuation function* - signals that the layer needs more data to produce
   * a new buffer for the next layer. */
-void protolayer_wait(struct protolayer_cb_ctx *ctx);
+enum protolayer_cb_result protolayer_wait(struct protolayer_cb_ctx *ctx);
  
  /** *Continuation function* - signals that the layer wants to stop processing
   * of the buffer and clean up, possibly due to an error (indicated by
   * `status`).
   *
   * `status` must be 0 or a negative integer. */
-void protolayer_break(struct protolayer_cb_ctx *ctx, int status);
+enum protolayer_cb_result protolayer_break(struct protolayer_cb_ctx *ctx, int status);
  
  /** *Continuation function* - pushes data to the session's transport and
   * signals that the layer wants to stop processing of the buffer and clean up.
   *
- * `target` is the target data for the transport - in most cases, it will be
- * unused and may be `NULL`; except for UDP, where it must point to a `struct
- * sockaddr_*` to indicate the target address.
- *
   * This function is meant to be called by the `wrap` callback of first layer in
   * the sequence.  */
-void protolayer_pushv(struct protolayer_cb_ctx *ctx,
-                      struct iovec *iov, int iovcnt, void *target);
+enum protolayer_cb_result protolayer_push(struct protolayer_cb_ctx *ctx);
  
-/** *Continuation function* - pushes data to the session's transport and
- * signals that the layer wants to stop processing of the buffer and clean up.
+static inline enum protolayer_cb_result protolayer_async()
+{
+       return PROTOLAYER_CB_RESULT_MAGIC;
+}
+
+
+/** Wire buffer.
   *
- * `target` is the target data for the transport - in most cases, it will be
- * unused and may be `NULL`; except for UDP, where it must point to a `struct
- * sockaddr_*` to indicate the target address.
+ * May be initialized via `wire_buf_init` or to zero (ZII), then reserved via
+ * `wire_buf_reserve`. */
+struct wire_buf {
+       char *buf; /**< Buffer memory. */
+       size_t size; /**< Current size of the buffer memory. */
+       size_t start; /**< Index at which the valid data of the buffer starts (inclusive). */
+       size_t end; /**< Index at which the valid data of the buffer ends (exclusive). */
+       bool error; /**< Whether there has been an error. */
+};
+
+/** Allocates the wire buffer with the specified `initial_size`. */
+int wire_buf_init(struct wire_buf *wb, size_t initial_size);
+
+/** De-allocates the wire buffer. */
+void wire_buf_deinit(struct wire_buf *wb);
+
+/** Ensures that the wire buffer's size is at least `size`. `*wb` must be
+ * initialized, either to zero or via `wire_buf_init`. */
+int wire_buf_reserve(struct wire_buf *wb, size_t size);
+
+/** Adds `length` to the end index of the valid data, marking `length` more
+ * bytes as valid.
   *
- * This function is meant to be called by the `wrap` callback of first layer in
- * the sequence.  */
-void protolayer_push(struct protolayer_cb_ctx *ctx, char *buf, size_t buf_len,
-                     void *target);
+ * Returns 0 on success.
+ * Returns `kr_error(EINVAL)` if the end index would exceed the
+ * buffer size. */
+int wire_buf_consume(struct wire_buf *wb, size_t length);
+
+/** Adds `length` to the start index of the valid data, marking `length` fewer
+ * bytes as valid.
+ *
+ * Returns 0 on success.
+ * Returns `kr_error(EINVAL)` if the start index would exceed
+ * the end index. */
+int wire_buf_trim(struct wire_buf *wb, size_t length);
+
+/** Moves the valid bytes of the buffer to the buffer's beginning. */
+int wire_buf_movestart(struct wire_buf *wb);
+
+/** Resets the valid bytes of the buffer to zero, as well as the error flag. */
+int wire_buf_reset(struct wire_buf *wb);
+
+static void *wire_buf_data(const struct wire_buf *wb)
+{
+       return &wb->buf[wb->start];
+}
+
+static size_t wire_buf_data_length(const struct wire_buf *wb)
+{
+       return wb->end - wb->start;
+}
+
+static void *wire_buf_free_space(const struct wire_buf *wb)
+{
+       return &wb->buf[wb->end];
+}
+
+static size_t wire_buf_free_space_length(const struct wire_buf *wb)
+{
+       return wb->size - wb->end;
+}
  
  
  /** Indicates how a session sends data in the `wrap` direction and receives
   * data in the `unwrap` direction. */
  enum session2_transport_type {
         SESSION2_TRANSPORT_NULL = 0,
-       SESSION2_TRANSPORT_HANDLE,
+       SESSION2_TRANSPORT_IO,
         SESSION2_TRANSPORT_PARENT,
  };
  
  struct session2 {
+       /** Data for sending data out in the `wrap` direction and receiving new
+        * data in the `unwrap` direction. */
         struct {
-               enum session2_transport_type type;
+               enum session2_transport_type type; /**< See `enum session2_transport_type` */
                 union {
-                       void *ctx;
-                       uv_handle_t *handle;
+                       /** For `_IO` type transport. Contains a libuv handle
+                        * and session-related addresses. */
+                       struct {
+                               uv_handle_t *handle;
+                               union kr_sockaddr peer;
+                               union kr_sockaddr sockname;
+                       } io;
+
+                       /** For `_PARENT` type transport. */
                         struct session2 *parent;
                 };
         } transport;
  
-       struct protolayer_manager *layers;
+       struct protolayer_manager *layers; /**< Protocol layers of this session. */
+       knot_mm_t pool;
+
+       uv_timer_t timer;
+       enum protolayer_direction timer_direction; /**< Timeout event direction. */
+
+       trie_t *tasks; /**< list of tasks associated with given session. */
+       queue_t(struct qr_task *) waiting; /**< list of tasks waiting for sending to upstream. */
+
+       struct wire_buf wire_buf;
+
+       uint64_t last_activity; /**< Time of last IO activity (if any occurs).
+                                * Otherwise session creation time. */
+
+       bool closing : 1;
+       bool throttled : 1;
         bool outgoing : 1;
+       bool secure : 1; /**< Whether encryption takes place in this session.
+                         * Layers may use this to determine whether padding
+                         * should be applied. */
  };
  
  /** Allocates and initializes a new session with the specified protocol layer
   * group, and the provided transport context. */
  struct session2 *session2_new(enum session2_transport_type transport_type,
-                              void *transport_ctx,
                                enum protolayer_grp layer_grp,
                                bool outgoing);
  
  /** Allocates and initializes a new session with the specified protocol layer
   * group, using a *libuv handle* as its transport. */
-static inline struct session2 *session2_new_handle(uv_handle_t *handle,
-                                                   enum protolayer_grp layer_grp,
-                                                   bool outgoing)
+static inline struct session2 *session2_new_io(uv_handle_t *handle,
+                                               enum protolayer_grp layer_grp,
+                                               bool outgoing)
  {
-       return session2_new(SESSION2_TRANSPORT_HANDLE, handle, layer_grp,
-                       outgoing);
+       struct session2 *s = session2_new(SESSION2_TRANSPORT_IO, layer_grp, outgoing);
+       s->transport.io.handle = handle;
+       handle->data = s;
+       return s;
  }
  
  /** Allocates and initializes a new session with the specified protocol layer
@@ -310,16 +549,96 @@ static inline struct session2 *session2_new_child(struct session2 *parent,
                                                    enum protolayer_grp layer_grp,
                                                    bool outgoing)
  {
-       return session2_new(SESSION2_TRANSPORT_PARENT, parent, layer_grp,
-                       outgoing);
+       struct session2 *s = session2_new(SESSION2_TRANSPORT_PARENT, layer_grp, outgoing);
+       s->transport.parent = parent;
+       return s;
  }
  
  /** De-allocates the session. */
  void session2_free(struct session2 *s);
  
-/** Sends the specified buffer to be processed in the `unwrap` direction by the
- * session's protocol layers. The `target` parameter may contain a pointer to
- * transport-specific data, e.g. for UDP, it shall contain a pointer to the
+/** Start reading from the underlying transport. */
+int session2_start_read(struct session2 *session);
+
+/** Stop reading from the underlying transport. */
+int session2_stop_read(struct session2 *session);
+
+/** Gets the peer address from the specified session, iterating through the
+ * session hierarchy (child-to-parent) until an `_IO` session is found if
+ * needed.
+ *
+ * May return `NULL` if no peer is set.  */
+struct sockaddr *session2_get_peer(struct session2 *s);
+
+/** Gets the sockname from the specified session, iterating through the
+ * session hierarchy (child-to-parent) until an `_IO` session is found if
+ * needed.
+ *
+ * May return `NULL` if no sockname is set.  */
+struct sockaddr *session2_get_sockname(struct session2 *s);
+
+/** Gets the libuv handle from the specified session, iterating through the
+ * session hierarchy (child-to-parent) until an `_IO` session is found if
+ * needed.
+ *
+ * May return `NULL` if no handle is set.  */
+uv_handle_t *session2_get_handle(struct session2 *s);
+
+/** Start the session timer. When the timer ends, a `_TIMEOUT` event is sent
+ * in the specified `direction`. */
+int session2_timer_start(struct session2 *s, uint64_t timeout, uint64_t repeat,
+                          enum protolayer_direction direction);
+
+/** Restart the session timer without changing any of its parameters. */
+int session2_timer_restart(struct session2 *s);
+
+/** Stop the session timer. */
+int session2_timer_stop(struct session2 *s);
+
+int session2_tasklist_add(struct session2 *session, struct qr_task *task);
+int session2_tasklist_del(struct session2 *session, struct qr_task *task);
+struct qr_task *session2_tasklist_get_first(struct session2 *session);
+struct qr_task *session2_tasklist_del_first(struct session2 *session, bool deref);
+struct qr_task *session2_tasklist_find_msgid(const struct session2 *session, uint16_t msg_id);
+struct qr_task *session2_tasklist_del_msgid(const struct session2 *session, uint16_t msg_id);
+void session2_tasklist_finalize(struct session2 *session, int status);
+int session2_tasklist_finalize_expired(struct session2 *session);
+
+static inline size_t session2_tasklist_get_len(const struct session2 *session)
+{
+       return trie_weight(session->tasks);
+}
+
+static inline bool session2_tasklist_is_empty(const struct session2 *session)
+{
+       return session2_tasklist_get_len(session) == 0;
+}
+
+int session2_waitinglist_push(struct session2 *session, struct qr_task *task);
+struct qr_task *session2_waitinglist_get(const struct session2 *session);
+struct qr_task *session2_waitinglist_pop(struct session2 *session, bool deref);
+void session2_waitinglist_retry(struct session2 *session, bool increase_timeout_cnt);
+void session2_waitinglist_finalize(struct session2 *session, int status);
+
+static inline size_t session2_waitinglist_get_len(const struct session2 *session)
+{
+       return queue_len(session->waiting);
+}
+
+static inline bool session2_waitinglist_is_empty(const struct session2 *session)
+{
+       return session2_waitinglist_get_len(session) == 0;
+}
+
+static inline bool session2_is_empty(const struct session2 *session)
+{
+       return session2_tasklist_is_empty(session) &&
+              session2_waitinglist_is_empty(session);
+}
+
+/** Sends the specified `payload` to be processed in the `unwrap` direction by
+ * the session's protocol layers. The `target` parameter may contain a pointer
+ * to transport-specific data, e.g. for UDP, it shall contain a pointer to the
   * sender's `struct sockaddr_*`.
   *
   * Once all layers are processed, `cb` is called with `baton` passed as one
@@ -328,10 +647,10 @@ void session2_free(struct session2 *s);
   *
   * Returns one of `enum protolayer_ret` or a negative number
   * indicating an error. */
-int session2_unwrap(struct session2 *s, char *buf, size_t buf_len, void *target,
-                    protolayer_finished_cb cb, void *baton);
+int session2_unwrap(struct session2 *s, struct protolayer_payload payload,
+                    const void *target, protolayer_finished_cb cb, void *baton);
  
-/** Sends the specified buffer to be processed in the `wrap` direction by the
+/** Sends the specified `payload` to be processed in the `wrap` direction by the
   * session's protocol layers. The `target` parameter may contain a pointer to
   * some data specific to the producer-consumer layer of this session.
   *
@@ -341,5 +660,15 @@ int session2_unwrap(struct session2 *s, char *buf, size_t buf_len, void *target,
   *
   * Returns one of `enum protolayer_ret` or a negative number
   * indicating an error. */
-int session2_wrap(struct session2 *s, char *buf, size_t buf_len, void *target,
-                  protolayer_finished_cb cb, void *baton);
+int session2_wrap(struct session2 *s, struct protolayer_payload payload,
+                  const void *target, protolayer_finished_cb cb, void *baton);
+
+/** Removes the specified request task from the session's tasklist. The session
+ * must be outgoing. If the session is UDP, a signal to close is also sent to it. */
+void session2_kill_ioreq(struct session2 *session, struct qr_task *task);
+
+/** Update `last_activity` to the current timestamp. */
+static inline void session2_touch(struct session2 *session)
+{
+       session->last_activity = kr_now();
+}
diff --git a/daemon/worker.c b/daemon/worker.c

index 432ebe5aa9dbe561d07ac589089bec1219e7546e..f946fc761fa7672861cb7ab41485755d0553f685 100644 (file)
--- a/daemon/worker.c
+++ b/daemon/worker.c
@@ -28,7 +28,7 @@
  #include "daemon/engine.h"
  #include "daemon/io.h"
  #include "daemon/proxyv2.h"
-#include "daemon/session.h"
+#include "daemon/session2.h"
  #include "daemon/tls.h"
  #include "daemon/http.h"
  #include "daemon/udp_queue.h"
@@ -51,7 +51,7 @@ struct request_ctx
         struct qr_task *task;
         struct {
                 /** NULL if the request didn't come over network. */
-               struct session *session;
+               struct session2 *session;
                 /** Requestor's address; separate because of UDP session "sharing". */
                 union kr_sockaddr addr;
                 /** Request communication address; if not from a proxy, same as addr. */
@@ -70,7 +70,7 @@ struct qr_task
         struct request_ctx *ctx;
         knot_pkt_t *pktbuf;
         qr_tasklist_t waiting;
-       struct session *pending[MAX_PENDING];
+       struct session2 *pending[MAX_PENDING];
         uint16_t pending_count;
         uint16_t timeouts;
         uint16_t iter_count;
@@ -100,11 +100,11 @@ static void qr_task_free(struct qr_task *task);
  static int qr_task_step(struct qr_task *task,
                         const struct sockaddr *packet_source,
                         knot_pkt_t *packet);
-static int qr_task_send(struct qr_task *task, struct session *session,
+static int qr_task_send(struct qr_task *task, struct session2 *session,
                         const struct sockaddr *addr, knot_pkt_t *pkt);
  static int qr_task_finalize(struct qr_task *task, int state);
  static void qr_task_complete(struct qr_task *task);
-static int worker_add_tcp_waiting(const struct sockaddr* addr, struct session *session);
+static int worker_add_tcp_waiting(const struct sockaddr* addr, struct session2 *session);
  static void on_tcp_connect_timeout(uv_timer_t *timer);
  static void on_udp_timeout(uv_timer_t *timer);
  static void subreq_finalize(struct qr_task *task, const struct sockaddr *packet_source, knot_pkt_t *pkt);
@@ -115,8 +115,8 @@ struct worker_ctx *the_worker = NULL;
  
  /*! @internal Create a UDP/TCP handle for an outgoing AF_INET* connection.
   *  socktype is SOCK_* */
-static uv_handle_t *ioreq_spawn(int socktype, sa_family_t family, bool has_tls,
-                               bool has_http)
+static uv_handle_t *ioreq_spawn(int socktype, sa_family_t family,
+                                enum protolayer_grp grp)
  {
         bool precond = (socktype == SOCK_DGRAM || socktype == SOCK_STREAM)
                         && (family == AF_INET  || family == AF_INET6);
@@ -131,7 +131,7 @@ static uv_handle_t *ioreq_spawn(int socktype, sa_family_t family, bool has_tls,
         if (!handle) {
                 return NULL;
         }
-       int ret = io_create(the_worker->loop, handle, socktype, family, has_tls, has_http);
+       int ret = io_create(the_worker->loop, handle, socktype, family, grp, true);
         if (ret) {
                 if (ret == UV_EMFILE) {
                         the_worker->too_many_open = true;
@@ -168,8 +168,8 @@ static uv_handle_t *ioreq_spawn(int socktype, sa_family_t family, bool has_tls,
         }
  
         /* Set current handle as a subrequest type. */
-       struct session *session = handle->data;
-       session_flags(session)->outgoing = true;
+       struct session2 *session = handle->data;
+       session->outgoing = true;
         /* Connect or issue query datagram */
         return handle;
  }
@@ -177,7 +177,7 @@ static uv_handle_t *ioreq_spawn(int socktype, sa_family_t family, bool has_tls,
  static void ioreq_kill_pending(struct qr_task *task)
  {
         for (uint16_t i = 0; i < task->pending_count; ++i) {
-               session_kill_ioreq(task->pending[i], task);
+               session2_kill_ioreq(task->pending[i], task);
         }
         task->pending_count = 0;
  }
@@ -214,7 +214,7 @@ static uint8_t *alloc_wire_cb(struct kr_request *req, uint16_t *maxlen)
                 return NULL;
         struct request_ctx *ctx = (struct request_ctx *)req;
         /* We know it's an AF_XDP socket; otherwise this CB isn't assigned. */
-       uv_handle_t *handle = session_get_handle(ctx->source.session);
+       uv_handle_t *handle = session2_get_handle(ctx->source.session);
         if (kr_fails_assert(handle->type == UV_POLL))
                 return NULL;
         xdp_handle_data_t *xhd = handle->data;
@@ -249,7 +249,7 @@ static void free_wire(const struct request_ctx *ctx)
         if (likely(ans->wire == NULL)) /* sent most likely */
                 return;
         /* We know it's an AF_XDP socket; otherwise alloc_wire_cb isn't assigned. */
-       uv_handle_t *handle = session_get_handle(ctx->source.session);
+       uv_handle_t *handle = session2_get_handle(ctx->source.session);
         if (kr_fails_assert(handle->type == UV_POLL))
                 return;
         xdp_handle_data_t *xhd = handle->data;
@@ -269,11 +269,11 @@ static void free_wire(const struct request_ctx *ctx)
  }
  #endif
  /* Helper functions for transport selection */
-static inline bool is_tls_capable(struct sockaddr *address) {
-       tls_client_param_t *tls_entry = tls_client_param_get(
-                       the_network->tls_client_params, address);
-       return tls_entry;
-}
+//static inline bool is_tls_capable(struct sockaddr *address) {
+//     tls_client_param_t *tls_entry = tls_client_param_get(
+//                     the_network->tls_client_params, address);
+//     return tls_entry;
+//}
  
  static inline bool is_tcp_connected(struct sockaddr *address) {
         return worker_find_tcp_connected(address);
@@ -288,8 +288,8 @@ static inline bool is_tcp_waiting(struct sockaddr *address) {
   * session and addr point to the source of the request, and they are NULL
   * in case the request didn't come from network.
   */
-static struct request_ctx *request_create(struct session *session,
-                                          struct io_comm_data *comm,
+static struct request_ctx *request_create(struct session2 *session,
+                                          struct comm_info *comm,
                                            const uint8_t *eth_from,
                                            const uint8_t *eth_to,
                                            uint32_t uid)
@@ -307,7 +307,7 @@ static struct request_ctx *request_create(struct session *session,
         }
  
         /* TODO Relocate pool to struct request */
-       if (session && kr_fails_assert(session_flags(session)->outgoing == false)) {
+       if (session && kr_fails_assert(session->outgoing == false)) {
                 pool_release(pool.ctx);
                 return NULL;
         }
@@ -348,9 +348,9 @@ static struct request_ctx *request_create(struct session *session,
                 const struct sockaddr *dst_addr = comm->dst_addr;
                 const struct proxy_result *proxy = comm->proxy;
  
-               req->qsource.comm_flags.tcp = session_get_handle(session)->type == UV_TCP;
-               req->qsource.comm_flags.tls = session_flags(session)->has_tls;
-               req->qsource.comm_flags.http = session_flags(session)->has_http;
+               req->qsource.comm_flags.tcp = session2_get_handle(session)->type == UV_TCP;
+               req->qsource.comm_flags.tls = session->secure;
+//             req->qsource.comm_flags.http = session->has_http; /* TODO */
  
                 req->qsource.flags = req->qsource.comm_flags;
                 if (proxy) {
@@ -359,18 +359,20 @@ static struct request_ctx *request_create(struct session *session,
                 }
  
                 req->qsource.stream_id = -1;
-#if ENABLE_DOH2
-               if (req->qsource.comm_flags.http) {
-                       struct http_ctx *http_ctx = session_http_get_server_ctx(session);
-                       struct http_stream stream = queue_head(http_ctx->streams);
-                       req->qsource.stream_id = stream.id;
-                       if (stream.headers) {
-                               req->qsource.headers = *stream.headers;
-                               free(stream.headers);
-                               stream.headers = NULL;
-                       }
-               }
-#endif
+
+               /* TODO: http */
+//#if ENABLE_DOH2
+//             if (req->qsource.comm_flags.http) {
+//                     struct http_ctx *http_ctx = session_http_get_server_ctx(session);
+//                     struct http_stream stream = queue_head(http_ctx->streams);
+//                     req->qsource.stream_id = stream.id;
+//                     if (stream.headers) {
+//                             req->qsource.headers = *stream.headers;
+//                             free(stream.headers);
+//                             stream.headers = NULL;
+//                     }
+//             }
+//#endif
                 /* We need to store a copy of peer address. */
                 memcpy(&ctx->source.addr.ip, src_addr, kr_sockaddr_len(src_addr));
                 req->qsource.addr = &ctx->source.addr.ip;
@@ -381,12 +383,13 @@ static struct request_ctx *request_create(struct session *session,
                 req->qsource.comm_addr = &ctx->source.comm_addr.ip;
  
                 if (!dst_addr) /* We wouldn't have to copy in this case, but for consistency. */
-                       dst_addr = session_get_sockname(session);
+                       dst_addr = session2_get_sockname(session);
                 memcpy(&ctx->source.dst_addr.ip, dst_addr, kr_sockaddr_len(dst_addr));
                 req->qsource.dst_addr = &ctx->source.dst_addr.ip;
         }
  
-       req->selection_context.is_tls_capable = is_tls_capable;
+//     req->selection_context.is_tls_capable = is_tls_capable;
+       req->selection_context.is_tls_capable = false;
         req->selection_context.is_tcp_connected = is_tcp_connected;
         req->selection_context.is_tcp_waiting = is_tcp_waiting;
         array_init(req->selection_context.forwarding_targets);
@@ -518,12 +521,12 @@ static void qr_task_free(struct qr_task *task)
  }
  
  /*@ Register new qr_task within session. */
-static int qr_task_register(struct qr_task *task, struct session *session)
+static int qr_task_register(struct qr_task *task, struct session2 *session)
  {
-       if (kr_fails_assert(!session_flags(session)->outgoing && session_get_handle(session)->type == UV_TCP))
+       if (kr_fails_assert(!session->outgoing && session2_get_handle(session)->type == UV_TCP))
                 return kr_error(EINVAL);
  
-       session_tasklist_add(session, task);
+       session2_tasklist_add(session, task);
  
         struct request_ctx *ctx = task->ctx;
         if (kr_fails_assert(ctx && (ctx->source.session == NULL || ctx->source.session == session)))
@@ -534,10 +537,10 @@ static int qr_task_register(struct qr_task *task, struct session *session)
          * an in effect shrink TCP window size. To get more precise throttling,
          * we would need to copy remainder of the unread buffer and reassemble
          * when resuming reading. This is NYI.  */
-       if (session_tasklist_get_len(session) >= the_worker->tcp_pipeline_max &&
-           !session_flags(session)->throttled && !session_flags(session)->closing) {
-               session_stop_read(session);
-               session_flags(session)->throttled = true;
+       if (session2_tasklist_get_len(session) >= the_worker->tcp_pipeline_max &&
+           !session->throttled && !session->closing) {
+               session2_stop_read(session);
+               session->throttled = true;
         }
  
         return 0;
@@ -552,11 +555,11 @@ static void qr_task_complete(struct qr_task *task)
         kr_require(task->waiting.len == 0);
         kr_require(task->leading == false);
  
-       struct session *s = ctx->source.session;
+       struct session2 *s = ctx->source.session;
         if (s) {
-               kr_require(!session_flags(s)->outgoing && session_waitinglist_is_empty(s));
+               kr_require(!s->outgoing && session2_waitinglist_is_empty(s));
                 ctx->source.session = NULL;
-               session_tasklist_del(s, task);
+               session2_tasklist_del(s, task);
         }
  
         /* Release primary reference to task. */
@@ -576,9 +579,9 @@ int qr_task_on_send(struct qr_task *task, const uv_handle_t *handle, int status)
  
         if (!handle || kr_fails_assert(handle->data))
                 return status;
-       struct session* s = handle->data;
+       struct session2* s = handle->data;
  
-       if (handle->type == UV_UDP && session_flags(s)->outgoing) {
+       if (handle->type == UV_UDP && s->outgoing) {
                 // This should ensure that we are only dealing with our question to upstream
                 if (kr_fails_assert(!knot_wire_get_qr(task->pktbuf->wire)))
                         return status;
@@ -587,7 +590,7 @@ int qr_task_on_send(struct qr_task *task, const uv_handle_t *handle, int status)
                 if (kr_fails_assert(qry && task->transport))
                         return status;
                 size_t timeout = task->transport->timeout;
-               int ret = session_timer_start(s, on_udp_timeout, timeout, 0);
+               int ret = session2_timer_start(s, timeout, 0, PROTOLAYER_UNWRAP);
                 /* Start next step with timeout, fatal if can't start a timer. */
                 if (ret != 0) {
                         subreq_finalize(task, &task->transport->address.ip, task->pktbuf);
@@ -600,7 +603,7 @@ int qr_task_on_send(struct qr_task *task, const uv_handle_t *handle, int status)
                         const struct kr_request *req = &task->ctx->req;
                         if (kr_log_is_debug(WORKER, req)) {
                                 const char *peer_str = NULL;
-                               if (!session_flags(s)->outgoing) {
+                               if (!s->outgoing) {
                                         peer_str = "hidden"; // avoid logging downstream IPs
                                 } else if (task->transport) {
                                         peer_str = kr_straddr(&task->transport->address.ip);
@@ -615,15 +618,15 @@ int qr_task_on_send(struct qr_task *task, const uv_handle_t *handle, int status)
                         return status;
                 }
  
-               if (session_flags(s)->outgoing || session_flags(s)->closing)
+               if (s->outgoing || s->closing)
                         return status;
  
-               if (session_flags(s)->throttled &&
-                   session_tasklist_get_len(s) < the_worker->tcp_pipeline_max/2) {
+               if (s->throttled &&
+                   session2_tasklist_get_len(s) < the_worker->tcp_pipeline_max/2) {
                         /* Start reading again if the session is throttled and
                          * the number of outgoing requests is below watermark. */
-                       session_start_read(s);
-                       session_flags(s)->throttled = false;
+                       session2_start_read(s);
+                       s->throttled = false;
                 }
         }
  
@@ -648,32 +651,40 @@ static void on_write(uv_write_t *req, int status)
         free(req);
  }
  
-static int qr_task_send(struct qr_task *task, struct session *session,
+static void qr_task_wrap_finished(int status, struct session2 *session, const void *target, void *baton)
+{      /* Completion callback given to session2_wrap() by qr_task_send(); `baton` is the qr_task being sent, `target` is unused here. */
+       struct qr_task *task = baton;
+       qr_task_on_send(task, session2_get_handle(session), status); /* pass the session's handle: with a NULL handle qr_task_on_send() returns before its timer/throttling handling */
+       qr_task_unref(task); /* drop the reference taken for this callback just before session2_wrap() */
+       wire_buf_reset(&session->wire_buf);
+}
+
+static int qr_task_send(struct qr_task *task, struct session2 *session,
                         const struct sockaddr *addr, knot_pkt_t *pkt)
  {
         if (!session)
                 return qr_task_on_send(task, NULL, kr_error(EIO));
  
         int ret = 0;
-       struct request_ctx *ctx = task->ctx;
+       //struct request_ctx *ctx = task->ctx; /* TODO */
  
-       uv_handle_t *handle = session_get_handle(session);
+       uv_handle_t *handle = session2_get_handle(session);
         if (kr_fails_assert(handle && handle->data == session))
                 return qr_task_on_send(task, NULL, kr_error(EINVAL));
         const bool is_stream = handle->type == UV_TCP;
         kr_require(is_stream || handle->type == UV_UDP);
  
         if (addr == NULL)
-               addr = session_get_peer(session);
+               addr = session2_get_peer(session);
  
         if (pkt == NULL)
                 pkt = worker_task_get_pktbuf(task);
  
-       if (session_flags(session)->outgoing && handle->type == UV_TCP) {
-               size_t try_limit = session_tasklist_get_len(session) + 1;
+       if (session->outgoing && handle->type == UV_TCP) {
+               size_t try_limit = session2_tasklist_get_len(session) + 1;
                 uint16_t msg_id = knot_wire_get_id(pkt->wire);
                 size_t try_count = 0;
-               while (session_tasklist_find_msgid(session, msg_id) &&
+               while (session2_tasklist_find_msgid(session, msg_id) &&
                        try_count <= try_limit) {
                         ++msg_id;
                         ++try_count;
@@ -687,73 +698,76 @@ static int qr_task_send(struct qr_task *task, struct session *session,
         task->send_time = kr_now();
         task->recv_time = 0; // task structure is being reused so we have to zero this out here
         /* Send using given protocol */
-       if (kr_fails_assert(!session_flags(session)->closing))
+       if (kr_fails_assert(!session->closing))
                 return qr_task_on_send(task, NULL, kr_error(EIO));
  
-       uv_handle_t *ioreq = malloc(is_stream ? sizeof(uv_write_t) : sizeof(uv_udp_send_t));
-       if (!ioreq)
-               return qr_task_on_send(task, handle, kr_error(ENOMEM));
-
         /* Pending ioreq on current task */
         qr_task_ref(task);
  
-       if (session_flags(session)->has_http) {
-#if ENABLE_DOH2
-               uv_write_t *write_req = (uv_write_t *)ioreq;
-               write_req->data = task;
-               ret = http_write(write_req, handle, pkt, ctx->req.qsource.stream_id, &on_write);
-#else
-               ret = kr_error(ENOPROTOOPT);
-#endif
-       } else if (session_flags(session)->has_tls) {
-               uv_write_t *write_req = (uv_write_t *)ioreq;
-               write_req->data = task;
-               ret = tls_write(write_req, handle, pkt, &on_write);
-       } else if (handle->type == UV_UDP) {
-               uv_udp_send_t *send_req = (uv_udp_send_t *)ioreq;
-               uv_buf_t buf = { (char *)pkt->wire, pkt->size };
-               send_req->data = task;
-               ret = uv_udp_send(send_req, (uv_udp_t *)handle, &buf, 1, addr, &on_send);
-       } else if (handle->type == UV_TCP) {
-               uv_write_t *write_req = (uv_write_t *)ioreq;
-               /* We need to write message length in native byte order,
-                * but we don't have a convenient place to store those bytes.
-                * The problem is that all memory referenced from buf[] MUST retain
-                * its contents at least until on_write() is called, and I currently
-                * can't see any convenient place outside the `pkt` structure.
-                * So we use directly the *individual* bytes in pkt->size.
-                * The call to htonl() and the condition will probably be inlinable. */
-               int lsbi, slsbi; /* (second) least significant byte index */
-               if (htonl(1) == 1) { /* big endian */
-                       lsbi  = sizeof(pkt->size) - 1;
-                       slsbi = sizeof(pkt->size) - 2;
-               } else {
-                       lsbi  = 0;
-                       slsbi = 1;
-               }
-               uv_buf_t buf[3] = {
-                       { (char *)&pkt->size + slsbi, 1 },
-                       { (char *)&pkt->size + lsbi,  1 },
-                       { (char *)pkt->wire, pkt->size },
-               };
-               write_req->data = task;
-               ret = uv_write(write_req, (uv_stream_t *)handle, buf, 3, &on_write);
-       } else {
-               kr_assert(false);
-       }
-
-       if (ret == 0) {
-               session_touch(session);
-               if (session_flags(session)->outgoing) {
-                       session_tasklist_add(session, task);
+       /* TODO */
+//     if (session_flags(session)->has_http) {
+//#if ENABLE_DOH2
+//             uv_write_t *write_req = (uv_write_t *)ioreq;
+//             write_req->data = task;
+//             ret = http_write(write_req, handle, pkt, ctx->req.qsource.stream_id, &on_write);
+//#else
+//             ret = kr_error(ENOPROTOOPT);
+//#endif
+//     } else if (session_flags(session)->has_tls) {
+//             uv_write_t *write_req = (uv_write_t *)ioreq;
+//             write_req->data = task;
+//             ret = tls_write(write_req, handle, pkt, &on_write);
+//     } else if (handle->type == UV_UDP) {
+//             uv_udp_send_t *send_req = (uv_udp_send_t *)ioreq;
+//             uv_buf_t buf = { (char *)pkt->wire, pkt->size };
+//             send_req->data = task;
+//             ret = uv_udp_send(send_req, (uv_udp_t *)handle, &buf, 1, addr, &on_send);
+//     } else if (handle->type == UV_TCP) {
+//             uv_write_t *write_req = (uv_write_t *)ioreq;
+//             /* We need to write message length in native byte order,
+//              * but we don't have a convenient place to store those bytes.
+//              * The problem is that all memory referenced from buf[] MUST retain
+//              * its contents at least until on_write() is called, and I currently
+//              * can't see any convenient place outside the `pkt` structure.
+//              * So we use directly the *individual* bytes in pkt->size.
+//              * The call to htonl() and the condition will probably be inlinable. */
+//             int lsbi, slsbi; /* (second) least significant byte index */
+//             if (htonl(1) == 1) { /* big endian */
+//                     lsbi  = sizeof(pkt->size) - 1;
+//                     slsbi = sizeof(pkt->size) - 2;
+//             } else {
+//                     lsbi  = 0;
+//                     slsbi = 1;
+//             }
+//             uv_buf_t buf[3] = {
+//                     { (char *)&pkt->size + slsbi, 1 },
+//                     { (char *)&pkt->size + lsbi,  1 },
+//                     { (char *)pkt->wire, pkt->size },
+//             };
+//             write_req->data = task;
+//             ret = uv_write(write_req, (uv_stream_t *)handle, buf, 3, &on_write);
+//     } else {
+//             kr_assert(false);
+//     }
+
+       /* Pending '_finished' callback on current task */
+       qr_task_ref(task);
+       ret = session2_wrap(session,
+                       protolayer_buffer((char *)pkt->wire, pkt->size),
+                       addr, qr_task_wrap_finished, task);
+
+       if (ret >= 0) {
+               session2_touch(session);
+               if (session->outgoing) {
+                       session2_tasklist_add(session, task);
                 }
                 if (the_worker->too_many_open &&
                     the_worker->stats.rconcurrent <
                         the_worker->rconcurrent_highwatermark - 10) {
                         the_worker->too_many_open = false;
                 }
+               ret = kr_ok();
         } else {
-               free(ioreq);
                 qr_task_unref(task);
                 if (ret == UV_EMFILE) {
                         the_worker->too_many_open = true;
@@ -761,9 +775,11 @@ static int qr_task_send(struct qr_task *task, struct session *session,
                         ret = kr_error(UV_EMFILE);
                 }
  
-               if (session_flags(session)->has_http)
-                       the_worker->stats.err_http += 1;
-               else if (session_flags(session)->has_tls)
+               /* TODO */
+//             if (session_flags(session)->has_http)
+//                     the_worker->stats.err_http += 1;
+//             else
+               if (session->secure)
                         the_worker->stats.err_tls += 1;
                 else if (handle->type == UV_UDP)
                         the_worker->stats.err_udp += 1;
@@ -772,8 +788,8 @@ static int qr_task_send(struct qr_task *task, struct session *session,
         }
  
         /* Update outgoing query statistics */
-       if (session_flags(session)->outgoing && addr) {
-               if (session_flags(session)->has_tls)
+       if (session->outgoing && addr) {
+               if (session->secure)
                         the_worker->stats.tls += 1;
                 else if (handle->type == UV_UDP)
                         the_worker->stats.udp += 1;
@@ -797,129 +813,132 @@ static struct kr_query *task_get_last_pending_query(struct qr_task *task)
         return array_tail(task->ctx->req.rplan.pending);
  }
  
-static int session_tls_hs_cb(struct session *session, int status)
-{
-       if (kr_fails_assert(session_flags(session)->outgoing))
-               return kr_error(EINVAL);
-       struct sockaddr *peer = session_get_peer(session);
-       int deletion_res = worker_del_tcp_waiting(peer);
-       int ret = kr_ok();
-
-       if (status) {
-               struct qr_task *task = session_waitinglist_get(session);
-               if (task) {
-                       // TLS handshake failed, report it to server selection
-                       struct kr_query *qry = array_tail(task->ctx->req.rplan.pending);
-                       qry->server_selection.error(qry, task->transport, KR_SELECTION_TLS_HANDSHAKE_FAILED);
-               }
-#ifndef NDEBUG
-               else {
-                       /* Task isn't in the list of tasks
-                        * waiting for connection to upstream.
-                        * So that it MUST be unsuccessful rehandshake.
-                        * Check it. */
-                       kr_require(deletion_res != 0);
-                       struct kr_sockaddr_key_storage key;
-                       ssize_t keylen = kr_sockaddr_key(&key, peer);
-                       if (keylen < 0)
-                               return keylen;
-                       trie_val_t *val;
-                       kr_require((val = trie_get_try(the_worker->tcp_connected, key.bytes, keylen)) && *val);
-               }
-#endif
-               return ret;
-       }
-
-       /* handshake was completed successfully */
-       struct tls_client_ctx *tls_client_ctx = session_tls_get_client_ctx(session);
-       tls_client_param_t *tls_params = tls_client_ctx->params;
-       gnutls_session_t tls_session = tls_client_ctx->c.tls_session;
-       if (gnutls_session_is_resumed(tls_session) != 0) {
-               kr_log_debug(TLSCLIENT, "TLS session has resumed\n");
-       } else {
-               kr_log_debug(TLSCLIENT, "TLS session has not resumed\n");
-               /* session wasn't resumed, delete old session data ... */
-               if (tls_params->session_data.data != NULL) {
-                       gnutls_free(tls_params->session_data.data);
-                       tls_params->session_data.data = NULL;
-                       tls_params->session_data.size = 0;
-               }
-               /* ... and get the new session data */
-               gnutls_datum_t tls_session_data = { NULL, 0 };
-               ret = gnutls_session_get_data2(tls_session, &tls_session_data);
-               if (ret == 0) {
-                       tls_params->session_data = tls_session_data;
-               }
-       }
-
-       struct session *s = worker_find_tcp_connected(peer);
-       ret = kr_ok();
-       if (deletion_res == kr_ok()) {
-               /* peer was in the waiting list, add to the connected list. */
-               if (s) {
-                       /* Something went wrong,
-                        * peer already is in the connected list. */
-                       ret = kr_error(EINVAL);
-               } else {
-                       ret = worker_add_tcp_connected(peer, session);
-               }
-       } else {
-               /* peer wasn't in the waiting list.
-                * It can be
-                * 1) either successful rehandshake; in this case peer
-                *    must be already in the connected list.
-                * 2) or successful handshake with session, which was timed out
-                *    by on_tcp_connect_timeout(); after successful tcp connection;
-                *    in this case peer isn't in the connected list.
-                **/
-               if (!s || s != session) {
-                       ret = kr_error(EINVAL);
-               }
-       }
-       if (ret == kr_ok()) {
-               while (!session_waitinglist_is_empty(session)) {
-                       struct qr_task *t = session_waitinglist_get(session);
-                       ret = qr_task_send(t, session, NULL, NULL);
-                       if (ret != 0) {
-                               break;
-                       }
-                       session_waitinglist_pop(session, true);
-               }
-       } else {
-               ret = kr_error(EINVAL);
-       }
-
-       if (ret != kr_ok()) {
-               /* Something went wrong.
-                * Either addition to the list of connected sessions
-                * or write to upstream failed. */
-               worker_del_tcp_connected(peer);
-               session_waitinglist_finalize(session, KR_STATE_FAIL);
-               session_tasklist_finalize(session, KR_STATE_FAIL);
-               session_close(session);
-       } else {
-               session_timer_stop(session);
-               session_timer_start(session, tcp_timeout_trigger,
-                                   MAX_TCP_INACTIVITY, MAX_TCP_INACTIVITY);
-       }
-       return kr_ok();
-}
-
-static int send_waiting(struct session *session)
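+/* TODO: tls -- this handshake callback is disabled for now; the commented-out body below is only partly converted to the session2 API. */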
+//static int session_tls_hs_cb(struct session2 *session, int status)
+//{
+//     if (kr_fails_assert(session->outgoing))
+//             return kr_error(EINVAL);
+//     struct sockaddr *peer = session2_get_peer(session);
+//     int deletion_res = worker_del_tcp_waiting(peer);
+//     int ret = kr_ok();
+//
+//     if (status) {
+//             struct qr_task *task = session2_waitinglist_get(session);
+//             if (task) {
+//                     // TLS handshake failed, report it to server selection
+//                     struct kr_query *qry = array_tail(task->ctx->req.rplan.pending);
+//                     qry->server_selection.error(qry, task->transport, KR_SELECTION_TLS_HANDSHAKE_FAILED);
+//             }
+//#ifndef NDEBUG
+//             else {
+//                     /* Task isn't in the list of tasks
+//                      * waiting for connection to upstream.
+//                      * So that it MUST be unsuccessful rehandshake.
+//                      * Check it. */
+//                     kr_require(deletion_res != 0);
+//                     struct kr_sockaddr_key_storage key;
+//                     ssize_t keylen = kr_sockaddr_key(&key, peer);
+//                     if (keylen < 0)
+//                             return keylen;
+//                     trie_val_t *val;
+//                     kr_require((val = trie_get_try(the_worker->tcp_connected, key.bytes, keylen)) && *val);
+//             }
+//#endif
+//             return ret;
+//     }
+//
+//     /* handshake was completed successfully */
+//     struct tls_client_ctx *tls_client_ctx = session_tls_get_client_ctx(session);
+//     tls_client_param_t *tls_params = tls_client_ctx->params;
+//     gnutls_session_t tls_session = tls_client_ctx->c.tls_session;
+//     if (gnutls_session_is_resumed(tls_session) != 0) {
+//             kr_log_debug(TLSCLIENT, "TLS session has resumed\n");
+//     } else {
+//             kr_log_debug(TLSCLIENT, "TLS session has not resumed\n");
+//             /* session wasn't resumed, delete old session data ... */
+//             if (tls_params->session_data.data != NULL) {
+//                     gnutls_free(tls_params->session_data.data);
+//                     tls_params->session_data.data = NULL;
+//                     tls_params->session_data.size = 0;
+//             }
+//             /* ... and get the new session data */
+//             gnutls_datum_t tls_session_data = { NULL, 0 };
+//             ret = gnutls_session_get_data2(tls_session, &tls_session_data);
+//             if (ret == 0) {
+//                     tls_params->session_data = tls_session_data;
+//             }
+//     }
+//
+//     struct session2 *s = worker_find_tcp_connected(peer);
+//     ret = kr_ok();
+//     if (deletion_res == kr_ok()) {
+//             /* peer was in the waiting list, add to the connected list. */
+//             if (s) {
+//                     /* Something went wrong,
+//                      * peer already is in the connected list. */
+//                     ret = kr_error(EINVAL);
+//             } else {
+//                     ret = worker_add_tcp_connected(peer, session);
+//             }
+//     } else {
+//             /* peer wasn't in the waiting list.
+//              * It can be
+//              * 1) either successful rehandshake; in this case peer
+//              *    must be already in the connected list.
+//              * 2) or successful handshake with session, which was timed out
+//              *    by on_tcp_connect_timeout(); after successful tcp connection;
+//              *    in this case peer isn't in the connected list.
+//              **/
+//             if (!s || s != session) {
+//                     ret = kr_error(EINVAL);
+//             }
+//     }
+//     if (ret == kr_ok()) {
+//             while (!session_waitinglist_is_empty(session)) {
+//                     struct qr_task *t = session_waitinglist_get(session);
+//                     ret = qr_task_send(t, session, NULL, NULL);
+//                     if (ret != 0) {
+//                             break;
+//                     }
+//                     session_waitinglist_pop(session, true);
+//             }
+//     } else {
+//             ret = kr_error(EINVAL);
+//     }
+//
+//     if (ret != kr_ok()) {
+//             /* Something went wrong.
+//              * Either addition to the list of connected sessions
+//              * or write to upstream failed. */
+//             worker_del_tcp_connected(peer);
+//             session_waitinglist_finalize(session, KR_STATE_FAIL);
+//             session_tasklist_finalize(session, KR_STATE_FAIL);
+//             session_close(session);
+//     } else {
+//             session_timer_stop(session);
+//             session_timer_start(session, tcp_timeout_trigger,
+//                                 MAX_TCP_INACTIVITY, MAX_TCP_INACTIVITY);
+//     }
+//     return kr_ok();
+//}
+
+static int send_waiting(struct session2 *session) /* Sends each task on the session's waiting list via qr_task_send(); returns 0, or the first non-zero qr_task_send() result. */
  {
         int ret = 0;
-       while (!session_waitinglist_is_empty(session)) {
-               struct qr_task *t = session_waitinglist_get(session);
+       while (!session2_waitinglist_is_empty(session)) { /* loop until the waiting list is empty or a send fails */
+               struct qr_task *t = session2_waitinglist_get(session);
                 ret = qr_task_send(t, session, NULL, NULL);
                 if (ret != 0) {
-                       struct sockaddr *peer = session_get_peer(session);
-                       session_waitinglist_finalize(session, KR_STATE_FAIL);
-                       session_tasklist_finalize(session, KR_STATE_FAIL);
+                       struct sockaddr *peer = session2_get_peer(session); /* fetched before the finalize/close calls below */
+                       session2_waitinglist_finalize(session, KR_STATE_FAIL);
+                       session2_tasklist_finalize(session, KR_STATE_FAIL);
                         worker_del_tcp_connected(peer);
-                       session_close(session);
+                       session2_unwrap(session, /* takes the place of the removed session_close(): a CLOSE event is passed to session2_unwrap() */
+                                       protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                                       NULL, NULL, NULL);
                         break;
                 }
-               session_waitinglist_pop(session, true);
+               session2_waitinglist_pop(session, true); /* reached only when qr_task_send() returned 0 for this task */
         }
         return ret;
  }
@@ -928,16 +947,16 @@ static void on_connect(uv_connect_t *req, int status)
  {
         kr_require(the_worker);
         uv_stream_t *handle = req->handle;
-       struct session *session = handle->data;
-       struct sockaddr *peer = session_get_peer(session);
+       struct session2 *session = handle->data;
+       struct sockaddr *peer = session2_get_peer(session);
         free(req);
  
-       if (kr_fails_assert(session_flags(session)->outgoing))
+       if (kr_fails_assert(session->outgoing))
                 return;
  
-       if (session_flags(session)->closing) {
+       if (session->closing) {
                 worker_del_tcp_waiting(peer);
-               kr_assert(session_is_empty(session));
+               kr_assert(session2_is_empty(session));
                 return;
         }
  
@@ -947,7 +966,7 @@ static void on_connect(uv_connect_t *req, int status)
          * If no, most likely this is timed out connection
          * which was removed from waiting list by
          * on_tcp_connect_timeout() callback. */
-       struct session *s = worker_find_tcp_waiting(peer);
+       struct session2 *s = worker_find_tcp_waiting(peer);
         if (!s || s != session) {
                 /* session isn't on the waiting list.
                  * it's timed out session. */
@@ -957,9 +976,11 @@ static void on_connect(uv_connect_t *req, int status)
                                         "is already timed out, close\n",
                                         peer_str ? peer_str : "");
                 }
-               kr_assert(session_tasklist_is_empty(session));
-               session_waitinglist_retry(session, false);
-               session_close(session);
+               kr_assert(session2_tasklist_is_empty(session));
+               session2_waitinglist_retry(session, false);
+               session2_unwrap(session,
+                               protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                               NULL, NULL, NULL);
                 return;
         }
  
@@ -974,9 +995,11 @@ static void on_connect(uv_connect_t *req, int status)
                                         "is already connected, close\n",
                                         peer_str ? peer_str : "");
                 }
-               kr_assert(session_tasklist_is_empty(session));
-               session_waitinglist_retry(session, false);
-               session_close(session);
+               kr_assert(session2_tasklist_is_empty(session));
+               session2_waitinglist_retry(session, false);
+               session2_unwrap(session,
+                               protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                               NULL, NULL, NULL);
                 return;
         }
  
@@ -987,7 +1010,7 @@ static void on_connect(uv_connect_t *req, int status)
                                         peer_str ? peer_str : "", uv_strerror(status));
                 }
                 worker_del_tcp_waiting(peer);
-               struct qr_task *task = session_waitinglist_get(session);
+               struct qr_task *task = session2_waitinglist_get(session);
                 if (task && status != UV_ETIMEDOUT) {
                         /* Penalize upstream.
                         * In case of UV_ETIMEDOUT upstream has been
@@ -995,21 +1018,25 @@ static void on_connect(uv_connect_t *req, int status)
                         struct kr_query *qry = array_tail(task->ctx->req.rplan.pending);
                         qry->server_selection.error(qry, task->transport, KR_SELECTION_TCP_CONNECT_FAILED);
                 }
-               kr_assert(session_tasklist_is_empty(session));
-               session_waitinglist_retry(session, false);
-               session_close(session);
+               kr_assert(session2_tasklist_is_empty(session));
+               session2_waitinglist_retry(session, false);
+               session2_unwrap(session,
+                               protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                               NULL, NULL, NULL);
                 return;
         }
  
-       if (!session_flags(session)->has_tls) {
+       if (!session->secure) {
                 /* if there is a TLS, session still waiting for handshake,
                  * otherwise remove it from waiting list */
                 if (worker_del_tcp_waiting(peer) != 0) {
                         /* session isn't in list of waiting queries, *
                          * something gone wrong */
-                       session_waitinglist_finalize(session, KR_STATE_FAIL);
-                       kr_assert(session_tasklist_is_empty(session));
-                       session_close(session);
+                       session2_waitinglist_finalize(session, KR_STATE_FAIL);
+                       kr_assert(session2_tasklist_is_empty(session));
+                       session2_unwrap(session,
+                                       protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                                       NULL, NULL, NULL);
                         return;
                 }
         }
@@ -1019,46 +1046,48 @@ static void on_connect(uv_connect_t *req, int status)
                 kr_log_debug(WORKER, "=> connected to '%s'\n", peer_str ? peer_str : "");
         }
  
-       session_flags(session)->connected = true;
-       session_start_read(session);
+       /* TODO: restore the disabled 'connected' flag and TLS handshake below */
+//     session->connected = true;
+       session2_start_read(session);
  
         int ret = kr_ok();
-       if (session_flags(session)->has_tls) {
-               struct tls_client_ctx *tls_ctx = session_tls_get_client_ctx(session);
-               ret = tls_client_connect_start(tls_ctx, session, session_tls_hs_cb);
-               if (ret == kr_error(EAGAIN)) {
-                       session_timer_stop(session);
-                       session_timer_start(session, tcp_timeout_trigger,
-                                           MAX_TCP_INACTIVITY, MAX_TCP_INACTIVITY);
-                       return;
-               }
-       } else {
+//     if (session->secure) {
+//             struct tls_client_ctx *tls_ctx = session_tls_get_client_ctx(session);
+//             ret = tls_client_connect_start(tls_ctx, session, session_tls_hs_cb);
+//             if (ret == kr_error(EAGAIN)) {
+//                     session_timer_stop(session);
+//                     session_timer_start(session, tcp_timeout_trigger,
+//                                         MAX_TCP_INACTIVITY, MAX_TCP_INACTIVITY);
+//                     return;
+//             }
+//     } else {
                 worker_add_tcp_connected(peer, session);
-       }
+//     }
  
         ret = send_waiting(session);
         if (ret != 0) {
                 return;
         }
  
-       session_timer_stop(session);
-       session_timer_start(session, tcp_timeout_trigger,
-                           MAX_TCP_INACTIVITY, MAX_TCP_INACTIVITY);
+       session2_timer_stop(session);
+       session2_timer_start(session,
+                           MAX_TCP_INACTIVITY, MAX_TCP_INACTIVITY,
+                           PROTOLAYER_UNWRAP);
  }
  
  static void on_tcp_connect_timeout(uv_timer_t *timer)
  {
-       struct session *session = timer->data;
+       struct session2 *session = timer->data;
  
         uv_timer_stop(timer);
         kr_require(the_worker);
  
-       kr_assert(session_tasklist_is_empty(session));
+       kr_assert(session2_tasklist_is_empty(session));
  
-       struct sockaddr *peer = session_get_peer(session);
+       struct sockaddr *peer = session2_get_peer(session);
         worker_del_tcp_waiting(peer);
  
-       struct qr_task *task = session_waitinglist_get(session);
+       struct qr_task *task = session2_waitinglist_get(session);
         if (!task) {
                 /* Normally shouldn't happen. */
                 const char *peer_str = kr_straddr(peer);
@@ -1076,9 +1105,9 @@ static void on_tcp_connect_timeout(uv_timer_t *timer)
  
         qry->server_selection.error(qry, task->transport, KR_SELECTION_TCP_CONNECT_TIMEOUT);
  
-       the_worker->stats.timeout += session_waitinglist_get_len(session);
-       session_waitinglist_retry(session, true);
-       kr_assert(session_tasklist_is_empty(session));
+       the_worker->stats.timeout += session2_waitinglist_get_len(session);
+       session2_waitinglist_retry(session, true);
+       kr_assert(session2_tasklist_is_empty(session));
         /* uv_cancel() doesn't support uv_connect_t request,
          * so that we can't cancel it.
          * There still exists possibility of successful connection
@@ -1092,14 +1121,14 @@ static void on_tcp_connect_timeout(uv_timer_t *timer)
  /* This is called when I/O timeouts */
  static void on_udp_timeout(uv_timer_t *timer)
  {
-       struct session *session = timer->data;
-       kr_assert(session_get_handle(session)->data == session);
-       kr_assert(session_tasklist_get_len(session) == 1);
-       kr_assert(session_waitinglist_is_empty(session));
+       struct session2 *session = timer->data;
+       kr_assert(session2_get_handle(session)->data == session);
+       kr_assert(session2_tasklist_get_len(session) == 1);
+       kr_assert(session2_waitinglist_is_empty(session));
  
         uv_timer_stop(timer);
  
-       struct qr_task *task = session_tasklist_get_first(session);
+       struct qr_task *task = session2_tasklist_get_first(session);
         if (!task)
                 return;
  
@@ -1133,24 +1162,26 @@ static uv_handle_t *transmit(struct qr_task *task)
                 if (kr_resolve_checkout(&ctx->req, NULL, transport, task->pktbuf) != 0) {
                         return ret;
                 }
-               ret = ioreq_spawn(SOCK_DGRAM, choice->sin6_family, false, false);
+               ret = ioreq_spawn(SOCK_DGRAM, choice->sin6_family, PROTOLAYER_GRP_DOUDP);
                 if (!ret) {
                         return ret;
                 }
                 struct sockaddr *addr = (struct sockaddr *)choice;
-               struct session *session = ret->data;
-               struct sockaddr *peer = session_get_peer(session);
-               kr_assert(peer->sa_family == AF_UNSPEC && session_flags(session)->outgoing);
+               struct session2 *session = ret->data;
+               struct sockaddr *peer = session2_get_peer(session);
+               kr_assert(peer->sa_family == AF_UNSPEC && session->outgoing);
                 kr_require(addr->sa_family == AF_INET || addr->sa_family == AF_INET6);
                 memcpy(peer, addr, kr_sockaddr_len(addr));
                 if (qr_task_send(task, session, (struct sockaddr *)choice,
                                  task->pktbuf) != 0) {
-                       session_close(session);
+                       session2_unwrap(session,
+                                       protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                                       NULL, NULL, NULL);
                         ret = NULL;
                 } else {
                         task->pending[task->pending_count] = session;
                         task->pending_count += 1;
-                       session_start_read(session); /* Start reading answer */
+                       session2_start_read(session); /* Start reading answer */
                 }
         }
         return ret;
@@ -1299,7 +1330,7 @@ static int qr_task_finalize(struct qr_task *task, int state)
                 return kr_ok();
         }
         struct request_ctx *ctx = task->ctx;
-       struct session *source_session = ctx->source.session;
+       struct session2 *source_session = ctx->source.session;
         kr_resolve_finish(&ctx->req, state);
  
         task->finished = true;
@@ -1317,7 +1348,7 @@ static int qr_task_finalize(struct qr_task *task, int state)
                 return kr_ok();
         }
  
-       if (session_flags(source_session)->closing ||
+       if (source_session->closing ||
             ctx->source.addr.ip.sa_family == AF_UNSPEC)
                 return kr_error(EINVAL);
  
@@ -1326,7 +1357,8 @@ static int qr_task_finalize(struct qr_task *task, int state)
  
         /* Send back answer */
         int ret;
-       const uv_handle_t *src_handle = session_get_handle(source_session);
+       const uv_handle_t *src_handle = session2_get_handle(source_session);
+       /* TODO: this should probably just be a _wrap? */
         if (kr_fails_assert(src_handle->type == UV_UDP || src_handle->type == UV_TCP
                        || src_handle->type == UV_POLL)) {
                 ret = kr_error(EINVAL);
@@ -1346,8 +1378,8 @@ static int qr_task_finalize(struct qr_task *task, int state)
         if (ret != kr_ok()) {
                 (void) qr_task_on_send(task, NULL, kr_error(EIO));
                 /* Since source session is erroneous detach all tasks. */
-               while (!session_tasklist_is_empty(source_session)) {
-                       struct qr_task *t = session_tasklist_del_first(source_session, false);
+               while (!session2_tasklist_is_empty(source_session)) {
+                       struct qr_task *t = session2_tasklist_del_first(source_session, false);
                         struct request_ctx *c = t->ctx;
                         kr_assert(c->source.session == source_session);
                         c->source.session = NULL;
@@ -1356,7 +1388,9 @@ static int qr_task_finalize(struct qr_task *task, int state)
                          * (ie. task->leading is true) */
                         worker_task_unref(t);
                 }
-               session_close(source_session);
+               session2_unwrap(source_session,
+                               protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                               NULL, NULL, NULL);
         }
  
         qr_task_unref(task);
@@ -1388,22 +1422,22 @@ static int udp_task_step(struct qr_task *task,
         return kr_ok();
  }
  
-static int tcp_task_waiting_connection(struct session *session, struct qr_task *task)
+static int tcp_task_waiting_connection(struct session2 *session, struct qr_task *task)
  {
-       if (kr_fails_assert(session_flags(session)->outgoing && !session_flags(session)->closing))
+       if (kr_fails_assert(session->outgoing && !session->closing))
                 return kr_error(EINVAL);
         /* Add task to the end of list of waiting tasks.
          * It will be notified in on_connect() or qr_task_on_send(). */
-       int ret = session_waitinglist_push(session, task);
+       int ret = session2_waitinglist_push(session, task);
         if (ret < 0) {
                 return kr_error(EINVAL);
         }
         return kr_ok();
  }
  
-static int tcp_task_existing_connection(struct session *session, struct qr_task *task)
+static int tcp_task_existing_connection(struct session2 *session, struct qr_task *task)
  {
-       if (kr_fails_assert(session_flags(session)->outgoing && !session_flags(session)->closing))
+       if (kr_fails_assert(session->outgoing && !session->closing))
                 return kr_error(EINVAL);
  
         /* If there are any unsent queries, send it first. */
@@ -1413,7 +1447,7 @@ static int tcp_task_existing_connection(struct session *session, struct qr_task
         }
  
         /* No unsent queries at that point. */
-       if (session_tasklist_get_len(session) >= the_worker->tcp_pipeline_max) {
+       if (session2_tasklist_get_len(session) >= the_worker->tcp_pipeline_max) {
                 /* Too many outstanding queries, answer with SERVFAIL, */
                 return kr_error(EINVAL);
         }
@@ -1423,9 +1457,11 @@ static int tcp_task_existing_connection(struct session *session, struct qr_task
         if (ret != 0) {
                 /* Error, finalize task with SERVFAIL and
                  * close connection to upstream. */
-               session_tasklist_finalize(session, KR_STATE_FAIL);
-               worker_del_tcp_connected(session_get_peer(session));
-               session_close(session);
+               session2_tasklist_finalize(session, KR_STATE_FAIL);
+               worker_del_tcp_connected(session2_get_peer(session));
+               session2_unwrap(session,
+                               protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                               NULL, NULL, NULL);
                 return kr_error(EINVAL);
         }
  
@@ -1435,63 +1471,70 @@ static int tcp_task_existing_connection(struct session *session, struct qr_task
  static int tcp_task_make_connection(struct qr_task *task, const struct sockaddr *addr)
  {
         /* Check if there must be TLS */
-       struct tls_client_ctx *tls_ctx = NULL;
-       tls_client_param_t *entry = tls_client_param_get(
-                       the_network->tls_client_params, addr);
-       if (entry) {
-               /* Address is configured to be used with TLS.
-                * We need to allocate auxiliary data structure. */
-               tls_ctx = tls_client_ctx_new(entry);
-               if (!tls_ctx) {
-                       return kr_error(EINVAL);
-               }
-       }
+//     struct tls_client_ctx *tls_ctx = NULL;
+//     tls_client_param_t *entry = tls_client_param_get(
+//                     the_network->tls_client_params, addr);
+//     if (entry) {
+//             /* Address is configured to be used with TLS.
+//              * We need to allocate auxiliary data structure. */
+//             tls_ctx = tls_client_ctx_new(entry);
+//             if (!tls_ctx) {
+//                     return kr_error(EINVAL);
+//             }
+//     }
  
         uv_connect_t *conn = malloc(sizeof(uv_connect_t));
         if (!conn) {
-               tls_client_ctx_free(tls_ctx);
+//             tls_client_ctx_free(tls_ctx);
                 return kr_error(EINVAL);
         }
-       bool has_http = false;
-       bool has_tls = (tls_ctx != NULL);
-       uv_handle_t *client = ioreq_spawn(SOCK_STREAM, addr->sa_family, has_tls, has_http);
-       if (!client) {
-               tls_client_ctx_free(tls_ctx);
-               free(conn);
-               return kr_error(EINVAL);
-       }
-       struct session *session = client->data;
-       if (kr_fails_assert(session_flags(session)->has_tls == has_tls)) {
-               tls_client_ctx_free(tls_ctx);
-               free(conn);
-               return kr_error(EINVAL);
-       }
-       if (has_tls) {
-               tls_client_ctx_set_session(tls_ctx, session);
-               session_tls_set_client_ctx(session, tls_ctx);
-       }
+//     bool has_tls = (tls_ctx != NULL);
+//     uv_handle_t *client = ioreq_spawn(SOCK_STREAM, addr->sa_family,
+//                     (has_tls) ? PROTOLAYER_GRP_DOTLS : PROTOLAYER_GRP_DOTCP);
+       uv_handle_t *client = ioreq_spawn(SOCK_STREAM, addr->sa_family,
+                       PROTOLAYER_GRP_DOTCP);
+       if (!client) {
+               /* No handle was created; TODO: tls - free tls_ctx here too */
+               free(conn);
+               return kr_error(EINVAL);
+       }
+       struct session2 *session = client->data;
+       /* TODO: tls */
+//     if (kr_fails_assert(session->secure == has_tls)) {
+//             tls_client_ctx_free(tls_ctx);
+//             free(conn);
+//             return kr_error(EINVAL);
+//     }
+//     if (has_tls) {
+//             tls_client_ctx_set_session(tls_ctx, session);
+//             session_tls_set_client_ctx(session, tls_ctx);
+//     }
  
         /* Add address to the waiting list.
          * Now it "is waiting to be connected to." */
         int ret = worker_add_tcp_waiting(addr, session);
         if (ret < 0) {
                 free(conn);
-               session_close(session);
+               session2_unwrap(session,
+                               protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                               NULL, NULL, NULL);
                 return kr_error(EINVAL);
         }
  
         conn->data = session;
         /*  Store peer address for the session. */
-       struct sockaddr *peer = session_get_peer(session);
+       struct sockaddr *peer = session2_get_peer(session);
         memcpy(peer, addr, kr_sockaddr_len(addr));
  
         /*  Start watchdog to catch eventual connection timeout. */
-       ret = session_timer_start(session, on_tcp_connect_timeout,
-                                 KR_CONN_RTT_MAX, 0);
+       ret = session2_timer_start(session,
+                                 KR_CONN_RTT_MAX, 0, PROTOLAYER_UNWRAP);
         if (ret != 0) {
                 worker_del_tcp_waiting(addr);
                 free(conn);
-               session_close(session);
+               session2_unwrap(session,
+                               protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                               NULL, NULL, NULL);
                 return kr_error(EINVAL);
         }
  
@@ -1504,22 +1547,26 @@ static int tcp_task_make_connection(struct qr_task *task, const struct sockaddr
         /*  Start connection process to upstream. */
         ret = uv_tcp_connect(conn, (uv_tcp_t *)client, addr , on_connect);
         if (ret != 0) {
-               session_timer_stop(session);
+               session2_timer_stop(session);
                 worker_del_tcp_waiting(addr);
                 free(conn);
-               session_close(session);
+               session2_unwrap(session,
+                               protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                               NULL, NULL, NULL);
                 qry->server_selection.error(qry, task->transport, KR_SELECTION_TCP_CONNECT_FAILED);
                 return kr_error(EAGAIN);
         }
  
         /* Add task to the end of list of waiting tasks.
          * Will be notified either in on_connect() or in qr_task_on_send(). */
-       ret = session_waitinglist_push(session, task);
+       ret = session2_waitinglist_push(session, task);
         if (ret < 0) {
-               session_timer_stop(session);
+               session2_timer_stop(session);
                 worker_del_tcp_waiting(addr);
                 free(conn);
-               session_close(session);
+               session2_unwrap(session,
+                               protolayer_event_nd(PROTOLAYER_EVENT_CLOSE),
+                               NULL, NULL, NULL);
                 return kr_error(EINVAL);
         }
  
@@ -1550,7 +1597,7 @@ static int tcp_task_step(struct qr_task *task,
                 return qr_task_finalize(task, KR_STATE_FAIL);
         }
         int ret;
-       struct session* session = NULL;
+       struct session2* session = NULL;
         if ((session = worker_find_tcp_waiting(addr)) != NULL) {
                 /* Connection is in the list of waiting connections.
                  * It means that connection establishing is coming right now. */
@@ -1665,18 +1712,39 @@ static int qr_task_step(struct qr_task *task,
         }
  }
  
-int worker_submit(struct session *session, struct io_comm_data *comm,
+/* Parse the wire data of `query`.
+ *
+ * Returns kr_ok() on success, kr_error(EINVAL) for a NULL packet,
+ * kr_error(EMSGSIZE) on trailing data and kr_error(EPROTO) otherwise. */
+static int parse_packet(knot_pkt_t *query)
+{
+       if (!query)
+               return kr_error(EINVAL);
+
+       switch (knot_pkt_parse(query, 0)) {
+       case KNOT_EOK:
+               return kr_ok();
+       case KNOT_ETRAIL:
+               /* Extra data after message end. */
+               return kr_error(EMSGSIZE);
+       default:
+               /* Malformed query. */
+               return kr_error(EPROTO);
+       }
+}
+
+int worker_submit(struct session2 *session, struct comm_info *comm,
                    const uint8_t *eth_from, const uint8_t *eth_to, knot_pkt_t *pkt)
  {
         if (!session || !pkt)
                 return kr_error(EINVAL);
  
-       uv_handle_t *handle = session_get_handle(session);
+       uv_handle_t *handle = session2_get_handle(session);
         if (!handle || !handle->loop->data)
                 return kr_error(EINVAL);
  
         const bool is_query = (knot_wire_get_qr(pkt->wire) == 0);
-       const bool is_outgoing = session_flags(session)->outgoing;
+       const bool is_outgoing = session->outgoing;
  
         int ret = knot_pkt_parse(pkt, 0);
         if (ret == KNOT_ETRAIL && is_outgoing && !kr_fails_assert(pkt->parsed < pkt->size))
@@ -1684,13 +1752,14 @@ int worker_submit(struct session *session, struct io_comm_data *comm,
  
         struct http_ctx *http_ctx = NULL;
  #if ENABLE_DOH2
-       http_ctx = session_http_get_server_ctx(session);
-
-       /* Badly formed query when using DoH leads to a Bad Request */
-       if (http_ctx && !is_outgoing && ret) {
-               http_send_status(session, HTTP_STATUS_BAD_REQUEST);
-               return kr_error(ret);
-       }
+       /* TODO: devise a way to do this... don't know yet */
+//     http_ctx = session_http_get_server_ctx(session);
+//
+//     /* Badly formed query when using DoH leads to a Bad Request */
+//     if (http_ctx && !is_outgoing && ret) {
+//             http_send_status(session, HTTP_STATUS_BAD_REQUEST);
+//             return ret;
+//     }
  #endif
  
         if (!is_outgoing && http_ctx && queue_len(http_ctx->streams) <= 0)
@@ -1737,31 +1806,31 @@ int worker_submit(struct session *session, struct io_comm_data *comm,
                 }
         } else { /* response from upstream */
                 const uint16_t id = knot_wire_get_id(pkt->wire);
-               task = session_tasklist_del_msgid(session, id);
+               task = session2_tasklist_del_msgid(session, id);
                 if (task == NULL) {
                         VERBOSE_MSG(NULL, "=> ignoring packet with mismatching ID %d\n",
                                         (int)id);
                         return kr_error(ENOENT);
                 }
-               if (kr_fails_assert(!session_flags(session)->closing))
+               if (kr_fails_assert(!session->closing))
                         return kr_error(EINVAL);
                 addr = (comm) ? comm->src_addr : NULL;
                 /* Note receive time for RTT calculation */
                 task->recv_time = kr_now();
         }
-       if (kr_fails_assert(!uv_is_closing(session_get_handle(session))))
+       if (kr_fails_assert(!uv_is_closing(session2_get_handle(session))))
                 return kr_error(EINVAL);
  
         /* Packet was successfully parsed.
          * Task was created (found). */
-       session_touch(session);
+       session2_touch(session);
  
         /* Consume input and produce next message */
         return qr_task_step(task, addr, pkt);
  }
  
  static int trie_add_tcp_session(trie_t *trie, const struct sockaddr *addr,
-                                struct session *session)
+                                struct session2 *session)
  {
         if (kr_fails_assert(trie && addr))
                 return kr_error(EINVAL);
@@ -1788,7 +1857,7 @@ static int trie_del_tcp_session(trie_t *trie, const struct sockaddr *addr)
         return ret ? kr_error(ENOENT) : kr_ok();
  }
  
-static struct session *trie_find_tcp_session(trie_t *trie,
+static struct session2 *trie_find_tcp_session(trie_t *trie,
                                               const struct sockaddr *addr)
  {
         if (kr_fails_assert(trie && addr))
@@ -1801,7 +1870,7 @@ static struct session *trie_find_tcp_session(trie_t *trie,
         return val ? *val : NULL;
  }
  
-int worker_add_tcp_connected(const struct sockaddr* addr, struct session *session)
+int worker_add_tcp_connected(const struct sockaddr* addr, struct session2 *session)
  {
         return trie_add_tcp_session(the_worker->tcp_connected, addr, session);
  }
@@ -1811,13 +1880,13 @@ int worker_del_tcp_connected(const struct sockaddr* addr)
         return trie_del_tcp_session(the_worker->tcp_connected, addr);
  }
  
-struct session* worker_find_tcp_connected(const struct sockaddr* addr)
+struct session2* worker_find_tcp_connected(const struct sockaddr* addr)
  {
         return trie_find_tcp_session(the_worker->tcp_connected, addr);
  }
  
  static int worker_add_tcp_waiting(const struct sockaddr* addr,
-                                 struct session *session)
+                                 struct session2 *session)
  {
         return trie_add_tcp_session(the_worker->tcp_waiting, addr, session);
  }
@@ -1827,41 +1896,28 @@ int worker_del_tcp_waiting(const struct sockaddr* addr)
         return trie_del_tcp_session(the_worker->tcp_waiting, addr);
  }
  
-struct session* worker_find_tcp_waiting(const struct sockaddr* addr)
+struct session2* worker_find_tcp_waiting(const struct sockaddr* addr)
  {
         return trie_find_tcp_session(the_worker->tcp_waiting, addr);
  }
  
-int worker_end_tcp(struct session *session)
+int worker_end_tcp(struct session2 *session)
  {
         if (!session)
                 return kr_error(EINVAL);
  
-       session_timer_stop(session);
+       session2_timer_stop(session);
  
-       struct sockaddr *peer = session_get_peer(session);
+       struct sockaddr *peer = session2_get_peer(session);
  
         worker_del_tcp_waiting(peer);
         worker_del_tcp_connected(peer);
-       session_flags(session)->connected = false;
-
-       struct tls_client_ctx *tls_client_ctx = session_tls_get_client_ctx(session);
-       if (tls_client_ctx) {
-               /* Avoid gnutls_bye() call */
-               tls_set_hs_state(&tls_client_ctx->c, TLS_HS_NOT_STARTED);
-       }
  
-       struct tls_ctx *tls_ctx = session_tls_get_server_ctx(session);
-       if (tls_ctx) {
-               /* Avoid gnutls_bye() call */
-               tls_set_hs_state(&tls_ctx->c, TLS_HS_NOT_STARTED);
-       }
-
-       while (!session_waitinglist_is_empty(session)) {
-               struct qr_task *task = session_waitinglist_pop(session, false);
+       while (!session2_waitinglist_is_empty(session)) {
+               struct qr_task *task = session2_waitinglist_pop(session, false);
                 kr_assert(task->refs > 1);
-               session_tasklist_del(session, task);
-               if (session_flags(session)->outgoing) {
+               session2_tasklist_del(session, task);
+               if (session->outgoing) {
                         if (task->ctx->req.options.FORWARD) {
                                 /* We are in TCP_FORWARD mode.
                                  * To prevent failing at kr_resolve_consume()
@@ -1879,9 +1935,9 @@ int worker_end_tcp(struct session *session)
                 }
                 worker_task_unref(task);
         }
-       while (!session_tasklist_is_empty(session)) {
-               struct qr_task *task = session_tasklist_del_first(session, false);
-               if (session_flags(session)->outgoing) {
+       while (!session2_tasklist_is_empty(session)) {
+               struct qr_task *task = session2_tasklist_del_first(session, false);
+               if (session->outgoing) {
                         if (task->ctx->req.options.FORWARD) {
                                 struct kr_request *req = &task->ctx->req;
                                 struct kr_rplan *rplan = &req->rplan;
@@ -1895,8 +1951,25 @@ int worker_end_tcp(struct session *session)
                 }
                 worker_task_unref(task);
         }
-       session_close(session);
+
         return kr_ok();
+
+//     session_flags(session)->connected = false;
+//
+//     struct tls_client_ctx *tls_client_ctx = session_tls_get_client_ctx(session);
+//     if (tls_client_ctx) {
+//             /* Avoid gnutls_bye() call */
+//             tls_set_hs_state(&tls_client_ctx->c, TLS_HS_NOT_STARTED);
+//     }
+//
+//     struct tls_ctx *tls_ctx = session_tls_get_server_ctx(session);
+//     if (tls_ctx) {
+//             /* Avoid gnutls_bye() call */
+//             tls_set_hs_state(&tls_ctx->c, TLS_HS_NOT_STARTED);
+//     }
+//
+//     session_close(session);
+//     return kr_ok();
  }
  
  knot_pkt_t *worker_resolve_mk_pkt_dname(knot_dname_t *qname, uint16_t qtype, uint16_t qclass,
@@ -2044,7 +2117,7 @@ struct request_ctx *worker_task_get_request(struct qr_task *task)
         return task->ctx;
  }
  
-struct session *worker_request_get_source_session(const struct kr_request *req)
+struct session2 *worker_request_get_source_session(const struct kr_request *req)
  {
         static_assert(offsetof(struct request_ctx, req) == 0,
                         "Bad struct request_ctx definition.");
@@ -2112,12 +2185,253 @@ void worker_deinit(void)
         the_worker = NULL;
  }
  
+/** Create a packet over the given buffer using the worker's packet pool.
+ *
+ * Thin wrapper around knot_pkt_new(); its result is returned unchanged
+ * (callers in this file treat NULL as a failure). */
+static inline knot_pkt_t *produce_packet_dgram(char *buf, size_t buf_len)
+{
+       return knot_pkt_new(buf, buf_len, &the_worker->pkt_pool);
+}
+
+/** Unwrap callback of the DNS datagram layer.
+ *
+ * Event payloads are passed on via protolayer_continue().  For data
+ * payloads, each buffer (every element of an iovec, a plain buffer, or the
+ * data held in a wire buffer) is turned into a packet and handed to
+ * worker_submit(); protolayer_break() is then called with the resulting
+ * code (KNOT_EMALF if a packet could not be created). */
+static enum protolayer_cb_result pl_dns_dgram_unwrap(
+               struct protolayer_data *layer, struct protolayer_cb_ctx *ctx)
+{
+       struct session2 *session = ctx->manager->session;
+
+       switch (ctx->payload.type) {
+       case PROTOLAYER_PAYLOAD_EVENT:
+               /* pass thru */
+               return protolayer_continue(ctx);
+
+       case PROTOLAYER_PAYLOAD_IOVEC: {
+               int ret = kr_ok();
+               /* One packet per iovec element; stop at the first failure. */
+               for (int i = 0; i < ctx->payload.iovec.cnt; i++) {
+                       struct iovec *iov = &ctx->payload.iovec.iov[i];
+                       knot_pkt_t *pkt = produce_packet_dgram(
+                                       iov->iov_base, iov->iov_len);
+                       if (!pkt) {
+                               ret = KNOT_EMALF;
+                               break;
+                       }
+
+                       ret = worker_submit(session, &ctx->comm, NULL, NULL, pkt);
+                       if (ret)
+                               break;
+               }
+               return protolayer_break(ctx, ret);
+       }
+
+       case PROTOLAYER_PAYLOAD_BUFFER: {
+               knot_pkt_t *pkt = produce_packet_dgram(
+                               ctx->payload.buffer.buf,
+                               ctx->payload.buffer.len);
+               if (!pkt)
+                       return protolayer_break(ctx, KNOT_EMALF);
+
+               return protolayer_break(ctx,
+                               worker_submit(session, &ctx->comm, NULL, NULL, pkt));
+       }
+
+       case PROTOLAYER_PAYLOAD_WIRE_BUF: {
+               struct wire_buf *wb = ctx->payload.wire_buf;
+               knot_pkt_t *pkt = produce_packet_dgram(
+                               wire_buf_data(wb), wire_buf_data_length(wb));
+               if (!pkt)
+                       return protolayer_break(ctx, KNOT_EMALF);
+
+               int ret = worker_submit(session, &ctx->comm, NULL, NULL, pkt);
+               /* The wire buffer is reset only after the packet was submitted. */
+               wire_buf_reset(wb);
+               return protolayer_break(ctx, ret);
+       }
+
+       default:
+               kr_assert(false && "Invalid payload");
+               return protolayer_break(ctx, kr_error(EINVAL));
+       }
+}
+
+/** Session-wide state of a DNS stream layer (see the `sess_init` callbacks
+ * and pl_dns_stream_unwrap() for how the flags are set and used). */
+struct pl_dns_stream_sess_data {
+       bool single : 1; /**< True: Stream only allows a single packet */
+       bool produced : 1; /**< True: At least one packet has been produced */
+};
+
+/** State of a DNS stream layer obtained via protolayer_iter_data(). */
+struct pl_dns_stream_iter_data {
+       /** Memory allocated by pl_dns_stream_wrap() for the outgoing iovecs;
+        * released by pl_dns_stream_iter_deinit(). */
+       struct {
+               knot_mm_t *pool; /**< Pool that `mem` was allocated from */
+               void *mem; /**< The allocation; NULL when nothing is held */
+       } sent;
+};
+
+/** Reset the session data of a DNS stream layer: `single` is set as
+ * requested by the caller and `produced` is cleared. */
+static void pl_dns_stream_sess_init_common(struct pl_dns_stream_sess_data *stream,
+                                       bool single)
+{
+       stream->single = single;
+       stream->produced = false;
+}
+
+/** Session init callback of the DNS stream layer without the single-packet
+ * restriction (`single` = false).  Always returns kr_ok(). */
+static int pl_dns_mstream_sess_init(struct protolayer_manager *manager,
+                                    struct protolayer_data *layer)
+{
+       pl_dns_stream_sess_init_common(protolayer_sess_data(layer), false);
+       return kr_ok();
+}
+
+/** Session init callback of the single-packet DNS stream layer
+ * (`single` = true).  Always returns kr_ok(). */
+static int pl_dns_sstream_sess_init(struct protolayer_manager *manager,
+                                    struct protolayer_data *layer)
+{
+       pl_dns_stream_sess_init_common(protolayer_sess_data(layer), true);
+       return kr_ok();
+}
+
+/** Iteration init callback of the DNS stream layers: starts with no memory
+ * held in `sent`.  Always returns kr_ok(). */
+static int pl_dns_stream_iter_init(struct protolayer_manager *manager,
+                                     struct protolayer_data *layer)
+{
+       struct pl_dns_stream_iter_data *iter = protolayer_iter_data(layer);
+       iter->sent.pool = NULL;
+       iter->sent.mem = NULL;
+       return kr_ok();
+}
+
+/** Iteration deinit callback of the DNS stream layers: hands the memory
+ * recorded in `sent` (if any) back to its pool.  Always returns kr_ok(). */
+static int pl_dns_stream_iter_deinit(struct protolayer_manager *manager,
+                                     struct protolayer_data *layer)
+{
+       struct pl_dns_stream_iter_data *iter = protolayer_iter_data(layer);
+       knot_mm_t *pool = iter->sent.pool;
+       void *mem = iter->sent.mem;
+       mm_free(pool, mem);
+       return kr_ok();
+}
+
+/** Unwrap callback of the DNS stream layers.
+ *
+ * Event payloads are passed on via protolayer_continue().  Data must come
+ * in a wire buffer holding messages prefixed by a 2-byte length in network
+ * byte order.  Once a whole message is buffered, it is cut out of the wire
+ * buffer, turned into a packet and handed to worker_submit();
+ * protolayer_break() is then called with the resulting code.
+ *
+ * protolayer_wait() is returned while the length prefix or the message body
+ * is still incomplete.  In `single` mode, any data after the first produced
+ * packet is refused with KNOT_EMALF. */
+static enum protolayer_cb_result pl_dns_stream_unwrap(
+               struct protolayer_data *layer, struct protolayer_cb_ctx *ctx)
+{
+       if (ctx->payload.type == PROTOLAYER_PAYLOAD_EVENT) {
+               /* pass thru */
+               return protolayer_continue(ctx);
+       }
+
+       if (kr_fails_assert(ctx->payload.type == PROTOLAYER_PAYLOAD_WIRE_BUF)) {
+               /* DNS stream only works with a wire buffer */
+               return protolayer_break(ctx, kr_error(EINVAL));
+       }
+
+       struct pl_dns_stream_sess_data *stream = protolayer_sess_data(layer);
+
+       if (stream->single && stream->produced) {
+               if (kr_log_is_debug(WORKER, NULL)) {
+                       kr_log_debug(WORKER, "Unexpected extra data from %s\n",
+                                   kr_straddr(ctx->comm.src_addr));
+               }
+               return protolayer_break(ctx, KNOT_EMALF);
+       }
+
+       struct wire_buf *wb = ctx->payload.wire_buf;
+
+       /* The length prefix itself may not have fully arrived yet; reading
+        * it earlier would interpret bytes that are not valid data. */
+       if (wire_buf_data_length(wb) < sizeof(uint16_t))
+               return protolayer_wait(ctx);
+
+       /* Copy the prefix out instead of dereferencing a casted pointer:
+        * the data position is not guaranteed to be suitably aligned. */
+       uint16_t nlen;
+       memcpy(&nlen, wire_buf_data(wb), sizeof(nlen));
+       const size_t pkt_len = ntohs(nlen);
+       if (wire_buf_data_length(wb) < pkt_len + sizeof(uint16_t))
+               return protolayer_wait(ctx);
+
+       /* Consume the prefix, build the packet over the message bytes and
+        * then consume the message as well. */
+       wire_buf_trim(wb, sizeof(uint16_t));
+       knot_pkt_t *pkt = produce_packet_dgram(wire_buf_data(wb), pkt_len);
+       wire_buf_trim(wb, pkt_len);
+       stream->produced = true;
+       if (!pkt)
+               return protolayer_break(ctx, KNOT_EMALF);
+
+       int ret = worker_submit(ctx->manager->session, &ctx->comm, NULL, NULL, pkt);
+       return protolayer_break(ctx, ret);
+}
+
+/** Length-prefixed iovec array as built by pl_dns_stream_wrap(): `nlen`
+ * holds the message length in network byte order and is pointed to by
+ * the first element of `iovs`. */
+struct sized_iovs {
+       uint16_t nlen;
+       struct iovec iovs[];
+};
+
+/** Wrap callback of the DNS stream layers.
+ *
+ * Event payloads are passed on via protolayer_continue().  A buffer or
+ * iovec payload is replaced by a new iovec array whose first element is
+ * the 2-byte message length in network byte order, followed by the
+ * original data (which is referenced, not copied).
+ *
+ * The array is allocated from the session's pool and recorded in the
+ * layer's iteration data, so that pl_dns_stream_iter_deinit() releases it.
+ *
+ * Breaks with EINVAL if memory from a previous wrap is still held or the
+ * payload type is unsupported, and with EMSGSIZE if the message does not
+ * fit into the 16-bit length prefix. */
+static enum protolayer_cb_result pl_dns_stream_wrap(
+               struct protolayer_data *layer, struct protolayer_cb_ctx *ctx)
+{
+       if (ctx->payload.type == PROTOLAYER_PAYLOAD_EVENT) {
+               /* pass thru */
+               return protolayer_continue(ctx);
+       }
+
+       struct pl_dns_stream_iter_data *stream = protolayer_iter_data(layer);
+       struct session2 *s = ctx->manager->session;
+
+       if (kr_fails_assert(!stream->sent.mem))
+               return protolayer_break(ctx, kr_error(EINVAL));
+
+       if (ctx->payload.type == PROTOLAYER_PAYLOAD_BUFFER) {
+               if (kr_fails_assert(ctx->payload.buffer.len <= UINT16_MAX))
+                       return protolayer_break(ctx, kr_error(EMSGSIZE));
+
+               const int iovcnt = 2;
+               struct sized_iovs *siov = mm_alloc(&s->pool,
+                               sizeof(*siov) + iovcnt * sizeof(struct iovec));
+               kr_require(siov);
+               siov->nlen = htons(ctx->payload.buffer.len);
+               siov->iovs[0] = (struct iovec){
+                       .iov_base = &siov->nlen,
+                       .iov_len = sizeof(siov->nlen)
+               };
+               siov->iovs[1] = (struct iovec){
+                       .iov_base = ctx->payload.buffer.buf,
+                       .iov_len = ctx->payload.buffer.len
+               };
+
+               stream->sent.mem = siov;
+               stream->sent.pool = &s->pool;
+
+               ctx->payload = protolayer_iovec(siov->iovs, iovcnt);
+               return protolayer_continue(ctx);
+       } else if (ctx->payload.type == PROTOLAYER_PAYLOAD_IOVEC) {
+               /* Validate the total length before allocating anything, so
+                * that the error path does not leave an allocation behind
+                * that is not recorded in `stream->sent`. */
+               size_t total_len = 0;
+               for (int i = 0; i < ctx->payload.iovec.cnt; i++)
+                       total_len += ctx->payload.iovec.iov[i].iov_len;
+
+               if (kr_fails_assert(total_len <= UINT16_MAX))
+                       return protolayer_break(ctx, kr_error(EMSGSIZE));
+
+               const int iovcnt = 1 + ctx->payload.iovec.cnt;
+               struct sized_iovs *siov = mm_alloc(&s->pool,
+                               sizeof(*siov) + iovcnt * sizeof(struct iovec));
+               kr_require(siov);
+
+               siov->nlen = htons(total_len);
+               siov->iovs[0] = (struct iovec){
+                       .iov_base = &siov->nlen,
+                       .iov_len = sizeof(siov->nlen)
+               };
+               /* The original elements follow the length prefix. */
+               for (int i = 0; i < ctx->payload.iovec.cnt; i++)
+                       siov->iovs[i + 1] = ctx->payload.iovec.iov[i];
+
+               stream->sent.mem = siov;
+               stream->sent.pool = &s->pool;
+
+               ctx->payload = protolayer_iovec(siov->iovs, iovcnt);
+               return protolayer_continue(ctx);
+       } else {
+               kr_assert(false && "Invalid payload");
+               return protolayer_break(ctx, kr_error(EINVAL));
+       }
+}
+
+
  int worker_init(void)
  {
         if (kr_fails_assert(the_worker == NULL))
                 return kr_error(EINVAL);
         kr_bindings_register(the_engine->L); // TODO move
  
+       /* DNS protocol layers */
+       protolayer_globals[PROTOLAYER_DNS_DGRAM] = (struct protolayer_globals){
+               .unwrap = pl_dns_dgram_unwrap,
+       };
+       const struct protolayer_globals stream_common = {
+               .sess_size = sizeof(struct pl_dns_stream_sess_data),
+               .sess_init = NULL, /* replaced in specific layers below */
+               .iter_size = sizeof(struct pl_dns_stream_iter_data),
+               .iter_init = pl_dns_stream_iter_init,
+               .iter_deinit = pl_dns_stream_iter_deinit,
+               .unwrap = pl_dns_stream_unwrap,
+               .wrap = pl_dns_stream_wrap
+       };
+       protolayer_globals[PROTOLAYER_DNS_MSTREAM] = stream_common;
+       protolayer_globals[PROTOLAYER_DNS_MSTREAM].sess_init = pl_dns_mstream_sess_init;
+       protolayer_globals[PROTOLAYER_DNS_SSTREAM] = stream_common;
+       protolayer_globals[PROTOLAYER_DNS_SSTREAM].sess_init = pl_dns_sstream_sess_init;
+
         /* Create main worker. */
         the_worker = &the_worker_value;
         memset(the_worker, 0, sizeof(*the_worker));
diff --git a/daemon/worker.h b/daemon/worker.h

index ee9677c44fe7168eb9ee0629f69397df952f0cbc..40e1df0c2cc65cba37299ac4d61c700dc6a4b2dd 100644 (file)
--- a/daemon/worker.h
+++ b/daemon/worker.h
@@ -11,14 +11,14 @@
  
  /** Query resolution task (opaque). */
  struct qr_task;
-/** Worker state (opaque). */
+/** Worker state. */
  struct worker_ctx;
  /** Transport session (opaque). */
-struct session;
+struct session2;
  /** Zone import context (opaque). */
  struct zone_import_ctx;
  /** Data about the communication (defined in io.h). */
-struct io_comm_data;
+struct comm_info;
  
  /** Pointer to the singleton worker.  NULL if not initialized. */
  KR_EXPORT extern struct worker_ctx *the_worker;
@@ -39,14 +39,14 @@ void worker_deinit();
   * @param pkt         the packet, or NULL (an error from the transport layer)
   * @return 0 or an error code
   */
-int worker_submit(struct session *session, struct io_comm_data *comm,
+int worker_submit(struct session2 *session, struct comm_info *comm,
                    const uint8_t *eth_from, const uint8_t *eth_to, knot_pkt_t *pkt);
  
  /**
   * End current DNS/TCP session, this disassociates pending tasks from this session
   * which may be freely closed afterwards.
   */
-int worker_end_tcp(struct session *session);
+int worker_end_tcp(struct session2 *session);
  
  KR_EXPORT knot_pkt_t *worker_resolve_mk_pkt_dname(knot_dname_t *qname, uint16_t qtype, uint16_t qclass,
                                    const struct kr_qflags *options);
@@ -93,17 +93,17 @@ void worker_task_unref(struct qr_task *task);
  
  void worker_task_timeout_inc(struct qr_task *task);
  
-int worker_add_tcp_connected(const struct sockaddr *addr, struct session *session);
+int worker_add_tcp_connected(const struct sockaddr *addr, struct session2 *session);
  int worker_del_tcp_connected(const struct sockaddr *addr);
  int worker_del_tcp_waiting(const struct sockaddr* addr);
-struct session* worker_find_tcp_waiting(const struct sockaddr* addr);
-struct session* worker_find_tcp_connected(const struct sockaddr* addr);
+struct session2* worker_find_tcp_waiting(const struct sockaddr* addr);
+struct session2* worker_find_tcp_connected(const struct sockaddr* addr);
  knot_pkt_t *worker_task_get_pktbuf(const struct qr_task *task);
  
  struct request_ctx *worker_task_get_request(struct qr_task *task);
  
  /** Note: source session is NULL in case the request hasn't come over network. */
-KR_EXPORT struct session *worker_request_get_source_session(const struct kr_request *req);
+KR_EXPORT struct session2 *worker_request_get_source_session(const struct kr_request *req);
  
  uint16_t worker_task_pkt_get_msgid(struct qr_task *task);
  void worker_task_pkt_set_msgid(struct qr_task *task, uint16_t msgid);
@@ -163,8 +163,6 @@ struct worker_ctx {
         struct sockaddr_in out_addr4;
         struct sockaddr_in6 out_addr6;
  
-       uint8_t wire_buf[RECVMMSG_BATCH * KNOT_WIRE_MAX_PKTSIZE];
-
         struct worker_stats stats;
  
         bool too_many_open;
diff --git a/modules/dnstap/dnstap.c b/modules/dnstap/dnstap.c

index 757266728d0459b0c67c6f247ce117f19bea91cf..ab52bca3da4a4d015ee152de001252d704d26da8 100644 (file)
--- a/modules/dnstap/dnstap.c
+++ b/modules/dnstap/dnstap.c
@@ -10,7 +10,7 @@
  #include "modules/dnstap/dnstap.pb-c.h"
  
  #include "contrib/cleanup.h"
-#include "daemon/session.h"
+#include "daemon/session2.h"
  #include "daemon/worker.h"
  #include "lib/layer.h"
  #include "lib/resolve.h"
@@ -116,7 +116,7 @@ static int get_tcp_info(const struct kr_request *req, struct tcp_info *info)
         if (!req->qsource.dst_addr || !req->qsource.flags.tcp) /* not TCP-based */
                 return -abs(ENOENT);
         /* First obtain the file-descriptor. */
-       uv_handle_t *h = session_get_handle(worker_request_get_source_session(req));
+       uv_handle_t *h = session2_get_handle(worker_request_get_source_session(req));
         uv_os_fd_t fd;
         int ret = uv_fileno(h, &fd);
         if (ret)
author	Oto Šťáva <oto.stava@nic.cz>
	Tue, 2 Aug 2022 08:53:38 +0000 (10:53 +0200)
committer	Oto Šťáva <oto.stava@nic.cz>
	Thu, 26 Jan 2023 11:56:07 +0000 (12:56 +0100)
daemon/bindings/net.c		patch \| blob \| blame \| history
daemon/io.c		patch \| blob \| blame \| history
daemon/io.h		patch \| blob \| blame \| history
daemon/main.c		patch \| blob \| blame \| history
daemon/meson.build		patch \| blob \| blame \| history
daemon/network.c		patch \| blob \| blame \| history
daemon/proxyv2.c		patch \| blob \| blame \| history
daemon/proxyv2.h		patch \| blob \| blame \| history
daemon/session.c		patch \| blob \| blame \| history
daemon/session.h		patch \| blob \| blame \| history
daemon/session2.c		patch \| blob \| blame \| history
daemon/session2.h		patch \| blob \| blame \| history
daemon/worker.c		patch \| blob \| blame \| history
daemon/worker.h		patch \| blob \| blame \| history
modules/dnstap/dnstap.c		patch \| blob \| blame \| history