From: Oto Šťáva Date: Tue, 2 Aug 2022 08:53:38 +0000 (+0200) Subject: daemon: basic implementation of TCP and UDP with protolayers X-Git-Tag: v6.0.2~42^2~66 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5501d84bb244f55ef8ef06be95456b3b4f516fa3;p=thirdparty%2Fknot-resolver.git daemon: basic implementation of TCP and UDP with protolayers --- diff --git a/daemon/bindings/net.c b/daemon/bindings/net.c index 9a6374cd8..ae3a3c699 100644 --- a/daemon/bindings/net.c +++ b/daemon/bindings/net.c @@ -532,32 +532,35 @@ static int net_pipeline(lua_State *L) static int net_tls(lua_State *L) { - if (kr_fails_assert(the_network)) { - return 0; - } - - /* Only return current credentials. */ - if (lua_gettop(L) == 0) { - /* No credentials configured yet. */ - if (!the_network->tls_credentials) { - return 0; - } - lua_newtable(L); - lua_pushstring(L, the_network->tls_credentials->tls_cert); - lua_setfield(L, -2, "cert_file"); - lua_pushstring(L, the_network->tls_credentials->tls_key); - lua_setfield(L, -2, "key_file"); - return 1; - } - - if ((lua_gettop(L) != 2) || !lua_isstring(L, 1) || !lua_isstring(L, 2)) - lua_error_p(L, "net.tls takes two parameters: (\"cert_file\", \"key_file\")"); - - int r = tls_certificate_set(lua_tostring(L, 1), lua_tostring(L, 2)); - lua_error_maybe(L, r); - - lua_pushboolean(L, true); - return 1; + /* TODO */ + kr_assert(false && "Unimplemented"); + return 0; +// if (kr_fails_assert(the_network)) { +// return 0; +// } +// +// /* Only return current credentials. */ +// if (lua_gettop(L) == 0) { +// /* No credentials configured yet. 
*/ +// if (!the_network->tls_credentials) { +// return 0; +// } +// lua_newtable(L); +// lua_pushstring(L, the_network->tls_credentials->tls_cert); +// lua_setfield(L, -2, "cert_file"); +// lua_pushstring(L, the_network->tls_credentials->tls_key); +// lua_setfield(L, -2, "key_file"); +// return 1; +// } +// +// if ((lua_gettop(L) != 2) || !lua_isstring(L, 1) || !lua_isstring(L, 2)) +// lua_error_p(L, "net.tls takes two parameters: (\"cert_file\", \"key_file\")"); +// +// int r = tls_certificate_set(lua_tostring(L, 1), lua_tostring(L, 2)); +// lua_error_maybe(L, r); +// +// lua_pushboolean(L, true); +// return 1; } /** Configure HTTP headers for DoH requests. */ @@ -614,90 +617,93 @@ static int net_doh_headers(lua_State *L) * more precisely, it's in a compatible canonical form. */ static int tls_params2lua(lua_State *L, trie_t *params) { - lua_newtable(L); - if (!params) /* Allowed special case. */ - return 1; - trie_it_t *it; - size_t list_index = 0; - for (it = trie_it_begin(params); !trie_it_finished(it); trie_it_next(it)) { - /* Prepare table for the current address - * and its index in the returned list. */ - lua_pushinteger(L, ++list_index); - lua_createtable(L, 0, 2); - - /* Get the "addr#port" string... */ - size_t ia_len; - const char *key = trie_it_key(it, &ia_len); - int af = AF_UNSPEC; - if (ia_len == 2 + sizeof(struct in_addr)) { - af = AF_INET; - } else if (ia_len == 2 + sizeof(struct in6_addr)) { - af = AF_INET6; - } - if (kr_fails_assert(key && af != AF_UNSPEC)) - lua_error_p(L, "internal error: bad IP address"); - uint16_t port; - memcpy(&port, key, sizeof(port)); - port = ntohs(port); - const char *ia = key + sizeof(port); - char str[INET6_ADDRSTRLEN + 1 + 5 + 1]; - size_t len = sizeof(str); - if (kr_fails_assert(kr_ntop_str(af, ia, port, str, &len) == kr_ok())) - lua_error_p(L, "internal error: bad IP address conversion"); - /* ...and push it as [1]. 
*/ - lua_pushinteger(L, 1); - lua_pushlstring(L, str, len - 1 /* len includes '\0' */); - lua_settable(L, -3); - - const tls_client_param_t *e = *trie_it_val(it); - if (kr_fails_assert(e)) - lua_error_p(L, "internal problem - NULL entry for %s", str); - - /* .hostname = */ - if (e->hostname) { - lua_pushstring(L, e->hostname); - lua_setfield(L, -2, "hostname"); - } - - /* .ca_files = */ - if (e->ca_files.len) { - lua_createtable(L, e->ca_files.len, 0); - for (size_t i = 0; i < e->ca_files.len; ++i) { - lua_pushinteger(L, i + 1); - lua_pushstring(L, e->ca_files.at[i]); - lua_settable(L, -3); - } - lua_setfield(L, -2, "ca_files"); - } - - /* .pin_sha256 = ... ; keep sane indentation via goto. */ - if (!e->pins.len) goto no_pins; - lua_createtable(L, e->pins.len, 0); - for (size_t i = 0; i < e->pins.len; ++i) { - uint8_t pin_base64[TLS_SHA256_BASE64_BUFLEN]; - int err = kr_base64_encode(e->pins.at[i], TLS_SHA256_RAW_LEN, - pin_base64, sizeof(pin_base64)); - if (kr_fails_assert(err >= 0)) - lua_error_p(L, - "internal problem when converting pin_sha256: %s", - kr_strerror(err)); - lua_pushinteger(L, i + 1); - lua_pushlstring(L, (const char *)pin_base64, err); - /* pin_base64 isn't 0-terminated ^^^ */ - lua_settable(L, -3); - } - lua_setfield(L, -2, "pin_sha256"); - - no_pins:/* .insecure = */ - if (e->insecure) { - lua_pushboolean(L, true); - lua_setfield(L, -2, "insecure"); - } - /* Now the whole table is pushed atop the returned list. */ - lua_settable(L, -3); - } - trie_it_free(it); - return 1; + /* TODO */ + kr_assert(false && "Unimplemented"); + return 0; +// lua_newtable(L); +// if (!params) /* Allowed special case. */ +// return 1; +// trie_it_t *it; +// size_t list_index = 0; +// for (it = trie_it_begin(params); !trie_it_finished(it); trie_it_next(it)) { +// /* Prepare table for the current address +// * and its index in the returned list. */ +// lua_pushinteger(L, ++list_index); +// lua_createtable(L, 0, 2); +// +// /* Get the "addr#port" string... 
*/ +// size_t ia_len; +// const char *key = trie_it_key(it, &ia_len); +// int af = AF_UNSPEC; +// if (ia_len == 2 + sizeof(struct in_addr)) { +// af = AF_INET; +// } else if (ia_len == 2 + sizeof(struct in6_addr)) { +// af = AF_INET6; +// } +// if (kr_fails_assert(key && af != AF_UNSPEC)) +// lua_error_p(L, "internal error: bad IP address"); +// uint16_t port; +// memcpy(&port, key, sizeof(port)); +// port = ntohs(port); +// const char *ia = key + sizeof(port); +// char str[INET6_ADDRSTRLEN + 1 + 5 + 1]; +// size_t len = sizeof(str); +// if (kr_fails_assert(kr_ntop_str(af, ia, port, str, &len) == kr_ok())) +// lua_error_p(L, "internal error: bad IP address conversion"); +// /* ...and push it as [1]. */ +// lua_pushinteger(L, 1); +// lua_pushlstring(L, str, len - 1 /* len includes '\0' */); +// lua_settable(L, -3); +// +// const tls_client_param_t *e = *trie_it_val(it); +// if (kr_fails_assert(e)) +// lua_error_p(L, "internal problem - NULL entry for %s", str); +// +// /* .hostname = */ +// if (e->hostname) { +// lua_pushstring(L, e->hostname); +// lua_setfield(L, -2, "hostname"); +// } +// +// /* .ca_files = */ +// if (e->ca_files.len) { +// lua_createtable(L, e->ca_files.len, 0); +// for (size_t i = 0; i < e->ca_files.len; ++i) { +// lua_pushinteger(L, i + 1); +// lua_pushstring(L, e->ca_files.at[i]); +// lua_settable(L, -3); +// } +// lua_setfield(L, -2, "ca_files"); +// } +// +// /* .pin_sha256 = ... ; keep sane indentation via goto. 
*/ +// if (!e->pins.len) goto no_pins; +// lua_createtable(L, e->pins.len, 0); +// for (size_t i = 0; i < e->pins.len; ++i) { +// uint8_t pin_base64[TLS_SHA256_BASE64_BUFLEN]; +// int err = kr_base64_encode(e->pins.at[i], TLS_SHA256_RAW_LEN, +// pin_base64, sizeof(pin_base64)); +// if (kr_fails_assert(err >= 0)) +// lua_error_p(L, +// "internal problem when converting pin_sha256: %s", +// kr_strerror(err)); +// lua_pushinteger(L, i + 1); +// lua_pushlstring(L, (const char *)pin_base64, err); +// /* pin_base64 isn't 0-terminated ^^^ */ +// lua_settable(L, -3); +// } +// lua_setfield(L, -2, "pin_sha256"); +// +// no_pins:/* .insecure = */ +// if (e->insecure) { +// lua_pushboolean(L, true); +// lua_setfield(L, -2, "insecure"); +// } +// /* Now the whole table is pushed atop the returned list. */ +// lua_settable(L, -3); +// } +// trie_it_free(it); +// return 1; } static inline int cmp_sha256(const void *p1, const void *p2) @@ -706,224 +712,230 @@ static inline int cmp_sha256(const void *p1, const void *p2) } static int net_tls_client(lua_State *L) { - /* TODO idea: allow starting the lua table with *multiple* IP targets, - * meaning the authentication config should be applied to each. - */ - if (lua_gettop(L) == 0) - return tls_params2lua(L, the_network->tls_client_params); - /* Various basic sanity-checking. */ - if (lua_gettop(L) != 1 || !lua_istable(L, 1)) - lua_error_maybe(L, EINVAL); - /* check that only allowed keys are present */ - { - const char *bad_key = lua_table_checkindices(L, (const char *[]) - { "1", "hostname", "ca_file", "pin_sha256", "insecure", NULL }); - if (bad_key) - lua_error_p(L, "found unexpected key '%s'", bad_key); - } - - /**** Phase 1: get the parameter into a C struct, incl. parse of CA files, - * regardless of the address-pair having an entry already. 
*/ - - tls_client_param_t *newcfg = tls_client_param_new(); - if (!newcfg) - lua_error_p(L, "out of memory or something like that :-/"); - /* Shortcut for cleanup actions needed from now on. */ - #define ERROR(...) do { \ - free(newcfg); \ - lua_error_p(L, __VA_ARGS__); \ - } while (false) - - /* .hostname - always accepted. */ - lua_getfield(L, 1, "hostname"); - if (!lua_isnil(L, -1)) { - const char *hn_str = lua_tostring(L, -1); - /* Convert to lower-case dname and back, for checking etc. */ - knot_dname_t dname[KNOT_DNAME_MAXLEN]; - if (!hn_str || !knot_dname_from_str(dname, hn_str, sizeof(dname))) - ERROR("invalid hostname"); - knot_dname_to_lower(dname); - char *h = knot_dname_to_str_alloc(dname); - if (!h) - ERROR("%s", kr_strerror(ENOMEM)); - /* Strip the final dot produced by knot_dname_*() */ - h[strlen(h) - 1] = '\0'; - newcfg->hostname = h; - } - lua_pop(L, 1); - - /* .ca_file - it can be a list of paths, contrary to the name. */ - bool has_ca_file = false; - lua_getfield(L, 1, "ca_file"); - if (!lua_isnil(L, -1)) { - if (!newcfg->hostname) - ERROR("missing hostname but specifying ca_file"); - lua_listify(L); - array_init(newcfg->ca_files); /*< placate apparently confused scan-build */ - if (array_reserve(newcfg->ca_files, lua_objlen(L, -1)) != 0) /*< optim. */ - ERROR("%s", kr_strerror(ENOMEM)); - /* Iterate over table at the top of the stack. - * http://www.lua.org/manual/5.1/manual.html#lua_next */ - for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) { - has_ca_file = true; /* deferred here so that {} -> false */ - const char *ca_file = lua_tostring(L, -1); - if (!ca_file) - ERROR("ca_file contains a non-string"); - /* Let gnutls process it immediately, so garbage gets detected. 
*/ - int ret = gnutls_certificate_set_x509_trust_file( - newcfg->credentials, ca_file, GNUTLS_X509_FMT_PEM); - if (ret < 0) { - ERROR("failed to import certificate file '%s': %s - %s\n", - ca_file, gnutls_strerror_name(ret), - gnutls_strerror(ret)); - } else { - kr_log_debug(TLSCLIENT, "imported %d certs from file '%s'\n", - ret, ca_file); - } - - ca_file = strdup(ca_file); - if (!ca_file || array_push(newcfg->ca_files, ca_file) < 0) - ERROR("%s", kr_strerror(ENOMEM)); - } - /* Sort the strings for easier comparison later. */ - if (newcfg->ca_files.len) { - qsort(&newcfg->ca_files.at[0], newcfg->ca_files.len, - sizeof(newcfg->ca_files.at[0]), strcmp_p); - } - } - lua_pop(L, 1); - - /* .pin_sha256 */ - lua_getfield(L, 1, "pin_sha256"); - if (!lua_isnil(L, -1)) { - if (has_ca_file) - ERROR("mixing pin_sha256 with ca_file is not supported"); - lua_listify(L); - array_init(newcfg->pins); /*< placate apparently confused scan-build */ - if (array_reserve(newcfg->pins, lua_objlen(L, -1)) != 0) /*< optim. */ - ERROR("%s", kr_strerror(ENOMEM)); - /* Iterate over table at the top of the stack. */ - for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) { - const char *pin = lua_tostring(L, -1); - if (!pin) - ERROR("pin_sha256 is not a string"); - uint8_t *pin_raw = malloc(TLS_SHA256_RAW_LEN); - /* Push the string early to simplify error processing. */ - if (kr_fails_assert(pin_raw && array_push(newcfg->pins, pin_raw) >= 0)) { - free(pin_raw); - ERROR("%s", kr_strerror(ENOMEM)); - } - int ret = kr_base64_decode((const uint8_t *)pin, strlen(pin), - pin_raw, TLS_SHA256_RAW_LEN + 8); - if (ret < 0) { - ERROR("not a valid pin_sha256: '%s' (length %d), %s\n", - pin, (int)strlen(pin), knot_strerror(ret)); - } else if (ret != TLS_SHA256_RAW_LEN) { - ERROR("not a valid pin_sha256: '%s', " - "raw length %d instead of " - STR(TLS_SHA256_RAW_LEN)"\n", - pin, ret); - } - } - /* Sort the raw strings for easier comparison later. 
*/ - if (newcfg->pins.len) { - qsort(&newcfg->pins.at[0], newcfg->pins.len, - sizeof(newcfg->pins.at[0]), cmp_sha256); - } - } - lua_pop(L, 1); - - /* .insecure */ - lua_getfield(L, 1, "insecure"); - if (lua_isnil(L, -1)) { - if (!newcfg->hostname && !newcfg->pins.len) - ERROR("no way to authenticate and not set as insecure"); - } else if (lua_isboolean(L, -1) && lua_toboolean(L, -1)) { - newcfg->insecure = true; - if (has_ca_file || newcfg->pins.len) - ERROR("set as insecure but provided authentication config"); - } else { - ERROR("incorrect value in the 'insecure' field"); - } - lua_pop(L, 1); - - /* Init CAs from system trust store, if needed. */ - if (!newcfg->insecure && !newcfg->pins.len && !has_ca_file) { - int ret = gnutls_certificate_set_x509_system_trust(newcfg->credentials); - if (ret <= 0) { - ERROR("failed to use system CA certificate store: %s", - ret ? gnutls_strerror(ret) : kr_strerror(ENOENT)); - } else { - kr_log_debug(TLSCLIENT, "imported %d certs from system store\n", - ret); - } - } - #undef ERROR - - /**** Phase 2: deal with the C authentication "table". */ - /* Parse address and port. */ - lua_pushinteger(L, 1); - lua_gettable(L, 1); - const char *addr_str = lua_tostring(L, -1); - if (!addr_str) - lua_error_p(L, "address is not a string"); - char buf[INET6_ADDRSTRLEN + 1]; - uint16_t port = 853; - const struct sockaddr *addr = NULL; - if (kr_straddr_split(addr_str, buf, &port) == kr_ok()) - addr = kr_straddr_socket(buf, port, NULL); - /* Add newcfg into the C map, saving the original into oldcfg. 
*/ - if (!addr) - lua_error_p(L, "address '%s' could not be converted", addr_str); - tls_client_param_t **oldcfgp = tls_client_param_getptr( - &the_network->tls_client_params, addr, true); - free_const(addr); - if (!oldcfgp) - lua_error_p(L, "internal error when extending tls_client_params map"); - tls_client_param_t *oldcfg = *oldcfgp; - *oldcfgp = newcfg; /* replace old config in trie with the new one */ - /* If there was no original entry, it's easy! */ - if (!oldcfg) - return 0; - - /* Check for equality (newcfg vs. oldcfg), and print a warning if not equal.*/ - const bool ok_h = (!newcfg->hostname && !oldcfg->hostname) - || (newcfg->hostname && oldcfg->hostname && strcmp(newcfg->hostname, oldcfg->hostname) == 0); - bool ok_ca = newcfg->ca_files.len == oldcfg->ca_files.len; - for (int i = 0; ok_ca && i < newcfg->ca_files.len; ++i) - ok_ca = strcmp(newcfg->ca_files.at[i], oldcfg->ca_files.at[i]) == 0; - bool ok_pins = newcfg->pins.len == oldcfg->pins.len; - for (int i = 0; ok_pins && i < newcfg->pins.len; ++i) - ok_ca = memcmp(newcfg->pins.at[i], oldcfg->pins.at[i], TLS_SHA256_RAW_LEN) == 0; - const bool ok_insecure = newcfg->insecure == oldcfg->insecure; - if (!(ok_h && ok_ca && ok_pins && ok_insecure)) { - kr_log_warning(TLSCLIENT, - "warning: re-defining TLS authentication parameters for %s\n", - addr_str); - } - tls_client_param_unref(oldcfg); + /* TODO */ + kr_assert(false && "Unimplemented"); return 0; +// /* TODO idea: allow starting the lua table with *multiple* IP targets, +// * meaning the authentication config should be applied to each. +// */ +// if (lua_gettop(L) == 0) +// return tls_params2lua(L, the_network->tls_client_params); +// /* Various basic sanity-checking. 
*/ +// if (lua_gettop(L) != 1 || !lua_istable(L, 1)) +// lua_error_maybe(L, EINVAL); +// /* check that only allowed keys are present */ +// { +// const char *bad_key = lua_table_checkindices(L, (const char *[]) +// { "1", "hostname", "ca_file", "pin_sha256", "insecure", NULL }); +// if (bad_key) +// lua_error_p(L, "found unexpected key '%s'", bad_key); +// } +// +// /**** Phase 1: get the parameter into a C struct, incl. parse of CA files, +// * regardless of the address-pair having an entry already. */ +// +// tls_client_param_t *newcfg = tls_client_param_new(); +// if (!newcfg) +// lua_error_p(L, "out of memory or something like that :-/"); +// /* Shortcut for cleanup actions needed from now on. */ +// #define ERROR(...) do { \ +// free(newcfg); \ +// lua_error_p(L, __VA_ARGS__); \ +// } while (false) +// +// /* .hostname - always accepted. */ +// lua_getfield(L, 1, "hostname"); +// if (!lua_isnil(L, -1)) { +// const char *hn_str = lua_tostring(L, -1); +// /* Convert to lower-case dname and back, for checking etc. */ +// knot_dname_t dname[KNOT_DNAME_MAXLEN]; +// if (!hn_str || !knot_dname_from_str(dname, hn_str, sizeof(dname))) +// ERROR("invalid hostname"); +// knot_dname_to_lower(dname); +// char *h = knot_dname_to_str_alloc(dname); +// if (!h) +// ERROR("%s", kr_strerror(ENOMEM)); +// /* Strip the final dot produced by knot_dname_*() */ +// h[strlen(h) - 1] = '\0'; +// newcfg->hostname = h; +// } +// lua_pop(L, 1); +// +// /* .ca_file - it can be a list of paths, contrary to the name. */ +// bool has_ca_file = false; +// lua_getfield(L, 1, "ca_file"); +// if (!lua_isnil(L, -1)) { +// if (!newcfg->hostname) +// ERROR("missing hostname but specifying ca_file"); +// lua_listify(L); +// array_init(newcfg->ca_files); /*< placate apparently confused scan-build */ +// if (array_reserve(newcfg->ca_files, lua_objlen(L, -1)) != 0) /*< optim. */ +// ERROR("%s", kr_strerror(ENOMEM)); +// /* Iterate over table at the top of the stack. 
+// * http://www.lua.org/manual/5.1/manual.html#lua_next */ +// for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) { +// has_ca_file = true; /* deferred here so that {} -> false */ +// const char *ca_file = lua_tostring(L, -1); +// if (!ca_file) +// ERROR("ca_file contains a non-string"); +// /* Let gnutls process it immediately, so garbage gets detected. */ +// int ret = gnutls_certificate_set_x509_trust_file( +// newcfg->credentials, ca_file, GNUTLS_X509_FMT_PEM); +// if (ret < 0) { +// ERROR("failed to import certificate file '%s': %s - %s\n", +// ca_file, gnutls_strerror_name(ret), +// gnutls_strerror(ret)); +// } else { +// kr_log_debug(TLSCLIENT, "imported %d certs from file '%s'\n", +// ret, ca_file); +// } +// +// ca_file = strdup(ca_file); +// if (!ca_file || array_push(newcfg->ca_files, ca_file) < 0) +// ERROR("%s", kr_strerror(ENOMEM)); +// } +// /* Sort the strings for easier comparison later. */ +// if (newcfg->ca_files.len) { +// qsort(&newcfg->ca_files.at[0], newcfg->ca_files.len, +// sizeof(newcfg->ca_files.at[0]), strcmp_p); +// } +// } +// lua_pop(L, 1); +// +// /* .pin_sha256 */ +// lua_getfield(L, 1, "pin_sha256"); +// if (!lua_isnil(L, -1)) { +// if (has_ca_file) +// ERROR("mixing pin_sha256 with ca_file is not supported"); +// lua_listify(L); +// array_init(newcfg->pins); /*< placate apparently confused scan-build */ +// if (array_reserve(newcfg->pins, lua_objlen(L, -1)) != 0) /*< optim. */ +// ERROR("%s", kr_strerror(ENOMEM)); +// /* Iterate over table at the top of the stack. */ +// for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) { +// const char *pin = lua_tostring(L, -1); +// if (!pin) +// ERROR("pin_sha256 is not a string"); +// uint8_t *pin_raw = malloc(TLS_SHA256_RAW_LEN); +// /* Push the string early to simplify error processing. 
*/ +// if (kr_fails_assert(pin_raw && array_push(newcfg->pins, pin_raw) >= 0)) { +// free(pin_raw); +// ERROR("%s", kr_strerror(ENOMEM)); +// } +// int ret = kr_base64_decode((const uint8_t *)pin, strlen(pin), +// pin_raw, TLS_SHA256_RAW_LEN + 8); +// if (ret < 0) { +// ERROR("not a valid pin_sha256: '%s' (length %d), %s\n", +// pin, (int)strlen(pin), knot_strerror(ret)); +// } else if (ret != TLS_SHA256_RAW_LEN) { +// ERROR("not a valid pin_sha256: '%s', " +// "raw length %d instead of " +// STR(TLS_SHA256_RAW_LEN)"\n", +// pin, ret); +// } +// } +// /* Sort the raw strings for easier comparison later. */ +// if (newcfg->pins.len) { +// qsort(&newcfg->pins.at[0], newcfg->pins.len, +// sizeof(newcfg->pins.at[0]), cmp_sha256); +// } +// } +// lua_pop(L, 1); +// +// /* .insecure */ +// lua_getfield(L, 1, "insecure"); +// if (lua_isnil(L, -1)) { +// if (!newcfg->hostname && !newcfg->pins.len) +// ERROR("no way to authenticate and not set as insecure"); +// } else if (lua_isboolean(L, -1) && lua_toboolean(L, -1)) { +// newcfg->insecure = true; +// if (has_ca_file || newcfg->pins.len) +// ERROR("set as insecure but provided authentication config"); +// } else { +// ERROR("incorrect value in the 'insecure' field"); +// } +// lua_pop(L, 1); +// +// /* Init CAs from system trust store, if needed. */ +// if (!newcfg->insecure && !newcfg->pins.len && !has_ca_file) { +// int ret = gnutls_certificate_set_x509_system_trust(newcfg->credentials); +// if (ret <= 0) { +// ERROR("failed to use system CA certificate store: %s", +// ret ? gnutls_strerror(ret) : kr_strerror(ENOENT)); +// } else { +// kr_log_debug(TLSCLIENT, "imported %d certs from system store\n", +// ret); +// } +// } +// #undef ERROR +// +// /**** Phase 2: deal with the C authentication "table". */ +// /* Parse address and port. 
*/ +// lua_pushinteger(L, 1); +// lua_gettable(L, 1); +// const char *addr_str = lua_tostring(L, -1); +// if (!addr_str) +// lua_error_p(L, "address is not a string"); +// char buf[INET6_ADDRSTRLEN + 1]; +// uint16_t port = 853; +// const struct sockaddr *addr = NULL; +// if (kr_straddr_split(addr_str, buf, &port) == kr_ok()) +// addr = kr_straddr_socket(buf, port, NULL); +// /* Add newcfg into the C map, saving the original into oldcfg. */ +// if (!addr) +// lua_error_p(L, "address '%s' could not be converted", addr_str); +// tls_client_param_t **oldcfgp = tls_client_param_getptr( +// &the_network->tls_client_params, addr, true); +// free_const(addr); +// if (!oldcfgp) +// lua_error_p(L, "internal error when extending tls_client_params map"); +// tls_client_param_t *oldcfg = *oldcfgp; +// *oldcfgp = newcfg; /* replace old config in trie with the new one */ +// /* If there was no original entry, it's easy! */ +// if (!oldcfg) +// return 0; +// +// /* Check for equality (newcfg vs. oldcfg), and print a warning if not equal.*/ +// const bool ok_h = (!newcfg->hostname && !oldcfg->hostname) +// || (newcfg->hostname && oldcfg->hostname && strcmp(newcfg->hostname, oldcfg->hostname) == 0); +// bool ok_ca = newcfg->ca_files.len == oldcfg->ca_files.len; +// for (int i = 0; ok_ca && i < newcfg->ca_files.len; ++i) +// ok_ca = strcmp(newcfg->ca_files.at[i], oldcfg->ca_files.at[i]) == 0; +// bool ok_pins = newcfg->pins.len == oldcfg->pins.len; +// for (int i = 0; ok_pins && i < newcfg->pins.len; ++i) +// ok_ca = memcmp(newcfg->pins.at[i], oldcfg->pins.at[i], TLS_SHA256_RAW_LEN) == 0; +// const bool ok_insecure = newcfg->insecure == oldcfg->insecure; +// if (!(ok_h && ok_ca && ok_pins && ok_insecure)) { +// kr_log_warning(TLSCLIENT, +// "warning: re-defining TLS authentication parameters for %s\n", +// addr_str); +// } +// tls_client_param_unref(oldcfg); +// return 0; } int net_tls_client_clear(lua_State *L) { - /* One parameter: address -> convert it to a struct sockaddr. 
*/ - if (lua_gettop(L) != 1 || !lua_isstring(L, 1)) - lua_error_p(L, "net.tls_client_clear() requires one parameter (\"address\")"); - const char *addr_str = lua_tostring(L, 1); - char buf[INET6_ADDRSTRLEN + 1]; - uint16_t port = 853; - const struct sockaddr *addr = NULL; - if (kr_straddr_split(addr_str, buf, &port) == kr_ok()) - addr = kr_straddr_socket(buf, port, NULL); - if (!addr) - lua_error_p(L, "invalid IP address"); - /* Do the actual removal. */ - int r = tls_client_param_remove(the_network->tls_client_params, addr); - free_const(addr); - lua_error_maybe(L, r); - lua_pushboolean(L, true); - return 1; + /* TODO */ + kr_assert(false && "Unimplemented"); + return 0; +// /* One parameter: address -> convert it to a struct sockaddr. */ +// if (lua_gettop(L) != 1 || !lua_isstring(L, 1)) +// lua_error_p(L, "net.tls_client_clear() requires one parameter (\"address\")"); +// const char *addr_str = lua_tostring(L, 1); +// char buf[INET6_ADDRSTRLEN + 1]; +// uint16_t port = 853; +// const struct sockaddr *addr = NULL; +// if (kr_straddr_split(addr_str, buf, &port) == kr_ok()) +// addr = kr_straddr_socket(buf, port, NULL); +// if (!addr) +// lua_error_p(L, "invalid IP address"); +// /* Do the actual removal. */ +// int r = tls_client_param_remove(the_network->tls_client_params, addr); +// free_const(addr); +// lua_error_maybe(L, r); +// lua_pushboolean(L, true); +// return 1; } static int net_tls_padding(lua_State *L) @@ -969,79 +981,85 @@ static int net_tls_padding(lua_State *L) static int net_tls_sticket_secret_string(lua_State *L) { - size_t secret_len; - const char *secret; - - if (lua_gettop(L) == 0) { - /* Zero-length secret, implying random key. 
*/ - secret_len = 0; - secret = NULL; - } else { - if (lua_gettop(L) != 1 || !lua_isstring(L, 1)) { - lua_error_p(L, - "net.tls_sticket_secret takes one parameter: (\"secret string\")"); - } - secret = lua_tolstring(L, 1, &secret_len); - if (secret_len < net_tls_sticket_MIN_SECRET_LEN || !secret) { - lua_error_p(L, "net.tls_sticket_secret - the secret is shorter than " - STR(net_tls_sticket_MIN_SECRET_LEN) " bytes"); - } - } - - tls_session_ticket_ctx_destroy(the_network->tls_session_ticket_ctx); - the_network->tls_session_ticket_ctx = - tls_session_ticket_ctx_create(the_network->loop, secret, secret_len); - if (the_network->tls_session_ticket_ctx == NULL) { - lua_error_p(L, - "net.tls_sticket_secret_string - can't create session ticket context"); - } - - lua_pushboolean(L, true); - return 1; + /* TODO */ + kr_assert(false && "Unimplemented"); + return 0; +// size_t secret_len; +// const char *secret; +// +// if (lua_gettop(L) == 0) { +// /* Zero-length secret, implying random key. */ +// secret_len = 0; +// secret = NULL; +// } else { +// if (lua_gettop(L) != 1 || !lua_isstring(L, 1)) { +// lua_error_p(L, +// "net.tls_sticket_secret takes one parameter: (\"secret string\")"); +// } +// secret = lua_tolstring(L, 1, &secret_len); +// if (secret_len < net_tls_sticket_MIN_SECRET_LEN || !secret) { +// lua_error_p(L, "net.tls_sticket_secret - the secret is shorter than " +// STR(net_tls_sticket_MIN_SECRET_LEN) " bytes"); +// } +// } +// +// tls_session_ticket_ctx_destroy(the_network->tls_session_ticket_ctx); +// the_network->tls_session_ticket_ctx = +// tls_session_ticket_ctx_create(the_network->loop, secret, secret_len); +// if (the_network->tls_session_ticket_ctx == NULL) { +// lua_error_p(L, +// "net.tls_sticket_secret_string - can't create session ticket context"); +// } +// +// lua_pushboolean(L, true); +// return 1; } static int net_tls_sticket_secret_file(lua_State *L) { - if (lua_gettop(L) != 1 || !lua_isstring(L, 1)) { - lua_error_p(L, - 
"net.tls_sticket_secret_file takes one parameter: (\"file name\")"); - } - - const char *file_name = lua_tostring(L, 1); - if (strlen(file_name) == 0) - lua_error_p(L, "net.tls_sticket_secret_file - empty file name"); - - FILE *fp = fopen(file_name, "r"); - if (fp == NULL) { - lua_error_p(L, "net.tls_sticket_secret_file - can't open file '%s': %s", - file_name, strerror(errno)); - } - - char secret_buf[TLS_SESSION_TICKET_SECRET_MAX_LEN]; - const size_t secret_len = fread(secret_buf, 1, sizeof(secret_buf), fp); - int err = ferror(fp); - if (err) { - lua_error_p(L, - "net.tls_sticket_secret_file - error reading from file '%s': %s", - file_name, strerror(err)); - } - if (secret_len < net_tls_sticket_MIN_SECRET_LEN) { - lua_error_p(L, - "net.tls_sticket_secret_file - file '%s' is shorter than " - STR(net_tls_sticket_MIN_SECRET_LEN) " bytes", - file_name); - } - fclose(fp); - - tls_session_ticket_ctx_destroy(the_network->tls_session_ticket_ctx); - the_network->tls_session_ticket_ctx = - tls_session_ticket_ctx_create(the_network->loop, secret_buf, secret_len); - if (the_network->tls_session_ticket_ctx == NULL) { - lua_error_p(L, - "net.tls_sticket_secret_file - can't create session ticket context"); - } - lua_pushboolean(L, true); - return 1; + /* TODO */ + kr_assert(false && "Unimplemented"); + return 0; +// if (lua_gettop(L) != 1 || !lua_isstring(L, 1)) { +// lua_error_p(L, +// "net.tls_sticket_secret_file takes one parameter: (\"file name\")"); +// } +// +// const char *file_name = lua_tostring(L, 1); +// if (strlen(file_name) == 0) +// lua_error_p(L, "net.tls_sticket_secret_file - empty file name"); +// +// FILE *fp = fopen(file_name, "r"); +// if (fp == NULL) { +// lua_error_p(L, "net.tls_sticket_secret_file - can't open file '%s': %s", +// file_name, strerror(errno)); +// } +// +// char secret_buf[TLS_SESSION_TICKET_SECRET_MAX_LEN]; +// const size_t secret_len = fread(secret_buf, 1, sizeof(secret_buf), fp); +// int err = ferror(fp); +// if (err) { +// 
lua_error_p(L, +// "net.tls_sticket_secret_file - error reading from file '%s': %s", +// file_name, strerror(err)); +// } +// if (secret_len < net_tls_sticket_MIN_SECRET_LEN) { +// lua_error_p(L, +// "net.tls_sticket_secret_file - file '%s' is shorter than " +// STR(net_tls_sticket_MIN_SECRET_LEN) " bytes", +// file_name); +// } +// fclose(fp); +// +// tls_session_ticket_ctx_destroy(the_network->tls_session_ticket_ctx); +// the_network->tls_session_ticket_ctx = +// tls_session_ticket_ctx_create(the_network->loop, secret_buf, secret_len); +// if (the_network->tls_session_ticket_ctx == NULL) { +// lua_error_p(L, +// "net.tls_sticket_secret_file - can't create session ticket context"); +// } +// lua_pushboolean(L, true); +// return 1; } static int net_outgoing(lua_State *L, int family) diff --git a/daemon/io.c b/daemon/io.c index 66ad03dae..c9fcc0eb2 100644 --- a/daemon/io.c +++ b/daemon/io.c @@ -21,7 +21,7 @@ #include "daemon/worker.h" #include "daemon/tls.h" #include "daemon/http.h" -#include "daemon/session.h" +#include "daemon/session2.h" #include "contrib/cleanup.h" #include "lib/utils.h" @@ -40,9 +40,9 @@ static void check_bufsize(uv_handle_t* handle) * This is magic presuming we can pull in a whole recvmmsg width in one wave. * Linux will double this the bufsize wanted. */ - const int bufsize_want = 2 * sizeof(the_worker->wire_buf) ; - negotiate_bufsize(uv_recv_buffer_size, handle, bufsize_want); - negotiate_bufsize(uv_send_buffer_size, handle, bufsize_want); + const int BUF_SIZE = 2 * sizeof(RECVMMSG_BATCH * KNOT_WIRE_MAX_PKTSIZE); + negotiate_bufsize(uv_recv_buffer_size, handle, BUF_SIZE); + negotiate_bufsize(uv_send_buffer_size, handle, BUF_SIZE); } #undef negotiate_bufsize @@ -57,26 +57,26 @@ static void handle_getbuf(uv_handle_t* handle, size_t suggested_size, uv_buf_t* * guaranteed to be unchanged only for the duration of * udp_read() and tcp_read(). 
*/ - struct session *s = handle->data; - if (!session_flags(s)->has_tls) { - buf->base = (char *) session_wirebuf_get_free_start(s); - buf->len = session_wirebuf_get_free_size(s); - } else { - struct tls_common_ctx *ctx = session_tls_get_common_ctx(s); - buf->base = (char *) ctx->recv_buf; - buf->len = sizeof(ctx->recv_buf); - } + struct session2 *s = handle->data; + buf->base = wire_buf_free_space(&s->wire_buf); + buf->len = wire_buf_free_space_length(&s->wire_buf); +} + +static void udp_on_unwrapped(int status, struct session2 *session, + const void *target, void *baton) +{ + wire_buf_reset(&session->wire_buf); } void udp_recv(uv_udp_t *handle, ssize_t nread, const uv_buf_t *buf, const struct sockaddr *comm_addr, unsigned flags) { - struct session *s = handle->data; - if (session_flags(s)->closing || nread <= 0 || comm_addr->sa_family == AF_UNSPEC) + struct session2 *s = handle->data; + if (s->closing || nread <= 0 || comm_addr->sa_family == AF_UNSPEC) return; - if (session_flags(s)->outgoing) { - const struct sockaddr *peer = session_get_peer(s); + if (s->outgoing) { + const struct sockaddr *peer = session2_get_peer(s); if (kr_fails_assert(peer->sa_family != AF_UNSPEC)) return; if (kr_sockaddr_cmp(peer, comm_addr) != 0) { @@ -86,64 +86,16 @@ void udp_recv(uv_udp_t *handle, ssize_t nread, const uv_buf_t *buf, } } - const uint8_t *data = (const uint8_t *)buf->base; - ssize_t data_len = nread; - const struct sockaddr *src_addr = comm_addr; - const struct sockaddr *dst_addr = NULL; - struct proxy_result proxy; - bool has_proxy = false; - if (!session_flags(s)->outgoing && proxy_header_present(data, data_len)) { - if (!proxy_allowed(comm_addr)) { - kr_log_debug(IO, "<= ignoring PROXYv2 UDP from disallowed address '%s'\n", - kr_straddr(comm_addr)); - return; - } - - ssize_t trimmed = proxy_process_header(&proxy, s, data, data_len); - if (trimmed == KNOT_EMALF) { - if (kr_log_is_debug(IO, NULL)) { - kr_log_debug(IO, "<= ignoring malformed PROXYv2 UDP " - "from address 
'%s'\n", - kr_straddr(comm_addr)); - } - return; - } else if (trimmed < 0) { - if (kr_log_is_debug(IO, NULL)) { - kr_log_debug(IO, "<= error processing PROXYv2 UDP " - "from address '%s', ignoring\n", - kr_straddr(comm_addr)); - } - return; - } - - if (proxy.command == PROXY2_CMD_PROXY && proxy.family != AF_UNSPEC) { - has_proxy = true; - src_addr = &proxy.src_addr.ip; - dst_addr = &proxy.dst_addr.ip; - - if (kr_log_is_debug(IO, NULL)) { - kr_log_debug(IO, "<= UDP query from '%s'\n", - kr_straddr(src_addr)); - kr_log_debug(IO, "<= proxied through '%s'\n", - kr_straddr(comm_addr)); - } - } - data = session_wirebuf_get_free_start(s); - data_len = nread - trimmed; + int ret = wire_buf_consume(&s->wire_buf, nread); + if (ret) { + wire_buf_reset(&s->wire_buf); + return; } - ssize_t consumed = session_wirebuf_consume(s, data, data_len); - kr_assert(consumed == data_len); - - struct io_comm_data comm = { - .src_addr = src_addr, - .comm_addr = comm_addr, - .dst_addr = dst_addr, - .proxy = (has_proxy) ? 
&proxy : NULL - }; - session_wirebuf_process(s, &comm); - session_wirebuf_discard(s); - mp_flush(the_worker->pkt_pool.ctx); + ret = session2_unwrap(s, protolayer_wire_buf(&s->wire_buf), comm_addr, + udp_on_unwrapped, NULL); + if (ret) + wire_buf_reset(&s->wire_buf); } static int family_to_freebind_option(sa_family_t sa_family, int *level, int *name) @@ -180,6 +132,304 @@ static int family_to_freebind_option(sa_family_t sa_family, int *level, int *nam return kr_ok(); } + +struct pl_udp_iter_data { + struct proxy_result proxy; + bool has_proxy; +}; + +static int pl_udp_iter_init(struct protolayer_manager *manager, struct protolayer_data *layer) +{ + struct pl_udp_iter_data *udp = protolayer_iter_data(layer); + *udp = (struct pl_udp_iter_data){0}; + return 0; +} + +static enum protolayer_cb_result pl_udp_unwrap(struct protolayer_data *layer, struct protolayer_cb_ctx *ctx) +{ + if (ctx->payload.type == PROTOLAYER_PAYLOAD_EVENT) { + /* events should not happen in UDP (currently) */ + return protolayer_continue(ctx); + } + + ctx->payload = protolayer_as_buffer(&ctx->payload); + if (kr_fails_assert(ctx->payload.type == PROTOLAYER_PAYLOAD_BUFFER)) { + /* unsupported payload */ + return protolayer_break(ctx, kr_error(EINVAL)); + } + + struct session2 *s = ctx->manager->session; + struct pl_udp_iter_data *udp = protolayer_iter_data(layer); + + char *data = ctx->payload.buffer.buf; + ssize_t data_len = ctx->payload.buffer.len; + struct comm_info *comm = &ctx->comm; + comm->comm_addr = ctx->target; + comm->src_addr = ctx->target; + if (!s->outgoing && proxy_header_present(data, data_len)) { + if (!proxy_allowed(comm->comm_addr)) { + kr_log_debug(IO, "<= ignoring PROXYv2 UDP from disallowed address '%s'\n", + kr_straddr(comm->comm_addr)); + return protolayer_break(ctx, kr_error(EPERM)); + } + + ssize_t trimmed = proxy_process_header(&udp->proxy, data, data_len); + if (trimmed == KNOT_EMALF) { + if (kr_log_is_debug(IO, NULL)) { + kr_log_debug(IO, "<= ignoring malformed PROXYv2 
UDP " + "from address '%s'\n", + kr_straddr(comm->comm_addr)); + } + return protolayer_break(ctx, kr_error(EINVAL)); + } else if (trimmed < 0) { + if (kr_log_is_debug(IO, NULL)) { + kr_log_debug(IO, "<= error processing PROXYv2 UDP " + "from address '%s', ignoring\n", + kr_straddr(comm->comm_addr)); + } + return protolayer_break(ctx, kr_error(EINVAL)); + } + + if (udp->proxy.command == PROXY2_CMD_PROXY && udp->proxy.family != AF_UNSPEC) { + udp->has_proxy = true; + + comm->src_addr = &udp->proxy.src_addr.ip; + comm->dst_addr = &udp->proxy.dst_addr.ip; + comm->proxy = &udp->proxy; + + if (kr_log_is_debug(IO, NULL)) { + kr_log_debug(IO, "<= UDP query from '%s'\n", + kr_straddr(comm->src_addr)); + kr_log_debug(IO, "<= proxied through '%s'\n", + kr_straddr(comm->comm_addr)); + } + } + + ctx->payload = protolayer_buffer(data + trimmed, data_len - trimmed); + } + + return protolayer_continue(ctx); +} + +static enum protolayer_cb_result pl_udp_wrap(struct protolayer_data *layer, struct protolayer_cb_ctx *ctx) +{ + return protolayer_push(ctx); +} + + +struct pl_tcp_sess_data { + struct proxy_result proxy; + struct wire_buf wire_buf; + bool had_data : 1; + bool has_proxy : 1; +}; + +static int pl_tcp_sess_init(struct protolayer_manager *manager, struct protolayer_data *layer) +{ + struct pl_tcp_sess_data *tcp = protolayer_sess_data(layer); + *tcp = (struct pl_tcp_sess_data){0}; + return 0; +} + +static int pl_tcp_sess_deinit(struct protolayer_manager *manager, struct protolayer_data *layer) +{ + struct pl_tcp_sess_data *tcp = protolayer_sess_data(layer); + wire_buf_deinit(&tcp->wire_buf); + return 0; +} + +static enum protolayer_cb_result pl_tcp_unwrap_timeout( + struct protolayer_data *layer, struct protolayer_cb_ctx *ctx) +{ + /* TODO - connecting timeout? 
*/ + struct session2 *s = ctx->manager->session; + + if (kr_fails_assert(!s->closing)) + return protolayer_continue(ctx); + + if (!session2_tasklist_is_empty(s)) { + int finalized = session2_tasklist_finalize_expired(s); + the_worker->stats.timeout += finalized; + /* session2_tasklist_finalize_expired() may call worker_task_finalize(). + * If session is a source session and there were IO errors, + * worker_task_finalize() can finalize all tasks and close session. */ + if (s->closing) + return protolayer_continue(ctx); + } + + if (!session2_tasklist_is_empty(s)) { + session2_timer_stop(s); + session2_timer_start(s, + KR_RESOLVE_TIME_LIMIT / 2, + KR_RESOLVE_TIME_LIMIT / 2, + PROTOLAYER_UNWRAP); + } else { + /* Normally it should not happen, + * but better to check if there anything in this list. */ + while (!session2_waitinglist_is_empty(s)) { + struct qr_task *t = session2_waitinglist_pop(s, false); + worker_task_finalize(t, KR_STATE_FAIL); + worker_task_unref(t); + the_worker->stats.timeout += 1; + if (s->closing) + return protolayer_continue(ctx); + } + uint64_t idle_in_timeout = the_network->tcp.in_idle_timeout; + uint64_t idle_time = kr_now() - s->last_activity; + if (idle_time < idle_in_timeout) { + idle_in_timeout -= idle_time; + session2_timer_stop(s); + session2_timer_start(s, + idle_in_timeout, idle_in_timeout, + PROTOLAYER_UNWRAP); + } else { + struct sockaddr *peer = session2_get_peer(s); + char *peer_str = kr_straddr(peer); + kr_log_debug(IO, "=> closing connection to '%s'\n", + peer_str ? 
peer_str : ""); + if (s->outgoing) { + worker_del_tcp_waiting(peer); + worker_del_tcp_connected(peer); + } + session2_unwrap(s, protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), NULL, NULL, NULL); + } + } + + return protolayer_continue(ctx); +} + +static enum protolayer_cb_result pl_tcp_unwrap(struct protolayer_data *layer, struct protolayer_cb_ctx *ctx) +{ + struct session2 *s = ctx->manager->session; + struct pl_tcp_sess_data *tcp = protolayer_sess_data(layer); + struct sockaddr *peer = session2_get_peer(s); + + if (ctx->payload.type == PROTOLAYER_PAYLOAD_EVENT) { + if (ctx->payload.event.type == PROTOLAYER_EVENT_TIMEOUT) + return pl_tcp_unwrap_timeout(layer, ctx); + + /* pass thru */ + return protolayer_continue(ctx); + } + + if (ctx->payload.type == PROTOLAYER_PAYLOAD_BUFFER) { + const char *buf = ctx->payload.buffer.buf; + const size_t len = ctx->payload.buffer.len; + + /* Copy a simple buffer into internal wirebuffer. */ + if (len > KNOT_WIRE_MAX_PKTSIZE) + return protolayer_break(ctx, kr_error(EMSGSIZE)); + + if (!tcp->wire_buf.buf) { + int ret = wire_buf_reserve(&tcp->wire_buf, + KNOT_WIRE_MAX_PKTSIZE); + if (ret) + return protolayer_break(ctx, ret); + } + + /* Try to make space */ + while (len > wire_buf_free_space_length(&tcp->wire_buf)) { + if (wire_buf_data_length(&tcp->wire_buf) > 0 || + tcp->wire_buf.start == 0) + return protolayer_break(ctx, kr_error(EMSGSIZE)); + + wire_buf_movestart(&tcp->wire_buf); + } + + memcpy(wire_buf_free_space(&tcp->wire_buf), buf, len); + wire_buf_consume(&tcp->wire_buf, ctx->payload.buffer.len); + ctx->payload = protolayer_wire_buf(&tcp->wire_buf); + } + + if (kr_fails_assert(ctx->payload.type == PROTOLAYER_PAYLOAD_WIRE_BUF)) { + /* TODO: iovec support unimplemented */ + return protolayer_break(ctx, kr_error(EINVAL)); + } + + char *data = wire_buf_data(ctx->payload.wire_buf); /* layer's or session's wirebuf */ + ssize_t data_len = wire_buf_data_length(ctx->payload.wire_buf); + struct comm_info *comm = &ctx->comm; + 
comm->src_addr = peer; + comm->comm_addr = peer; + comm->dst_addr = NULL; + if (!s->outgoing && !tcp->had_data && proxy_header_present(data, data_len)) { + if (!proxy_allowed(comm->src_addr)) { + if (kr_log_is_debug(IO, NULL)) { + kr_log_debug(IO, "<= connection to '%s': PROXYv2 not allowed " + "for this peer, close\n", + kr_straddr(peer)); + } + worker_end_tcp(s); + ctx->payload = protolayer_event_nd(PROTOLAYER_EVENT_CLOSE); + return protolayer_push(ctx); + } + + ssize_t trimmed = proxy_process_header(&tcp->proxy, data, data_len); + if (trimmed < 0) { + if (kr_log_is_debug(IO, NULL)) { + if (trimmed == KNOT_EMALF) { + kr_log_debug(IO, "<= connection to '%s': " + "malformed PROXYv2 header, close\n", + kr_straddr(comm->src_addr)); + } else { + kr_log_debug(IO, "<= connection to '%s': " + "error processing PROXYv2 header, close\n", + kr_straddr(comm->src_addr)); + } + } + worker_end_tcp(s); + ctx->payload = protolayer_event_nd(PROTOLAYER_EVENT_CLOSE); + return protolayer_push(ctx); + } else if (trimmed == 0) { + ctx->payload = protolayer_event_nd(PROTOLAYER_EVENT_CLOSE); + return protolayer_push(ctx); + } + + if (tcp->proxy.command != PROXY2_CMD_LOCAL && tcp->proxy.family != AF_UNSPEC) { + comm->src_addr = &tcp->proxy.src_addr.ip; + comm->dst_addr = &tcp->proxy.dst_addr.ip; + + if (kr_log_is_debug(IO, NULL)) { + kr_log_debug(IO, "<= TCP stream from '%s'\n", + kr_straddr(comm->src_addr)); + kr_log_debug(IO, "<= proxied through '%s'\n", + kr_straddr(comm->comm_addr)); + } + } + + wire_buf_trim(ctx->payload.wire_buf, trimmed); + } + + tcp->had_data = true; + + return protolayer_continue(ctx); +} + +static enum protolayer_cb_result pl_tcp_wrap(struct protolayer_data *layer, struct protolayer_cb_ctx *ctx) +{ + return protolayer_push(ctx); +} + + +void io_protolayers_init() +{ + protolayer_globals[PROTOLAYER_UDP] = (struct protolayer_globals){ + .iter_size = sizeof(struct pl_udp_iter_data), + .iter_init = pl_udp_iter_init, + .unwrap = pl_udp_unwrap, + .wrap = pl_udp_wrap + 
}; + + protolayer_globals[PROTOLAYER_TCP] = (struct protolayer_globals){ + .sess_size = sizeof(struct pl_tcp_sess_data), + .sess_init = pl_tcp_sess_init, + .sess_deinit = pl_tcp_sess_deinit, + .unwrap = pl_tcp_unwrap, + .wrap = pl_tcp_wrap + }; +} + + int io_bind(const struct sockaddr *addr, int type, const endpoint_flags_t *flags) { const int fd = socket(addr->sa_family, type, 0); @@ -265,12 +515,11 @@ int io_listen_udp(uv_loop_t *loop, uv_udp_t *handle, int fd) uv_handle_t *h = (uv_handle_t *)handle; check_bufsize(h); /* Handle is already created, just create context. */ - struct session *s = session_new(h, false, false); + struct session2 *s = session2_new_io(h, PROTOLAYER_GRP_DOUDP, false); kr_require(s); - session_flags(s)->outgoing = false; int socklen = sizeof(union kr_sockaddr); - ret = uv_udp_getsockname(handle, session_get_sockname(s), &socklen); + ret = uv_udp_getsockname(handle, &s->transport.io.sockname.ip, &socklen); if (ret) { kr_log_error(IO, "ERROR: getsockname failed: %s\n", uv_strerror(ret)); abort(); /* It might be nontrivial not to leak something here. */ @@ -279,70 +528,13 @@ int io_listen_udp(uv_loop_t *loop, uv_udp_t *handle, int fd) return io_start_read(h); } -void tcp_timeout_trigger(uv_timer_t *timer) -{ - struct session *s = timer->data; - - if (kr_fails_assert(!session_flags(s)->closing)) - return; - - if (!session_tasklist_is_empty(s)) { - int finalized = session_tasklist_finalize_expired(s); - the_worker->stats.timeout += finalized; - /* session_tasklist_finalize_expired() may call worker_task_finalize(). - * If session is a source session and there were IO errors, - * worker_task_finalize() can finalize all tasks and close session. 
*/ - if (session_flags(s)->closing) { - return; - } - - } - if (!session_tasklist_is_empty(s)) { - uv_timer_stop(timer); - session_timer_start(s, tcp_timeout_trigger, - KR_RESOLVE_TIME_LIMIT / 2, - KR_RESOLVE_TIME_LIMIT / 2); - } else { - /* Normally it should not happen, - * but better to check if there anything in this list. */ - while (!session_waitinglist_is_empty(s)) { - struct qr_task *t = session_waitinglist_pop(s, false); - worker_task_finalize(t, KR_STATE_FAIL); - worker_task_unref(t); - the_worker->stats.timeout += 1; - if (session_flags(s)->closing) { - return; - } - } - uint64_t idle_in_timeout = the_network->tcp.in_idle_timeout; - uint64_t last_activity = session_last_activity(s); - uint64_t idle_time = kr_now() - last_activity; - if (idle_time < idle_in_timeout) { - idle_in_timeout -= idle_time; - uv_timer_stop(timer); - session_timer_start(s, tcp_timeout_trigger, - idle_in_timeout, idle_in_timeout); - } else { - struct sockaddr *peer = session_get_peer(s); - char *peer_str = kr_straddr(peer); - kr_log_debug(IO, "=> closing connection to '%s'\n", - peer_str ? 
peer_str : ""); - if (session_flags(s)->outgoing) { - worker_del_tcp_waiting(peer); - worker_del_tcp_connected(peer); - } - session_close(s); - } - } -} - static void tcp_recv(uv_stream_t *handle, ssize_t nread, const uv_buf_t *buf) { - struct session *s = handle->data; - if (kr_fails_assert(s && session_get_handle(s) == (uv_handle_t *)handle && handle->type == UV_TCP)) + struct session2 *s = handle->data; + if (kr_fails_assert(s && session2_get_handle(s) == (uv_handle_t *)handle && handle->type == UV_TCP)) return; - if (session_flags(s)->closing) { + if (s->closing) { return; } @@ -354,160 +546,117 @@ static void tcp_recv(uv_stream_t *handle, ssize_t nread, const uv_buf_t *buf) if (nread < 0 || !buf->base) { if (kr_log_is_debug(IO, NULL)) { - struct sockaddr *peer = session_get_peer(s); + struct sockaddr *peer = session2_get_peer(s); char *peer_str = kr_straddr(peer); kr_log_debug(IO, "=> connection to '%s' closed by peer (%s)\n", peer_str ? peer_str : "", uv_strerror(nread)); } worker_end_tcp(s); + session2_unwrap(s, protolayer_event_nd(PROTOLAYER_EVENT_FORCE_CLOSE), + NULL, NULL, NULL); return; } - const uint8_t *data = (const uint8_t *)buf->base; - ssize_t data_len = nread; - const struct sockaddr *src_addr = session_get_peer(s); - const struct sockaddr *dst_addr = NULL; - if (!session_flags(s)->outgoing && !session_flags(s)->no_proxy && - proxy_header_present(data, data_len)) { - if (!proxy_allowed(src_addr)) { - if (kr_log_is_debug(IO, NULL)) { - kr_log_debug(IO, "<= connection to '%s': PROXYv2 not allowed " - "for this peer, close\n", - kr_straddr(src_addr)); - } - worker_end_tcp(s); - return; - } - - struct proxy_result *proxy = session_proxy_create(s); - ssize_t trimmed = proxy_process_header(proxy, s, data, data_len); - if (trimmed < 0) { - if (kr_log_is_debug(IO, NULL)) { - if (trimmed == KNOT_EMALF) { - kr_log_debug(IO, "<= connection to '%s': " - "malformed PROXYv2 header, close\n", - kr_straddr(src_addr)); - } else { - kr_log_debug(IO, "<= connection 
to '%s': " - "error processing PROXYv2 header, close\n", - kr_straddr(src_addr)); - } - } - worker_end_tcp(s); - return; - } else if (trimmed == 0) { - return; - } - - if (proxy->command != PROXY2_CMD_LOCAL && proxy->family != AF_UNSPEC) { - src_addr = &proxy->src_addr.ip; - dst_addr = &proxy->dst_addr.ip; - - if (kr_log_is_debug(IO, NULL)) { - kr_log_debug(IO, "<= TCP stream from '%s'\n", - kr_straddr(src_addr)); - kr_log_debug(IO, "<= proxied through '%s'\n", - kr_straddr(session_get_peer(s))); - } - } - - data = session_wirebuf_get_free_start(s); - data_len = nread - trimmed; - } - - session_flags(s)->no_proxy = true; - - ssize_t consumed = 0; - if (session_flags(s)->has_tls) { - /* buf->base points to start of the tls receive buffer. - Decode data free space in session wire buffer. */ - consumed = tls_process_input_data(s, data, data_len); - if (consumed < 0) { - if (kr_log_is_debug(IO, NULL)) { - char *peer_str = kr_straddr(src_addr); - kr_log_debug(IO, "=> connection to '%s': " - "error processing TLS data, close\n", - peer_str ? peer_str : ""); - } - worker_end_tcp(s); - return; - } else if (consumed == 0) { - return; - } - data = session_wirebuf_get_free_start(s); - data_len = consumed; - } -#if ENABLE_DOH2 - int streaming = 1; - if (session_flags(s)->has_http) { - streaming = http_process_input_data(s, data, data_len, - &consumed); - if (streaming < 0) { - if (kr_log_is_debug(IO, NULL)) { - char *peer_str = kr_straddr(src_addr); - kr_log_debug(IO, "=> connection to '%s': " - "error processing HTTP data, close\n", - peer_str ? peer_str : ""); - } - worker_end_tcp(s); - return; - } - if (consumed == 0) { - return; - } - data = session_wirebuf_get_free_start(s); - data_len = consumed; - } -#endif - - /* data points to start of the free space in session wire buffer. - Simple increase internal counter. 
*/ - consumed = session_wirebuf_consume(s, data, data_len); - kr_assert(consumed == data_len); - - struct io_comm_data comm = { - .src_addr = src_addr, - .comm_addr = session_get_peer(s), - .dst_addr = dst_addr, - .proxy = session_proxy_get(s) - }; - int ret = session_wirebuf_process(s, &comm); - if (ret < 0) { - /* An error has occurred, close the session. */ - worker_end_tcp(s); - } - session_wirebuf_compress(s); - mp_flush(the_worker->pkt_pool.ctx); -#if ENABLE_DOH2 - if (session_flags(s)->has_http && streaming == 0 && ret == 0) { - ret = http_send_status(s, HTTP_STATUS_BAD_REQUEST); - if (ret) { - /* An error has occurred, close the session. */ - worker_end_tcp(s); - } + int ret = wire_buf_consume(&s->wire_buf, nread); + if (ret) { + wire_buf_reset(&s->wire_buf); + return; } -#endif -} -#if ENABLE_DOH2 -static ssize_t tls_send(const uint8_t *buf, const size_t len, struct session *session) -{ - struct tls_ctx *ctx = session_tls_get_server_ctx(session); - ssize_t sent = 0; - kr_require(ctx); - - sent = gnutls_record_send(ctx->c.tls_session, buf, len); - if (sent < 0) { - kr_log_debug(DOH, "gnutls_record_send failed: %s (%zd)\n", - gnutls_strerror_name(sent), sent); - return kr_error(EIO); - } - return sent; + session2_unwrap(s, protolayer_wire_buf(&s->wire_buf), NULL, NULL, NULL); + +// ssize_t consumed = 0; +// if (session_flags(s)->has_tls) { +// /* buf->base points to start of the tls receive buffer. +// Decode data free space in session wire buffer. */ +// consumed = tls_process_input_data(s, data, data_len); +// if (consumed < 0) { +// if (kr_log_is_debug(IO, NULL)) { +// char *peer_str = kr_straddr(src_addr); +// kr_log_debug(IO, "=> connection to '%s': " +// "error processing TLS data, close\n", +// peer_str ? 
peer_str : ""); +// } +// worker_end_tcp(s); +// return; +// } else if (consumed == 0) { +// return; +// } +// data = session_wirebuf_get_free_start(s); +// data_len = consumed; +// } +//#if ENABLE_DOH2 +// int streaming = 1; +// if (session_flags(s)->has_http) { +// streaming = http_process_input_data(s, data, data_len, +// &consumed); +// if (streaming < 0) { +// if (kr_log_is_debug(IO, NULL)) { +// char *peer_str = kr_straddr(src_addr); +// kr_log_debug(IO, "=> connection to '%s': " +// "error processing HTTP data, close\n", +// peer_str ? peer_str : ""); +// } +// worker_end_tcp(s); +// return; +// } +// if (consumed == 0) { +// return; +// } +// data = session_wirebuf_get_free_start(s); +// data_len = consumed; +// } +//#endif +// +// /* data points to start of the free space in session wire buffer. +// Simple increase internal counter. */ +// consumed = session_wirebuf_consume(s, data, data_len); +// kr_assert(consumed == data_len); +// +// struct io_comm_data comm = { +// .src_addr = src_addr, +// .comm_addr = session_get_peer(s), +// .dst_addr = dst_addr, +// .proxy = session_proxy_get(s) +// }; +// int ret = session_wirebuf_process(s, &comm); +// if (ret < 0) { +// /* An error has occurred, close the session. */ +// worker_end_tcp(s); +// } +// session_wirebuf_compress(s); +// mp_flush(the_worker->pkt_pool.ctx); +//#if ENABLE_DOH2 +// if (session_flags(s)->has_http && streaming == 0 && ret == 0) { +// ret = http_send_status(s, HTTP_STATUS_BAD_REQUEST); +// if (ret) { +// /* An error has occurred, close the session. 
*/ +// worker_end_tcp(s); +// } +// } +//#endif } -#endif -static void _tcp_accept(uv_stream_t *master, int status, bool tls, bool http) +/* TODO: http */ +//#if ENABLE_DOH2 +//static ssize_t tls_send(const uint8_t *buf, const size_t len, struct session *session) +//{ +// struct tls_ctx *ctx = session_tls_get_server_ctx(session); +// ssize_t sent = 0; +// kr_require(ctx); +// +// sent = gnutls_record_send(ctx->c.tls_session, buf, len); +// if (sent < 0) { +// kr_log_debug(DOH, "gnutls_record_send failed: %s (%zd)\n", +// gnutls_strerror_name(sent), sent); +// return kr_error(EIO); +// } +// return sent; +//} +//#endif + +static void _tcp_accept(uv_stream_t *master, int status, enum protolayer_grp grp) { if (status != 0) { return; @@ -518,7 +667,7 @@ static void _tcp_accept(uv_stream_t *master, int status, bool tls, bool http) return; } int res = io_create(master->loop, (uv_handle_t *)client, - SOCK_STREAM, AF_UNSPEC, tls, http); + SOCK_STREAM, AF_UNSPEC, grp, false); if (res) { if (res == UV_EMFILE) { the_worker->too_many_open = true; @@ -532,31 +681,37 @@ static void _tcp_accept(uv_stream_t *master, int status, bool tls, bool http) } /* struct session was allocated \ borrowed from memory pool. */ - struct session *s = client->data; - kr_require(session_flags(s)->outgoing == false); - kr_require(session_flags(s)->has_tls == tls); + struct session2 *s = client->data; + kr_require(s->outgoing == false); +// kr_require(s->secure == tls); /* TODO */ if (uv_accept(master, (uv_stream_t *)client) != 0) { /* close session, close underlying uv handles and * deallocate (or return to memory pool) memory. */ - session_close(s); + session2_unwrap(s, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); return; } /* Get peer's and our address. We apparently get specific sockname here * even if we listened on a wildcard address. 
*/ - struct sockaddr *sa = session_get_peer(s); + struct sockaddr *sa = session2_get_peer(s); int sa_len = sizeof(struct sockaddr_in6); int ret = uv_tcp_getpeername(client, sa, &sa_len); if (ret || sa->sa_family == AF_UNSPEC) { - session_close(s); + session2_unwrap(s, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); return; } - sa = session_get_sockname(s); + sa = session2_get_sockname(s); sa_len = sizeof(struct sockaddr_in6); ret = uv_tcp_getsockname(client, sa, &sa_len); if (ret || sa->sa_family == AF_UNSPEC) { - session_close(s); + session2_unwrap(s, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); return; } @@ -567,77 +722,78 @@ static void _tcp_accept(uv_stream_t *master, int status, bool tls, bool http) uint64_t idle_in_timeout = the_network->tcp.in_idle_timeout; uint64_t timeout = KR_CONN_RTT_MAX / 2; - if (tls) { - timeout += TLS_MAX_HANDSHAKE_TIME; - struct tls_ctx *ctx = session_tls_get_server_ctx(s); - if (!ctx) { - ctx = tls_new(); - if (!ctx) { - session_close(s); - return; - } - ctx->c.session = s; - ctx->c.handshake_state = TLS_HS_IN_PROGRESS; - - /* Configure ALPN. */ - gnutls_datum_t proto; - if (!http) { - proto.data = (unsigned char *)"dot"; - proto.size = 3; - } else { - proto.data = (unsigned char *)"h2"; - proto.size = 2; - } - unsigned int flags = 0; -#if GNUTLS_VERSION_NUMBER >= 0x030500 - /* Mandatory ALPN means the protocol must match if and - * only if ALPN extension is used by the client. */ - flags |= GNUTLS_ALPN_MANDATORY; -#endif - ret = gnutls_alpn_set_protocols(ctx->c.tls_session, &proto, 1, flags); - if (ret != GNUTLS_E_SUCCESS) { - session_close(s); - return; - } - - session_tls_set_server_ctx(s, ctx); - } - } -#if ENABLE_DOH2 - if (http) { - struct http_ctx *ctx = session_http_get_server_ctx(s); - if (!ctx) { - if (!tls) { /* Plain HTTP is not supported. 
*/ - session_close(s); - return; - } - ctx = http_new(s, tls_send); - if (!ctx) { - session_close(s); - return; - } - session_http_set_server_ctx(s, ctx); - } - } -#endif - session_timer_start(s, tcp_timeout_trigger, timeout, idle_in_timeout); + /* TODO: tls, http */ +// if (tls) { +// timeout += TLS_MAX_HANDSHAKE_TIME; +// struct tls_ctx *ctx = session_tls_get_server_ctx(s); +// if (!ctx) { +// ctx = tls_new(); +// if (!ctx) { +// session_close(s); +// return; +// } +// ctx->c.session = s; +// ctx->c.handshake_state = TLS_HS_IN_PROGRESS; +// +// /* Configure ALPN. */ +// gnutls_datum_t proto; +// if (!http) { +// proto.data = (unsigned char *)"dot"; +// proto.size = 3; +// } else { +// proto.data = (unsigned char *)"h2"; +// proto.size = 2; +// } +// unsigned int flags = 0; +//#if GNUTLS_VERSION_NUMBER >= 0x030500 +// /* Mandatory ALPN means the protocol must match if and +// * only if ALPN extension is used by the client. */ +// flags |= GNUTLS_ALPN_MANDATORY; +//#endif +// ret = gnutls_alpn_set_protocols(ctx->c.tls_session, &proto, 1, flags); +// if (ret != GNUTLS_E_SUCCESS) { +// session_close(s); +// return; +// } +// +// session_tls_set_server_ctx(s, ctx); +// } +// } +//#if ENABLE_DOH2 +// if (http) { +// struct http_ctx *ctx = session_http_get_server_ctx(s); +// if (!ctx) { +// if (!tls) { /* Plain HTTP is not supported. 
*/ +// session_close(s); +// return; +// } +// ctx = http_new(s, tls_send); +// if (!ctx) { +// session_close(s); +// return; +// } +// session_http_set_server_ctx(s, ctx); +// } +// } +//#endif + session2_timer_start(s, timeout, idle_in_timeout, PROTOLAYER_UNWRAP); io_start_read((uv_handle_t *)client); } static void tcp_accept(uv_stream_t *master, int status) { - _tcp_accept(master, status, false, false); + _tcp_accept(master, status, PROTOLAYER_GRP_DOTCP); } static void tls_accept(uv_stream_t *master, int status) { - _tcp_accept(master, status, true, false); + _tcp_accept(master, status, PROTOLAYER_GRP_DOTLS); } #if ENABLE_DOH2 static void https_accept(uv_stream_t *master, int status) { - _tcp_accept(master, status, true, true); + _tcp_accept(master, status, PROTOLAYER_GRP_DOHTTPS); } #endif @@ -933,151 +1089,152 @@ int io_listen_pipe(uv_loop_t *loop, uv_pipe_t *handle, int fd) return 0; } -#if ENABLE_XDP -static void xdp_rx(uv_poll_t* handle, int status, int events) -{ - const int XDP_RX_BATCH_SIZE = 64; - if (status < 0) { - kr_log_error(XDP, "poll status %d: %s\n", status, uv_strerror(status)); - return; - } - if (events != UV_READABLE) { - kr_log_error(XDP, "poll unexpected events: %d\n", events); - return; - } - - xdp_handle_data_t *xhd = handle->data; - kr_require(xhd && xhd->session && xhd->socket); - uint32_t rcvd; - knot_xdp_msg_t msgs[XDP_RX_BATCH_SIZE]; - int ret = knot_xdp_recv(xhd->socket, msgs, XDP_RX_BATCH_SIZE, &rcvd - #if KNOT_VERSION_HEX >= 0x030100 - , NULL - #endif - ); - - if (kr_fails_assert(ret == KNOT_EOK)) { - /* ATM other error codes can only be returned when called incorrectly */ - kr_log_error(XDP, "knot_xdp_recv(): %d, %s\n", ret, knot_strerror(ret)); - return; - } - kr_log_debug(XDP, "poll triggered, processing a batch of %d packets\n", (int)rcvd); - kr_require(rcvd <= XDP_RX_BATCH_SIZE); - for (int i = 0; i < rcvd; ++i) { - const knot_xdp_msg_t *msg = &msgs[i]; - kr_require(msg->payload.iov_len <= KNOT_WIRE_MAX_PKTSIZE); - 
knot_pkt_t *kpkt = knot_pkt_new(msg->payload.iov_base, msg->payload.iov_len, - &the_worker->pkt_pool); - if (kpkt == NULL) { - ret = kr_error(ENOMEM); - } else { - struct io_comm_data comm = { - .src_addr = (const struct sockaddr *)&msg->ip_from, - .comm_addr = (const struct sockaddr *)&msg->ip_from, - .dst_addr = (const struct sockaddr *)&msg->ip_to - }; - ret = worker_submit(xhd->session, &comm, - msg->eth_from, msg->eth_to, kpkt); - } - if (ret) - kr_log_debug(XDP, "worker_submit() == %d: %s\n", ret, kr_strerror(ret)); - mp_flush(the_worker->pkt_pool.ctx); - } - knot_xdp_recv_finish(xhd->socket, msgs, rcvd); -} -/// Warn if the XDP program is running in emulated mode (XDP_SKB) -static void xdp_warn_mode(const char *ifname) -{ - if (kr_fails_assert(ifname)) - return; - - const unsigned if_index = if_nametoindex(ifname); - if (!if_index) { - kr_log_warning(XDP, "warning: interface %s, unexpected error when converting its name: %s\n", - ifname, strerror(errno)); - return; - } - - const knot_xdp_mode_t mode = knot_eth_xdp_mode(if_index); - switch (mode) { - case KNOT_XDP_MODE_FULL: - return; - case KNOT_XDP_MODE_EMUL: - kr_log_warning(XDP, "warning: interface %s running only with XDP emulation\n", - ifname); - return; - case KNOT_XDP_MODE_NONE: // enum warnings from compiler - break; - } - kr_log_warning(XDP, "warning: interface %s running in unexpected XDP mode %d\n", - ifname, (int)mode); -} -int io_listen_xdp(uv_loop_t *loop, struct endpoint *ep, const char *ifname) -{ - if (!ep || !ep->handle) { - return kr_error(EINVAL); - } - - // RLIMIT_MEMLOCK often needs raising when operating on BPF - static int ret_limit = 1; - if (ret_limit == 1) { - struct rlimit no_limit = { RLIM_INFINITY, RLIM_INFINITY }; - ret_limit = setrlimit(RLIMIT_MEMLOCK, &no_limit) - ? 
kr_error(errno) : 0; - } - if (ret_limit) return ret_limit; - - xdp_handle_data_t *xhd = malloc(sizeof(*xhd)); - if (!xhd) return kr_error(ENOMEM); - - xhd->socket = NULL; // needed for some reason - - // This call is a libknot version hell, unfortunately. - int ret = knot_xdp_init(&xhd->socket, ifname, ep->nic_queue, - #if KNOT_VERSION_HEX < 0x030100 - ep->port ? ep->port : KNOT_XDP_LISTEN_PORT_ALL, - KNOT_XDP_LOAD_BPF_MAYBE - #elif KNOT_VERSION_HEX < 0x030200 - ep->port ? ep->port : (KNOT_XDP_LISTEN_PORT_PASS | 0), - KNOT_XDP_LOAD_BPF_MAYBE - #else - KNOT_XDP_FILTER_UDP | (ep->port ? 0 : KNOT_XDP_FILTER_PASS), - ep->port, 0/*quic_port*/, - KNOT_XDP_LOAD_BPF_MAYBE, - NULL/*xdp_config*/ - #endif - ); - - if (!ret) xdp_warn_mode(ifname); - - if (!ret) ret = uv_idle_init(loop, &xhd->tx_waker); - if (ret || kr_fails_assert(xhd->socket)) { - free(xhd); - return ret == 0 ? kr_error(EINVAL) : kr_error(ret); - } - xhd->tx_waker.data = xhd->socket; - - ep->fd = knot_xdp_socket_fd(xhd->socket); // probably not useful - ret = uv_poll_init(loop, (uv_poll_t *)ep->handle, ep->fd); - if (ret) { - knot_xdp_deinit(xhd->socket); - free(xhd); - return kr_error(ret); - } - - // beware: this sets poll_handle->data - xhd->session = session_new(ep->handle, false, false); - kr_require(!session_flags(xhd->session)->outgoing); - session_get_sockname(xhd->session)->sa_family = AF_XDP; // to have something in there - - ep->handle->data = xhd; - ret = uv_poll_start((uv_poll_t *)ep->handle, UV_READABLE, xdp_rx); - return ret; -} -#endif - - -int io_create(uv_loop_t *loop, uv_handle_t *handle, int type, unsigned family, bool has_tls, bool has_http) +/* TODO: xdp */ +//#if ENABLE_XDP +//static void xdp_rx(uv_poll_t* handle, int status, int events) +//{ +// const int XDP_RX_BATCH_SIZE = 64; +// if (status < 0) { +// kr_log_error(XDP, "poll status %d: %s\n", status, uv_strerror(status)); +// return; +// } +// if (events != UV_READABLE) { +// kr_log_error(XDP, "poll unexpected events: %d\n", 
events); +// return; +// } +// +// xdp_handle_data_t *xhd = handle->data; +// kr_require(xhd && xhd->session && xhd->socket); +// uint32_t rcvd; +// knot_xdp_msg_t msgs[XDP_RX_BATCH_SIZE]; +// int ret = knot_xdp_recv(xhd->socket, msgs, XDP_RX_BATCH_SIZE, &rcvd +// #if KNOT_VERSION_HEX >= 0x030100 +// , NULL +// #endif +// ); +// +// if (kr_fails_assert(ret == KNOT_EOK)) { +// /* ATM other error codes can only be returned when called incorrectly */ +// kr_log_error(XDP, "knot_xdp_recv(): %d, %s\n", ret, knot_strerror(ret)); +// return; +// } +// kr_log_debug(XDP, "poll triggered, processing a batch of %d packets\n", (int)rcvd); +// kr_require(rcvd <= XDP_RX_BATCH_SIZE); +// for (int i = 0; i < rcvd; ++i) { +// const knot_xdp_msg_t *msg = &msgs[i]; +// kr_require(msg->payload.iov_len <= KNOT_WIRE_MAX_PKTSIZE); +// knot_pkt_t *kpkt = knot_pkt_new(msg->payload.iov_base, msg->payload.iov_len, +// &the_worker->pkt_pool); +// if (kpkt == NULL) { +// ret = kr_error(ENOMEM); +// } else { +// struct io_comm_data comm = { +// .src_addr = (const struct sockaddr *)&msg->ip_from, +// .comm_addr = (const struct sockaddr *)&msg->ip_from, +// .dst_addr = (const struct sockaddr *)&msg->ip_to +// }; +// ret = worker_submit(xhd->session, &comm, +// msg->eth_from, msg->eth_to, kpkt); +// } +// if (ret) +// kr_log_debug(XDP, "worker_submit() == %d: %s\n", ret, kr_strerror(ret)); +// mp_flush(the_worker->pkt_pool.ctx); +// } +// knot_xdp_recv_finish(xhd->socket, msgs, rcvd); +//} +///// Warn if the XDP program is running in emulated mode (XDP_SKB) +//static void xdp_warn_mode(const char *ifname) +//{ +// if (kr_fails_assert(ifname)) +// return; +// +// const unsigned if_index = if_nametoindex(ifname); +// if (!if_index) { +// kr_log_warning(XDP, "warning: interface %s, unexpected error when converting its name: %s\n", +// ifname, strerror(errno)); +// return; +// } +// +// const knot_xdp_mode_t mode = knot_eth_xdp_mode(if_index); +// switch (mode) { +// case KNOT_XDP_MODE_FULL: +// 
return; +// case KNOT_XDP_MODE_EMUL: +// kr_log_warning(XDP, "warning: interface %s running only with XDP emulation\n", +// ifname); +// return; +// case KNOT_XDP_MODE_NONE: // enum warnings from compiler +// break; +// } +// kr_log_warning(XDP, "warning: interface %s running in unexpected XDP mode %d\n", +// ifname, (int)mode); +//} +//int io_listen_xdp(uv_loop_t *loop, struct endpoint *ep, const char *ifname) +//{ +// if (!ep || !ep->handle) { +// return kr_error(EINVAL); +// } +// +// // RLIMIT_MEMLOCK often needs raising when operating on BPF +// static int ret_limit = 1; +// if (ret_limit == 1) { +// struct rlimit no_limit = { RLIM_INFINITY, RLIM_INFINITY }; +// ret_limit = setrlimit(RLIMIT_MEMLOCK, &no_limit) +// ? kr_error(errno) : 0; +// } +// if (ret_limit) return ret_limit; +// +// xdp_handle_data_t *xhd = malloc(sizeof(*xhd)); +// if (!xhd) return kr_error(ENOMEM); +// +// xhd->socket = NULL; // needed for some reason +// +// // This call is a libknot version hell, unfortunately. +// int ret = knot_xdp_init(&xhd->socket, ifname, ep->nic_queue, +// #if KNOT_VERSION_HEX < 0x030100 +// ep->port ? ep->port : KNOT_XDP_LISTEN_PORT_ALL, +// KNOT_XDP_LOAD_BPF_MAYBE +// #elif KNOT_VERSION_HEX < 0x030200 +// ep->port ? ep->port : (KNOT_XDP_LISTEN_PORT_PASS | 0), +// KNOT_XDP_LOAD_BPF_MAYBE +// #else +// KNOT_XDP_FILTER_UDP | (ep->port ? 0 : KNOT_XDP_FILTER_PASS), +// ep->port, 0/*quic_port*/, +// KNOT_XDP_LOAD_BPF_MAYBE, +// NULL/*xdp_config*/ +// #endif +// ); +// +// if (!ret) xdp_warn_mode(ifname); +// +// if (!ret) ret = uv_idle_init(loop, &xhd->tx_waker); +// if (ret || kr_fails_assert(xhd->socket)) { +// free(xhd); +// return ret == 0 ? 
kr_error(EINVAL) : kr_error(ret); +// } +// xhd->tx_waker.data = xhd->socket; +// +// ep->fd = knot_xdp_socket_fd(xhd->socket); // probably not useful +// ret = uv_poll_init(loop, (uv_poll_t *)ep->handle, ep->fd); +// if (ret) { +// knot_xdp_deinit(xhd->socket); +// free(xhd); +// return kr_error(ret); +// } +// +// // beware: this sets poll_handle->data +// xhd->session = session_new(ep->handle, false, false); +// kr_require(!session_flags(xhd->session)->outgoing); +// session_get_sockname(xhd->session)->sa_family = AF_XDP; // to have something in there +// +// ep->handle->data = xhd; +// ret = uv_poll_start((uv_poll_t *)ep->handle, UV_READABLE, xdp_rx); +// return ret; +//} +//#endif + +int io_create(uv_loop_t *loop, uv_handle_t *handle, int type, unsigned family, + enum protolayer_grp grp, bool outgoing) { int ret = -1; if (type == SOCK_DGRAM) { @@ -1089,7 +1246,7 @@ int io_create(uv_loop_t *loop, uv_handle_t *handle, int type, unsigned family, b if (ret != 0) { return ret; } - struct session *s = session_new(handle, has_tls, has_http); + struct session2 *s = session2_new_io(handle, grp, outgoing); if (s == NULL) { ret = -1; } @@ -1102,13 +1259,13 @@ static void io_deinit(uv_handle_t *handle) return; } if (handle->type != UV_POLL) { - session_free(handle->data); + session2_free(handle->data); } else { #if ENABLE_XDP xdp_handle_data_t *xhd = handle->data; uv_idle_stop(&xhd->tx_waker); uv_close((uv_handle_t *)&xhd->tx_waker, NULL); - session_free(xhd->session); + session2_free(xhd->session); knot_xdp_deinit(xhd->socket); free(xhd); #else diff --git a/daemon/io.h b/daemon/io.h index 0e88dc189..e589231dc 100644 --- a/daemon/io.h +++ b/daemon/io.h @@ -11,33 +11,14 @@ #include "lib/generic/array.h" #include "daemon/worker.h" #include "daemon/engine.h" +#include "daemon/session2.h" struct tls_ctx; struct tls_client_ctx; struct io_stream_data; -/** Communication data. */ -struct io_comm_data { - /** The original address the data came from. 
May be that of a proxied - * client, if they came through a proxy. May be `NULL` if - * the communication did not come from network. */ - const struct sockaddr *src_addr; - - /** The actual address the resolver is communicating with. May be - * the address of a proxy if the communication came through one, - * otherwise it will be the same as `src_addr`. May be `NULL` if - * the communication did not come from network. */ - const struct sockaddr *comm_addr; - - /** The original destination address. May be the resolver's address, or - * the address of a proxy if the communication came through one. May be - * `NULL` if the communication did not come from network. */ - const struct sockaddr *dst_addr; - - /** Data parsed from a PROXY header. May be `NULL` if the communication - * did not come through a proxy, or if the PROXYv2 protocol was not used. */ - const struct proxy_result *proxy; -}; +/** Initializes the protocol layers managed by io. */ +void io_protolayers_init(); /** Bind address into a file-descriptor (only, no libuv). type is e.g. SOCK_DGRAM */ int io_bind(const struct sockaddr *addr, int type, const endpoint_flags_t *flags); @@ -64,7 +45,7 @@ void tcp_timeout_trigger(uv_timer_t *timer); * \param family = AF_* * \param has_tls has meanings only when type is SOCK_STREAM */ int io_create(uv_loop_t *loop, uv_handle_t *handle, int type, - unsigned family, bool has_tls, bool has_http); + unsigned family, enum protolayer_grp grp, bool outgoing); void io_free(uv_handle_t *handle); int io_start_read(uv_handle_t *handle); @@ -74,7 +55,7 @@ int io_stop_read(uv_handle_t *handle); * (Other cases store a direct struct session pointer in ::data.) 
*/ typedef struct { struct knot_xdp_socket *socket; - struct session *session; + struct session2 *session; uv_idle_t tx_waker; } xdp_handle_data_t; diff --git a/daemon/main.c b/daemon/main.c index 5f63ee6df..9df6dc79f 100644 --- a/daemon/main.c +++ b/daemon/main.c @@ -569,6 +569,8 @@ int main(int argc, char **argv) uv_strerror(ret)); } + io_protolayers_init(); + /* Start listening, in the sense of network_listen_fd(). */ if (start_listening(&the_args->fds) != 0) { ret = EXIT_FAILURE; diff --git a/daemon/meson.build b/daemon/meson.build index 1ff28ec03..b0b119f0d 100644 --- a/daemon/meson.build +++ b/daemon/meson.build @@ -14,18 +14,17 @@ kresd_src = files([ 'main.c', 'network.c', 'proxyv2.c', - 'session.c', 'session2.c', - 'tls.c', - 'tls_ephemeral_credentials.c', - 'tls_session_ticket-srv.c', +# 'tls.c', +# 'tls_ephemeral_credentials.c', +# 'tls_session_ticket-srv.c', 'udp_queue.c', 'worker.c', 'zimport.c', ]) -if nghttp2.found() - kresd_src += files(['http.c']) -endif +#if nghttp2.found() +# kresd_src += files(['http.c']) +#endif c_src_lint += kresd_src @@ -51,7 +50,7 @@ kresd_deps = [ gnutls, libsystemd, capng, - nghttp2, +# nghttp2, malloc, ] diff --git a/daemon/network.c b/daemon/network.c index 737a3b3e2..706523bc3 100644 --- a/daemon/network.c +++ b/daemon/network.c @@ -72,8 +72,9 @@ void network_init(uv_loop_t *loop, int tcp_backlog) the_network->proxy_addrs4 = trie_create(NULL); the_network->proxy_addrs6 = trie_create(NULL); the_network->tls_client_params = NULL; - the_network->tls_session_ticket_ctx = /* unsync. random, by default */ - tls_session_ticket_ctx_create(loop, NULL, 0); + /* TODO: tls */ +// the_network->tls_session_ticket_ctx = /* unsync. 
random, by default */ +// tls_session_ticket_ctx_create(loop, NULL, 0); the_network->tcp.in_idle_timeout = 10000; the_network->tcp.tls_handshake_timeout = TLS_MAX_HANDSHAKE_TIME; the_network->tcp_backlog = tcp_backlog; @@ -299,9 +300,10 @@ void network_deinit(void) network_proxy_free_addr_data(the_network->proxy_addrs6); trie_free(the_network->proxy_addrs6); - tls_credentials_free(the_network->tls_credentials); - tls_client_params_free(the_network->tls_client_params); - tls_session_ticket_ctx_destroy(the_network->tls_session_ticket_ctx); + /* TODO: tls */ +// tls_credentials_free(the_network->tls_credentials); +// tls_client_params_free(the_network->tls_client_params); +// tls_session_ticket_ctx_destroy(the_network->tls_session_ticket_ctx); #ifndef NDEBUG memset(the_network, 0, sizeof(*the_network)); #endif @@ -417,14 +419,14 @@ static int open_endpoint(const char *addr_str, } if (is_xdp) { - #if ENABLE_XDP - uv_poll_t *ep_handle = malloc(sizeof(uv_poll_t)); - ep->handle = (uv_handle_t *)ep_handle; - ret = !ep->handle ? ENOMEM - : io_listen_xdp(the_network->loop, ep, addr_str); - #else +// #if ENABLE_XDP +// uv_poll_t *ep_handle = malloc(sizeof(uv_poll_t)); +// ep->handle = (uv_handle_t *)ep_handle; +// ret = !ep->handle ? 
ENOMEM +// : io_listen_xdp(the_network->loop, ep, addr_str); +// #else ret = ESOCKTNOSUPPORT; - #endif +// #endif goto finish_ret; } /* else */ @@ -842,18 +844,19 @@ int network_close(const char *addr_str, int port) void network_new_hostname(void) { - if (the_network->tls_credentials && - the_network->tls_credentials->ephemeral_servicename) { - struct tls_credentials *newcreds; - newcreds = tls_get_ephemeral_credentials(); - if (newcreds) { - tls_credentials_release(the_network->tls_credentials); - the_network->tls_credentials = newcreds; - kr_log_info(TLS, "Updated ephemeral X.509 cert with new hostname\n"); - } else { - kr_log_error(TLS, "Failed to update ephemeral X.509 cert with new hostname, using existing one\n"); - } - } + /* TODO: tls */ +// if (the_network->tls_credentials && +// the_network->tls_credentials->ephemeral_servicename) { +// struct tls_credentials *newcreds; +// newcreds = tls_get_ephemeral_credentials(); +// if (newcreds) { +// tls_credentials_release(the_network->tls_credentials); +// the_network->tls_credentials = newcreds; +// kr_log_info(TLS, "Updated ephemeral X.509 cert with new hostname\n"); +// } else { +// kr_log_error(TLS, "Failed to update ephemeral X.509 cert with new hostname, using existing one\n"); +// } +// } } #ifdef SO_ATTACH_BPF diff --git a/daemon/proxyv2.c b/daemon/proxyv2.c index c293a6236..f796aad80 100644 --- a/daemon/proxyv2.c +++ b/daemon/proxyv2.c @@ -167,7 +167,7 @@ bool proxy_allowed(const struct sockaddr *saddr) return kr_bitcmp((char *) &addr, (char *) &found->addr, found->netmask) == 0; } -ssize_t proxy_process_header(struct proxy_result *out, struct session *s, +ssize_t proxy_process_header(struct proxy_result *out, const void *buf, const ssize_t nread) { if (!buf) @@ -287,5 +287,5 @@ ssize_t proxy_process_header(struct proxy_result *out, struct session *s, } fill_wirebuf: - return session_wirebuf_trim(s, hdr_len); + return hdr_len; } diff --git a/daemon/proxyv2.h b/daemon/proxyv2.h index 3cbb5213a..c18e71ce3 
100644 --- a/daemon/proxyv2.h +++ b/daemon/proxyv2.h @@ -6,7 +6,7 @@ #include -#include "daemon/session.h" +#include "daemon/session2.h" #include "lib/utils.h" extern const char PROXY2_SIGNATURE[12]; @@ -42,8 +42,7 @@ static inline bool proxy_header_present(const void* buf, const ssize_t nread) bool proxy_allowed(const struct sockaddr *saddr); /** Parses the PROXYv2 header from buf of size nread and writes the result into - * out. The rest of the buffer is moved to free bytes of the specified session's - * wire buffer. The function assumes that the PROXYv2 signature is present + * out. The function assumes that the PROXYv2 signature is present * and has been already checked by the caller (like `udp_recv` or `tcp_recv`). */ -ssize_t proxy_process_header(struct proxy_result *out, struct session *s, +ssize_t proxy_process_header(struct proxy_result *out, const void *buf, ssize_t nread); diff --git a/daemon/session.c b/daemon/session.c index 97256be24..2f7ce6012 100644 --- a/daemon/session.c +++ b/daemon/session.c @@ -767,7 +767,7 @@ void session_unpoison(struct session *session) kr_asan_unpoison(session, sizeof(*session)); } -int session_wirebuf_process(struct session *session, struct io_comm_data *comm) +int session_wirebuf_process(struct session *session, struct comm_info *comm) { int ret = 0; if (session->wire_buf_start_idx == session->wire_buf_end_idx) diff --git a/daemon/session.h b/daemon/session.h index eccf45b5f..66e86fbd1 100644 --- a/daemon/session.h +++ b/daemon/session.h @@ -13,7 +13,7 @@ struct qr_task; struct worker_ctx; struct session; -struct io_comm_data; +struct comm_info; struct proxy_result; struct session_flags { @@ -140,7 +140,7 @@ size_t session_wirebuf_get_free_size(struct session *session); void session_wirebuf_discard(struct session *session); /** Move all data to the beginning of the buffer. 
*/ void session_wirebuf_compress(struct session *session); -int session_wirebuf_process(struct session *session, struct io_comm_data *comm); +int session_wirebuf_process(struct session *session, struct comm_info *comm); ssize_t session_wirebuf_consume(struct session *session, const uint8_t *data, ssize_t len); /** Trims `len` bytes from the start of the session's wire buffer. diff --git a/daemon/session2.c b/daemon/session2.c index 0dcb0134b..49dcecbda 100644 --- a/daemon/session2.c +++ b/daemon/session2.c @@ -7,21 +7,24 @@ #include "lib/log.h" #include "lib/utils.h" +#include "daemon/io.h" +#include "daemon/worker.h" #include "daemon/session2.h" -typedef void (*session2_push_cb)(struct session2 *s, int status, - void *target, void *baton); - static int session2_transport_pushv(struct session2 *s, const struct iovec *iov, int iovcnt, - void *target, - session2_push_cb cb, void *baton); + const void *target, + protolayer_finished_cb cb, void *baton); static inline int session2_transport_push(struct session2 *s, char *buf, size_t buf_len, - void *target, - session2_push_cb cb, void *baton); + const void *target, + protolayer_finished_cb cb, void *baton); +static int session2_transport_event(struct session2 *s, + struct protolayer_event event, + const void *target, + protolayer_finished_cb cb, void *baton); struct protolayer_globals protolayer_globals[PROTOLAYER_PROTOCOL_COUNT] = {0}; @@ -55,17 +58,53 @@ enum protolayer_protocol protolayer_grp_doh[] = { enum protolayer_protocol *protolayer_grps[PROTOLAYER_GRP_COUNT] = { -#define XX(id, name, desc) [PROTOLAYER_GRP_##id] = protolayer_grp_##name, +#define XX(cid, vid, name) [PROTOLAYER_GRP_##cid] = protolayer_grp_##vid, PROTOLAYER_GRP_MAP(XX) #undef XX }; -char *protolayer_grp_descs[PROTOLAYER_GRP_COUNT] = { -#define XX(id, name, desc) [PROTOLAYER_GRP_##id] = desc, +char *protolayer_grp_names[PROTOLAYER_GRP_COUNT] = { + [PROTOLAYER_GRP_NULL] = "(null)", +#define XX(cid, vid, name) [PROTOLAYER_GRP_##cid] = name, 
PROTOLAYER_GRP_MAP(XX) #undef XX }; +char *protolayer_event_names[PROTOLAYER_EVENT_COUNT] = { + [PROTOLAYER_EVENT_NULL] = "(null)", +#define XX(cid) [PROTOLAYER_EVENT_##cid] = #cid, + PROTOLAYER_EVENT_MAP(XX) +#undef XX +}; + +char *protolayer_payload_names[PROTOLAYER_PAYLOAD_COUNT] = { + [PROTOLAYER_PAYLOAD_NULL] = "(null)", +#define XX(cid, name) [PROTOLAYER_PAYLOAD_##cid] = name, + PROTOLAYER_PAYLOAD_MAP(XX) +#undef XX +}; + + +struct protolayer_payload protolayer_as_buffer(const struct protolayer_payload *payload) +{ + if (payload->type == PROTOLAYER_PAYLOAD_BUFFER) + return *payload; + + if (payload->type == PROTOLAYER_PAYLOAD_WIRE_BUF) + return (struct protolayer_payload){ + .type = PROTOLAYER_PAYLOAD_BUFFER, + .buffer = { + .buf = wire_buf_data(payload->wire_buf), + .len = wire_buf_data_length(payload->wire_buf) + } + }; + + kr_assert(false && "Unsupported payload type."); + return (struct protolayer_payload){ + .type = PROTOLAYER_PAYLOAD_NULL + }; +} + /** Gets context for the layer with the specified index from the manager. */ static inline struct protolayer_data *protolayer_manager_get( @@ -79,6 +118,14 @@ static inline struct protolayer_data *protolayer_manager_get( return (struct protolayer_data *)(pl_data_beg + offsets[layer_ix]); } +static inline bool protolayer_cb_ctx_is_last(struct protolayer_cb_ctx *ctx) +{ + unsigned int last_ix = (ctx->direction == PROTOLAYER_UNWRAP) + ? 
ctx->manager->num_layers - 1 + : 0; + return ctx->layer_ix == last_ix; +} + static inline void protolayer_cb_ctx_next(struct protolayer_cb_ctx *ctx) { if (ctx->direction == PROTOLAYER_UNWRAP) @@ -88,26 +135,43 @@ static inline void protolayer_cb_ctx_next(struct protolayer_cb_ctx *ctx) } static int protolayer_cb_ctx_finish(struct protolayer_cb_ctx *ctx, int ret, - bool reset_layers) + bool deinit_iter_data) { - if (reset_layers) { + struct session2 *session = ctx->manager->session; + + if (deinit_iter_data) { struct protolayer_manager *m = ctx->manager; struct protolayer_globals *globals = &protolayer_globals[m->grp]; for (size_t i = 0; i < m->num_layers; i++) { struct protolayer_data *d = protolayer_manager_get(m, i); - if (globals->reset) - globals->reset(m, d); + if (globals->iter_deinit) + globals->iter_deinit(m, d); } + m->iter_data_inited = false; } + if (ret) + kr_log_debug(PROTOLAYER, "layer context of group '%s' ended with return code %d\n", + protolayer_grp_names[ctx->manager->grp], ret); + if (ctx->status) - kr_log_debug(PROTOLAYER, "layer iteration of group '%s' ended with status %d", - protolayer_grp_descs[ctx->manager->grp], ret); + kr_log_debug(PROTOLAYER, "layer %u iteration of group '%s' ended with status %d\n", + ctx->layer_ix, protolayer_grp_names[ctx->manager->grp], ctx->status); if (ctx->finished_cb) - ctx->finished_cb(ret, ctx->finished_cb_target, + ctx->finished_cb(ret, session, ctx->finished_cb_target, ctx->finished_cb_baton); + + /* events bounce back from unwrap to wrap */ + bool bounce_back = (ctx->direction == PROTOLAYER_UNWRAP + && ret == PROTOLAYER_RET_NORMAL + && !ctx->status + && ctx->payload.type == PROTOLAYER_PAYLOAD_EVENT); + if (bounce_back) + session2_wrap(session, ctx->payload, NULL, NULL, NULL); + free(ctx); + return ret; } @@ -129,31 +193,42 @@ static int protolayer_step(struct protolayer_cb_ctx *ctx) enum protolayer_protocol protocol = ldata->protocol; struct protolayer_globals *globals = &protolayer_globals[protocol]; + enum 
protolayer_cb_result result = PROTOLAYER_CB_RESULT_MAGIC; if (!ldata->processed) { /* Avoid repetition */ ctx->async_mode = false; ctx->status = 0; - ctx->result = PROTOLAYER_CB_NULL; + ctx->action = PROTOLAYER_CB_ACTION_NULL; protolayer_cb cb = (ctx->direction == PROTOLAYER_UNWRAP) ? globals->unwrap : globals->wrap; - cb(ldata, ctx); + if (cb) + result = cb(ldata, ctx); + else + ctx->action = PROTOLAYER_CB_ACTION_CONTINUE; ldata->processed = true; + } else { + kr_assert(false && "Repeated protocol layer step"); } - if (!ctx->result) { + if (kr_fails_assert(result == PROTOLAYER_CB_RESULT_MAGIC)) { + /* Callback did not use a continuation function to return. */ + return kr_error(EINVAL); + } + + if (!ctx->action) { + /* Next step is from a callback */ ctx->async_mode = true; - return PROTOLAYER_RET_ASYNC; /* Next step is callback */ + return PROTOLAYER_RET_ASYNC; } - if (ctx->result == PROTOLAYER_CB_WAIT) { + if (ctx->action == PROTOLAYER_CB_ACTION_WAIT) { kr_assert(ctx->status == 0); return protolayer_cb_ctx_finish( ctx, PROTOLAYER_RET_WAITING, false); } - if (ctx->result == PROTOLAYER_CB_BREAK) { - kr_assert(ctx->status <= 0); + if (ctx->action == PROTOLAYER_CB_ACTION_BREAK) { return protolayer_cb_ctx_finish( ctx, PROTOLAYER_RET_NORMAL, true); } @@ -164,13 +239,17 @@ static int protolayer_step(struct protolayer_cb_ctx *ctx) ctx, kr_error(ECANCELED), true); } - if (ctx->result == PROTOLAYER_CB_CONTINUE) { + if (ctx->action == PROTOLAYER_CB_ACTION_CONTINUE) { + if (protolayer_cb_ctx_is_last(ctx)) + return protolayer_cb_ctx_finish( + ctx, PROTOLAYER_RET_NORMAL, true); + protolayer_cb_ctx_next(ctx); continue; } /* Should never get here */ - kr_assert(false); + kr_assert(false && "Invalid layer callback action"); return protolayer_cb_ctx_finish(ctx, kr_error(EINVAL), true); } } @@ -185,7 +264,7 @@ static int protolayer_step(struct protolayer_cb_ctx *ctx) static int protolayer_manager_submit( struct protolayer_manager *manager, enum protolayer_direction direction, - 
char *buf, size_t buf_len, void *target, + struct protolayer_payload payload, const void *target, protolayer_finished_cb cb, void *baton) { size_t layer_ix = (direction == PROTOLAYER_UNWRAP) @@ -194,8 +273,31 @@ static int protolayer_manager_submit( struct protolayer_cb_ctx *ctx = malloc(sizeof(*ctx)); // TODO - mempool? kr_require(ctx); + if (kr_log_is_debug(PROTOLAYER, NULL)) { + const char *sess_dir = manager->session->outgoing ? "out" : "in"; + const char *event_name = (payload.type == PROTOLAYER_PAYLOAD_EVENT) + ? protolayer_event_names[payload.event.type] + : ""; + const char *event_space = (payload.type == PROTOLAYER_PAYLOAD_EVENT) ? " " : ""; + kr_log_debug(PROTOLAYER, "[%s] %s%s%s submitted to grp '%s' in %s direction\n", + sess_dir, + protolayer_payload_names[payload.type], + event_space, event_name, + protolayer_grp_names[manager->grp], + (direction == PROTOLAYER_UNWRAP) ? "unwrap" : "wrap"); + } + + for (size_t i = 0; i < manager->num_layers; i++) { + struct protolayer_data *data = protolayer_manager_get(manager, i); + data->processed = false; + struct protolayer_globals *globals = &protolayer_globals[data->protocol]; + if (globals->iter_init) + globals->iter_init(manager, data); + } + *ctx = (struct protolayer_cb_ctx) { - .data = { .target = target }, + .payload = payload, + .target = target, .direction = direction, .layer_ix = layer_ix, .manager = manager, @@ -203,7 +305,6 @@ static int protolayer_manager_submit( .finished_cb_target = target, .finished_cb_baton = baton }; - protolayer_set_buffer(ctx, buf, buf_len); return protolayer_step(ctx); } @@ -234,13 +335,17 @@ struct protolayer_manager *protolayer_manager_new(struct session2 *s, size_t total_data_size = 0; for (size_t i = 0; i < num_layers; i++) { offsets[i] = total_data_size; - size_t d = protolayer_globals[protocols[i]].data_size; - size += ALIGN_TO(d, CPU_STRUCT_ALIGN); + total_data_size += ALIGN_TO(sizeof(struct protolayer_data), + CPU_STRUCT_ALIGN); + total_data_size += 
ALIGN_TO(protolayer_globals[protocols[i]].sess_size, + CPU_STRUCT_ALIGN); + total_data_size += ALIGN_TO(protolayer_globals[protocols[i]].iter_size, + CPU_STRUCT_ALIGN); } size += total_data_size; /* Allocate and initialize manager */ - struct protolayer_manager *m = malloc(size); + struct protolayer_manager *m = calloc(1, size); kr_require(m); m->grp = grp; m->session = s; @@ -252,8 +357,9 @@ struct protolayer_manager *protolayer_manager_new(struct session2 *s, struct protolayer_globals *globals = &protolayer_globals[protocols[i]]; struct protolayer_data *data = protolayer_manager_get(m, i); data->protocol = protocols[i]; - data->size = globals->data_size; - globals->init(m, data); + data->sess_size = ALIGN_TO(globals->sess_size, CPU_STRUCT_ALIGN); + if (globals->sess_init) + globals->sess_init(m, data); } return m; @@ -265,128 +371,542 @@ void protolayer_manager_free(struct protolayer_manager *m) for (size_t i = 0; i < m->num_layers; i++) { struct protolayer_data *data = protolayer_manager_get(m, i); - protolayer_globals[data->protocol].deinit(m, data); + struct protolayer_globals *globals = &protolayer_globals[data->protocol]; + if (globals->sess_deinit) + globals->sess_deinit(m, data); } free(m); } -void protolayer_continue(struct protolayer_cb_ctx *ctx) +enum protolayer_cb_result protolayer_continue(struct protolayer_cb_ctx *ctx) { if (ctx->async_mode) { protolayer_cb_ctx_next(ctx); protolayer_step(ctx); } else { - ctx->result = PROTOLAYER_CB_CONTINUE; + ctx->action = PROTOLAYER_CB_ACTION_CONTINUE; } + return PROTOLAYER_CB_RESULT_MAGIC; } -void protolayer_wait(struct protolayer_cb_ctx *ctx) +enum protolayer_cb_result protolayer_wait(struct protolayer_cb_ctx *ctx) { if (ctx->async_mode) { protolayer_cb_ctx_finish(ctx, PROTOLAYER_RET_WAITING, false); } else { - ctx->result = PROTOLAYER_CB_WAIT; + ctx->action = PROTOLAYER_CB_ACTION_WAIT; } + return PROTOLAYER_CB_RESULT_MAGIC; } -void protolayer_break(struct protolayer_cb_ctx *ctx, int status) +enum 
protolayer_cb_result protolayer_break(struct protolayer_cb_ctx *ctx, int status) { ctx->status = status; if (ctx->async_mode) { protolayer_cb_ctx_finish(ctx, PROTOLAYER_RET_NORMAL, true); } else { - ctx->result = PROTOLAYER_CB_BREAK; + ctx->action = PROTOLAYER_CB_ACTION_BREAK; } + return PROTOLAYER_CB_RESULT_MAGIC; } -static void protolayer_push_finished(struct session2 *s, int status, void *target, void *baton) +static void protolayer_push_finished(int status, struct session2 *s, const void *target, void *baton) { - protolayer_break(baton, status); + struct protolayer_cb_ctx *ctx = baton; + if (ctx->converted_wire_buf) { + wire_buf_reset(ctx->converted_wire_buf); + ctx->converted_wire_buf = NULL; + } + protolayer_break(ctx, status); } -void protolayer_pushv(struct protolayer_cb_ctx *ctx, - struct iovec *iov, int iovcnt, - void *target) +enum protolayer_cb_result protolayer_push(struct protolayer_cb_ctx *ctx) { - int ret = session2_transport_pushv(ctx->manager->session, iov, iovcnt, - target, protolayer_push_finished, ctx); + int ret; + struct session2 *session = ctx->manager->session; + + if (ctx->payload.type == PROTOLAYER_PAYLOAD_WIRE_BUF) { + ctx->converted_wire_buf = ctx->payload.wire_buf; + ctx->payload = protolayer_as_buffer(&ctx->payload); + } + + if (kr_log_is_debug(PROTOLAYER, NULL)) { + kr_log_debug(PROTOLAYER, "Pushing %s\n", + protolayer_payload_names[ctx->payload.type]); + } + + if (ctx->payload.type == PROTOLAYER_PAYLOAD_BUFFER) { + ret = session2_transport_push(session, + ctx->payload.buffer.buf, ctx->payload.buffer.len, + ctx->target, protolayer_push_finished, ctx); + } else if (ctx->payload.type == PROTOLAYER_PAYLOAD_IOVEC) { + ret = session2_transport_pushv(session, + ctx->payload.iovec.iov, ctx->payload.iovec.cnt, + ctx->target, protolayer_push_finished, ctx); + } else if (ctx->payload.type == PROTOLAYER_PAYLOAD_EVENT) { + ret = session2_transport_event(session, + ctx->payload.event, + ctx->target, protolayer_push_finished, ctx); + } else { + 
kr_assert(false && "Invalid payload type"); + ret = kr_error(EINVAL); + } + + /* Push error - otherwise the callback will be called by a push + * function called above. */ if (ret && ctx->finished_cb) - ctx->finished_cb(ret, ctx->finished_cb_target, + ctx->finished_cb(ret, session, ctx->finished_cb_target, ctx->finished_cb_baton); + + return PROTOLAYER_CB_RESULT_MAGIC; } -void protolayer_push(struct protolayer_cb_ctx *ctx, char *buf, size_t buf_len, - void *target) + +int wire_buf_init(struct wire_buf *wb, size_t initial_size) { - int ret = session2_transport_push(ctx->manager->session, buf, buf_len, - target, protolayer_push_finished, ctx); - if (ret && ctx->finished_cb) - ctx->finished_cb(ret, ctx->finished_cb_target, - ctx->finished_cb_baton); + char *buf = malloc(initial_size); + kr_require(buf); + + *wb = (struct wire_buf){ + .buf = buf, + .size = initial_size + }; + + return kr_ok(); +} + +void wire_buf_deinit(struct wire_buf *wb) +{ + free(wb->buf); +} + +int wire_buf_reserve(struct wire_buf *wb, size_t size) +{ + if (wb->buf && wb->size >= size) + return kr_ok(); + + wb->buf = realloc(wb->buf, size); + kr_require(wb->buf); + wb->size = size; + return kr_ok(); +} + +int wire_buf_consume(struct wire_buf *wb, size_t length) +{ + size_t ne = wb->end + length; + if (kr_fails_assert(wb->buf && ne <= wb->size)) + return kr_error(EINVAL); + + wb->end = ne; + return kr_ok(); +} + +int wire_buf_trim(struct wire_buf *wb, size_t length) +{ + size_t ns = wb->start + length; + if (kr_fails_assert(ns <= wb->end)) + return kr_error(EINVAL); + + wb->start = ns; + return kr_ok(); +} + +int wire_buf_movestart(struct wire_buf *wb) +{ + if (kr_fails_assert(wb->buf)) + return kr_error(EINVAL); + if (wb->start == 0) + return kr_ok(); + + size_t len = wire_buf_data_length(wb); + if (len) + memmove(wb->buf, wire_buf_data(wb), len); + wb->end -= wb->start; + wb->start = 0; + return kr_ok(); +} + +int wire_buf_reset(struct wire_buf *wb) +{ + wb->start = 0; + wb->end = 0; + wb->error 
= false; + return kr_ok(); } struct session2 *session2_new(enum session2_transport_type transport_type, - void *transport_ctx, enum protolayer_grp layer_grp, bool outgoing) { - kr_require(transport_type && transport_ctx && layer_grp); + kr_require(transport_type && layer_grp); struct session2 *s = malloc(sizeof(*s)); kr_require(s); - s->transport.type = transport_type; - s->transport.ctx = transport_ctx; - - s->layers = protolayer_manager_new(s, layer_grp); - if (!s->layers) { + struct protolayer_manager *layers = protolayer_manager_new(s, layer_grp); + if (!layers) { free(s); return NULL; } - s->outgoing = outgoing; + *s = (struct session2) { + .transport = { + .type = transport_type, + }, + .layers = layers, + .outgoing = outgoing, + .tasks = trie_create(NULL), + }; + + mm_ctx_mempool(&s->pool, 4 * CPU_PAGE_SIZE); + queue_init(s->waiting); + + int ret = wire_buf_init(&s->wire_buf, KNOT_WIRE_MAX_PKTSIZE); + kr_require(!ret); + + ret = uv_timer_init(uv_default_loop(), &s->timer); + kr_require(!ret); + s->timer.data = s; + + session2_touch(s); return s; } -void session2_free(struct session2 *s) +static void session2_timer_on_close(uv_handle_t *handle) { + struct session2 *s = handle->data; protolayer_manager_free(s->layers); free(s); } -int session2_unwrap(struct session2 *s, char *buf, size_t buf_len, void *target, - protolayer_finished_cb cb, void *baton) +void session2_free(struct session2 *s) +{ + trie_free(s->tasks); + queue_deinit(s->waiting); + uv_close((uv_handle_t *)&s->timer, session2_timer_on_close); +} + +int session2_start_read(struct session2 *session) +{ + if (session->transport.type == SESSION2_TRANSPORT_IO) + return io_start_read(session->transport.io.handle); + + /* TODO - probably just some event for this */ + kr_assert(false && "Parent start_read unsupported"); + return kr_error(EINVAL); +} + +int session2_stop_read(struct session2 *session) +{ + if (session->transport.type == SESSION2_TRANSPORT_IO) + return 
io_stop_read(session->transport.io.handle); + + /* TODO - probably just some event for this */ + kr_assert(false && "Parent stop_read unsupported"); + return kr_error(EINVAL); +} + +struct sockaddr *session2_get_peer(struct session2 *s) +{ + while (s && s->transport.type == SESSION2_TRANSPORT_PARENT) + s = s->transport.parent; + + return (s && s->transport.type == SESSION2_TRANSPORT_IO) + ? &s->transport.io.peer.ip + : NULL; +} + +struct sockaddr *session2_get_sockname(struct session2 *s) +{ + while (s && s->transport.type == SESSION2_TRANSPORT_PARENT) + s = s->transport.parent; + + return (s && s->transport.type == SESSION2_TRANSPORT_IO) + ? &s->transport.io.sockname.ip + : NULL; +} + +uv_handle_t *session2_get_handle(struct session2 *s) +{ + while (s && s->transport.type == SESSION2_TRANSPORT_PARENT) + s = s->transport.parent; + + return (s && s->transport.type == SESSION2_TRANSPORT_IO) + ? s->transport.io.handle + : NULL; +} + +static void session2_on_timeout(uv_timer_t *timer) +{ + struct session2 *s = timer->data; + protolayer_manager_submit(s->layers, s->timer_direction, + protolayer_event_nd(PROTOLAYER_EVENT_TIMEOUT), + NULL, NULL, NULL); +} + +int session2_timer_start(struct session2 *s, uint64_t timeout, uint64_t repeat, + enum protolayer_direction direction) +{ + s->timer_direction = direction; + return uv_timer_start(&s->timer, session2_on_timeout, timeout, repeat); +} + +int session2_timer_restart(struct session2 *s) +{ + return uv_timer_again(&s->timer); +} + +int session2_timer_stop(struct session2 *s) +{ + return uv_timer_stop(&s->timer); +} + +int session2_tasklist_add(struct session2 *session, struct qr_task *task) +{ + trie_t *t = session->tasks; + uint16_t task_msg_id = 0; + const char *key = NULL; + size_t key_len = 0; + if (session->outgoing) { + knot_pkt_t *pktbuf = worker_task_get_pktbuf(task); + task_msg_id = knot_wire_get_id(pktbuf->wire); + key = (const char *)&task_msg_id; + key_len = sizeof(task_msg_id); + } else { + key = (const char 
*)&task; + key_len = sizeof(char *); + } + trie_val_t *v = trie_get_ins(t, key, key_len); + if (kr_fails_assert(v)) + return kr_error(ENOMEM); + if (*v == NULL) { + *v = task; + worker_task_ref(task); + } else if (kr_fails_assert(*v == task)) { + return kr_error(EINVAL); + } + return kr_ok(); +} + +int session2_tasklist_del(struct session2 *session, struct qr_task *task) +{ + trie_t *t = session->tasks; + uint16_t task_msg_id = 0; + const char *key = NULL; + size_t key_len = 0; + trie_val_t val; + if (session->outgoing) { + knot_pkt_t *pktbuf = worker_task_get_pktbuf(task); + task_msg_id = knot_wire_get_id(pktbuf->wire); + key = (const char *)&task_msg_id; + key_len = sizeof(task_msg_id); + } else { + key = (const char *)&task; + key_len = sizeof(char *); + } + int ret = trie_del(t, key, key_len, &val); + if (ret == KNOT_EOK) { + kr_require(val == task); + worker_task_unref(val); + } + return ret; +} + +struct qr_task *session2_tasklist_get_first(struct session2 *session) +{ + trie_val_t *val = trie_get_first(session->tasks, NULL, NULL); + return val ? 
(struct qr_task *) *val : NULL; +} + +struct qr_task *session2_tasklist_del_first(struct session2 *session, bool deref) +{ + trie_val_t val = NULL; + int res = trie_del_first(session->tasks, NULL, NULL, &val); + if (res != KNOT_EOK) { + val = NULL; + } else if (deref) { + worker_task_unref(val); + } + return (struct qr_task *)val; +} + +struct qr_task *session2_tasklist_find_msgid(const struct session2 *session, uint16_t msg_id) +{ + if (kr_fails_assert(session->outgoing)) + return NULL; + trie_t *t = session->tasks; + struct qr_task *ret = NULL; + const char *key = (const char *)&msg_id; + size_t key_len = sizeof(msg_id); + trie_val_t val; + int res = trie_del(t, key, key_len, &val); + if (res == KNOT_EOK) { + if (worker_task_numrefs(val) > 1) { + ret = val; + } + worker_task_unref(val); + } + return ret; +} + +struct qr_task *session2_tasklist_del_msgid(const struct session2 *session, uint16_t msg_id) +{ + if (kr_fails_assert(session->outgoing)) + return NULL; + trie_t *t = session->tasks; + struct qr_task *ret = NULL; + trie_val_t *val = trie_get_try(t, (char *)&msg_id, sizeof(msg_id)); + if (val) { + ret = *val; + } + return ret; +} + +void session2_tasklist_finalize(struct session2 *session, int status) +{ + while (session2_tasklist_get_len(session) > 0) { + struct qr_task *t = session2_tasklist_del_first(session, false); + kr_require(worker_task_numrefs(t) > 0); + worker_task_finalize(t, status); + worker_task_unref(t); + } +} + +int session2_tasklist_finalize_expired(struct session2 *session) +{ + int ret = 0; + queue_t(struct qr_task *) q; + uint64_t now = kr_now(); + trie_t *t = session->tasks; + trie_it_t *it; + queue_init(q); + for (it = trie_it_begin(t); !trie_it_finished(it); trie_it_next(it)) { + trie_val_t *v = trie_it_val(it); + struct qr_task *task = (struct qr_task *)*v; + if ((now - worker_task_creation_time(task)) >= KR_RESOLVE_TIME_LIMIT) { + struct kr_request *req = worker_task_request(task); + if (!kr_fails_assert(req)) + 
kr_query_inform_timeout(req, req->current_query); + queue_push(q, task); + worker_task_ref(task); + } + } + trie_it_free(it); + + struct qr_task *task = NULL; + uint16_t msg_id = 0; + char *key = (char *)&task; + int32_t keylen = sizeof(struct qr_task *); + if (session->outgoing) { + key = (char *)&msg_id; + keylen = sizeof(msg_id); + } + while (queue_len(q) > 0) { + task = queue_head(q); + if (session->outgoing) { + knot_pkt_t *pktbuf = worker_task_get_pktbuf(task); + msg_id = knot_wire_get_id(pktbuf->wire); + } + int res = trie_del(t, key, keylen, NULL); + if (!worker_task_finished(task)) { + /* task->pending_count must be zero, + * but there are can be followers, + * so run worker_task_subreq_finalize() to ensure retrying + * for all the followers. */ + worker_task_subreq_finalize(task); + worker_task_finalize(task, KR_STATE_FAIL); + } + if (res == KNOT_EOK) { + worker_task_unref(task); + } + queue_pop(q); + worker_task_unref(task); + ++ret; + } + + queue_deinit(q); + return ret; +} + +int session2_waitinglist_push(struct session2 *session, struct qr_task *task) +{ + queue_push(session->waiting, task); + worker_task_ref(task); + return kr_ok(); +} + +struct qr_task *session2_waitinglist_get(const struct session2 *session) +{ + return (queue_len(session->waiting) > 0) ? 
(queue_head(session->waiting)) : NULL; +} + +struct qr_task *session2_waitinglist_pop(struct session2 *session, bool deref) +{ + struct qr_task *t = session2_waitinglist_get(session); + queue_pop(session->waiting); + if (deref) { + worker_task_unref(t); + } + return t; +} + +void session2_waitinglist_retry(struct session2 *session, bool increase_timeout_cnt) +{ + while (!session2_waitinglist_is_empty(session)) { + struct qr_task *task = session2_waitinglist_pop(session, false); + if (increase_timeout_cnt) { + worker_task_timeout_inc(task); + } + worker_task_step(task, session2_get_peer(session), NULL); + worker_task_unref(task); + } +} + +void session2_waitinglist_finalize(struct session2 *session, int status) +{ + while (!session2_waitinglist_is_empty(session)) { + struct qr_task *t = session2_waitinglist_pop(session, false); + worker_task_finalize(t, status); + worker_task_unref(t); + } +} + +int session2_unwrap(struct session2 *s, struct protolayer_payload payload, + const void *target, protolayer_finished_cb cb, void *baton) { return protolayer_manager_submit(s->layers, PROTOLAYER_UNWRAP, - buf, buf_len, target, cb, baton); + payload, target, cb, baton); } -int session2_wrap(struct session2 *s, char *buf, size_t buf_len, void *target, - protolayer_finished_cb cb, void *baton) +int session2_wrap(struct session2 *s, struct protolayer_payload payload, + const void *target, protolayer_finished_cb cb, void *baton) { return protolayer_manager_submit(s->layers, PROTOLAYER_WRAP, - buf, buf_len, target, cb, baton); + payload, target, cb, baton); } struct parent_pushv_ctx { struct session2 *session; - session2_push_cb cb; - void *target; + protolayer_finished_cb cb; + const void *target; void *baton; char *buf; size_t buf_len; }; -static void session2_transport_parent_pushv_finished(int status, void *target, void *baton) +static void session2_transport_parent_pushv_finished(int status, + struct session2 *session, + const void *target, + void *baton) { struct 
parent_pushv_ctx *ctx = baton; if (ctx->cb) - ctx->cb(ctx->session, status, target, ctx->baton); + ctx->cb(status, ctx->session, target, ctx->baton); free(ctx->buf); free(ctx); } @@ -395,7 +915,7 @@ static void session2_transport_udp_pushv_finished(uv_udp_send_t *req, int status { struct parent_pushv_ctx *ctx = req->data; if (ctx->cb) - ctx->cb(ctx->session, status, ctx->target, ctx->baton); + ctx->cb(status, ctx->session, ctx->target, ctx->baton); free(ctx->buf); free(ctx); free(req); @@ -405,7 +925,7 @@ static void session2_transport_stream_pushv_finished(uv_write_t *req, int status { struct parent_pushv_ctx *ctx = req->data; if (ctx->cb) - ctx->cb(ctx->session, status, ctx->target, ctx->baton); + ctx->cb(status, ctx->session, ctx->target, ctx->baton); free(ctx->buf); free(ctx); free(req); @@ -448,15 +968,15 @@ static int concat_iovs(const struct iovec *iov, int iovcnt, char **buf, size_t * static int session2_transport_pushv(struct session2 *s, const struct iovec *iov, int iovcnt, - void *target, - session2_push_cb cb, void *baton) + const void *target, + protolayer_finished_cb cb, void *baton) { if (kr_fails_assert(s)) return kr_error(EINVAL); struct parent_pushv_ctx *ctx = malloc(sizeof(*ctx)); kr_require(ctx); - *ctx = (struct parent_pushv_ctx) { + *ctx = (struct parent_pushv_ctx){ .session = s, .cb = cb, .baton = baton, @@ -464,8 +984,8 @@ static int session2_transport_pushv(struct session2 *s, }; switch (s->transport.type) { - case SESSION2_TRANSPORT_HANDLE:; - uv_handle_t *handle = s->transport.handle; + case SESSION2_TRANSPORT_IO:; + uv_handle_t *handle = s->transport.io.handle; if (kr_fails_assert(handle)) { free(ctx); return kr_error(EINVAL); @@ -484,12 +1004,12 @@ static int session2_transport_pushv(struct session2 *s, uv_write(req, (uv_stream_t *)handle, (uv_buf_t *)iov, iovcnt, session2_transport_stream_pushv_finished); return kr_ok(); + } else { + kr_assert(false && "Unsupported handle"); + free(ctx); + return kr_error(EINVAL); } - kr_assert(false 
&& "Unsupported handle"); - free(ctx); - return kr_error(EINVAL); - case SESSION2_TRANSPORT_PARENT:; struct session2 *parent = s->transport.parent; if (kr_fails_assert(parent)) { @@ -501,8 +1021,9 @@ static int session2_transport_pushv(struct session2 *s, free(ctx); return ret; } - session2_wrap(parent, ctx->buf, ctx->buf_len, target, - session2_transport_parent_pushv_finished, ctx); + session2_wrap(parent, protolayer_buffer(ctx->buf, ctx->buf_len), + target, session2_transport_parent_pushv_finished, + ctx); return kr_ok(); default: @@ -514,28 +1035,29 @@ static int session2_transport_pushv(struct session2 *s, struct push_ctx { struct iovec iov; - session2_push_cb cb; + protolayer_finished_cb cb; void *baton; }; -static void session2_transport_single_push_finished(struct session2 *s, - int status, - void *target, void *baton) +static void session2_transport_single_push_finished(int status, + struct session2 *s, + const void *target, + void *baton) { struct push_ctx *ctx = baton; if (ctx->cb) - ctx->cb(s, status, target, ctx->baton); + ctx->cb(status, s, target, ctx->baton); free(ctx); } static inline int session2_transport_push(struct session2 *s, char *buf, size_t buf_len, - void *target, - session2_push_cb cb, void *baton) + const void *target, + protolayer_finished_cb cb, void *baton) { struct push_ctx *ctx = malloc(sizeof(*ctx)); kr_require(ctx); - *ctx = (struct push_ctx) { + *ctx = (struct push_ctx){ .iov = { .iov_base = buf, .iov_len = buf_len @@ -547,3 +1069,107 @@ static inline int session2_transport_push(struct session2 *s, return session2_transport_pushv(s, &ctx->iov, 1, target, session2_transport_single_push_finished, ctx); } + +struct event_ctx { + struct session2 *session; + protolayer_finished_cb cb; + void *baton; + const void *target; +}; + +static void session2_transport_io_event_finished(uv_handle_t *handle) +{ + struct event_ctx *ctx = handle->data; + if (ctx->cb) + ctx->cb(kr_ok(), ctx->session, ctx->target, ctx->baton); + free(ctx); +} + 
+static void session2_transport_parent_event_finished(int status, + struct session2 *session, + const void *target, + void *baton) +{ + struct event_ctx *ctx = baton; + if (ctx->cb) + ctx->cb(status, ctx->session, target, ctx->baton); + free(ctx); +} + +static int session2_handle_close(struct session2 *s, uv_handle_t *handle, + struct event_ctx *ctx) +{ + io_stop_read(handle); + handle->data = ctx; + uv_close(handle, session2_transport_io_event_finished); + + return kr_ok(); +} + +static int session2_transport_event(struct session2 *s, + struct protolayer_event event, + const void *target, + protolayer_finished_cb cb, void *baton) +{ + if (s->closing) { + if (cb) + cb(kr_error(ESTALE), s, target, baton); + return kr_ok(); + } + + bool is_close_event = (event.type == PROTOLAYER_EVENT_CLOSE || + event.type == PROTOLAYER_EVENT_FORCE_CLOSE); + if (is_close_event) { + kr_require(session2_is_empty(s)); + session2_timer_stop(s); + s->closing = true; + } + + struct event_ctx *ctx = malloc(sizeof(*ctx)); + kr_require(ctx); + *ctx = (struct event_ctx){ + .session = s, + .cb = cb, + .baton = baton, + .target = target + }; + + switch (s->transport.type) { + case SESSION2_TRANSPORT_IO:; + uv_handle_t *handle = s->transport.io.handle; + if (kr_fails_assert(handle)) { + free(ctx); + return kr_error(EINVAL); + } + + if (is_close_event) + return session2_handle_close(s, handle, ctx); + + return kr_ok(); + + case SESSION2_TRANSPORT_PARENT: + session2_wrap(s, protolayer_event(event), target, + session2_transport_parent_event_finished, ctx); + return kr_ok(); + + default: + kr_assert(false && "Invalid transport"); + free(ctx); + return kr_error(EINVAL); + } +} + +void session2_kill_ioreq(struct session2 *session, struct qr_task *task) +{ + if (!session || session->closing) + return; + if (kr_fails_assert(session->outgoing + && session->transport.type == SESSION2_TRANSPORT_IO + && session->transport.io.handle)) + return; + session2_tasklist_del(session, task); + if 
(session->transport.io.handle->type == UV_UDP) + session2_unwrap(session, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); +} diff --git a/daemon/session2.h b/daemon/session2.h index 8cf1c3439..efb40897a 100644 --- a/daemon/session2.h +++ b/daemon/session2.h @@ -9,16 +9,42 @@ #include #include "contrib/mempattern.h" +#include "lib/generic/queue.h" +#include "lib/generic/trie.h" +#include "lib/utils.h" /* Forward declarations */ struct session2; struct protolayer_cb_ctx; +/** Information about the transport - addresses and proxy. */ +struct comm_info { + /** The original address the data came from. May be that of a proxied + * client, if they came through a proxy. May be `NULL` if + * the communication did not come from network. */ + const struct sockaddr *src_addr; + + /** The actual address the resolver is communicating with. May be + * the address of a proxy if the communication came through one, + * otherwise it will be the same as `src_addr`. May be `NULL` if + * the communication did not come from network. */ + const struct sockaddr *comm_addr; + + /** The original destination address. May be the resolver's address, or + * the address of a proxy if the communication came through one. May be + * `NULL` if the communication did not come from network. */ + const struct sockaddr *dst_addr; + + /** Data parsed from a PROXY header. May be `NULL` if the communication + * did not come through a proxy, or if the PROXYv2 protocol was not used. */ + const struct proxy_result *proxy; +}; + /** Protocol types - individual implementations of protocol layers. */ enum protolayer_protocol { PROTOLAYER_NULL = 0, - PROTOLAYER_TCP, PROTOLAYER_UDP, + PROTOLAYER_TCP, PROTOLAYER_TLS, PROTOLAYER_HTTP, @@ -32,16 +58,24 @@ enum protolayer_protocol { PROTOLAYER_PROTOCOL_COUNT }; +/** Protocol layer groups. Each of these represents a sequence of layers in + * the unwrap direction. This macro is used to generate `enum protolayer_grp` + * and `protolayer_grp_descs[]`. 
+ * + * Parameters are: + * 1. Constant name (for e.g. PROTOLAYER_GRP_* constants) + * 2. Variable name (for e.g. protolayer_grp_* arrays) + * 3. Human-readable name for logging */ #define PROTOLAYER_GRP_MAP(XX) \ XX(DOUDP, doudp, "DNS UDP") \ XX(DOTCP, dotcp, "DNS TCP") \ - XX(DOT, dot, "DNS-over-TLS") \ - XX(DOH, doh, "DNS-over-HTTPS") + XX(DOTLS, dot, "DNS-over-TLS") \ + XX(DOHTTPS, doh, "DNS-over-HTTPS") /** Pre-defined sequences of protocol layers. */ enum protolayer_grp { PROTOLAYER_GRP_NULL = 0, -#define XX(id, name, desc) PROTOLAYER_GRP_##id, +#define XX(cid, vid, name) PROTOLAYER_GRP_##cid, PROTOLAYER_GRP_MAP(XX) #undef XX PROTOLAYER_GRP_COUNT @@ -49,23 +83,29 @@ enum protolayer_grp { /** Maps protocol layer group IDs to human-readable descriptions. * E.g. PROTOLAYER_GRP_DOH has description 'DNS-over-HTTPS'. */ -extern char *protolayer_grp_descs[]; +extern char *protolayer_grp_names[]; /** Flow control indicators for protocol layer `wrap` and `unwrap` callbacks. - * Use with `protolayer_continue`, `protolayer_wait` and `protolayer_break` - * functions. */ -enum protolayer_cb_result { - PROTOLAYER_CB_NULL = 0, - - PROTOLAYER_CB_CONTINUE, - PROTOLAYER_CB_WAIT, - PROTOLAYER_CB_BREAK, - PROTOLAYER_CB_PUSH, + * Use via `protolayer_continue`, `protolayer_wait`, `protolayer_break`, and + * `protolayer_push` functions. */ +enum protolayer_cb_action { + PROTOLAYER_CB_ACTION_NULL = 0, + + PROTOLAYER_CB_ACTION_CONTINUE, + PROTOLAYER_CB_ACTION_WAIT, + PROTOLAYER_CB_ACTION_BREAK, }; +/** Direction of layer sequence processing. */ enum protolayer_direction { - PROTOLAYER_WRAP, + /** Processes buffers in order of layers as defined in the layer group. + * In this direction, protocol data should be removed from the buffer, + * parsing additional data provided by the protocol. */ PROTOLAYER_UNWRAP, + + /** Processes buffers in reverse order of layers as defined in the layer + * group. In this direction, protocol data should be added. 
*/ + PROTOLAYER_WRAP, }; enum protolayer_ret { @@ -94,104 +134,215 @@ enum protolayer_ret { * function. * `baton` is the `baton` parameter passed to the * `session2_(un)wrap` function. */ -typedef void (*protolayer_finished_cb)(int status, void *target, void *baton); +typedef void (*protolayer_finished_cb)(int status, struct session2 *session, + const void *target, void *baton); + +#define PROTOLAYER_EVENT_MAP(XX) \ + XX(CLOSE) /**< Signal to gracefully close the session - + * i.e. layers add their standard disconnection + * ceremony (e.g. `gnutls_bye()`). */\ + XX(FORCE_CLOSE) /**< Signal to forcefully close the + * session - i.e. layers SHOULD NOT add + * any disconnection ceremony, if + * avoidable. */\ + XX(TIMEOUT) /**< Signal that the session has timed out. */ + +/** Event type, to be interpreted by a layer. */ +enum protolayer_event_type { + PROTOLAYER_EVENT_NULL = 0, +#define XX(cid) PROTOLAYER_EVENT_##cid, + PROTOLAYER_EVENT_MAP(XX) +#undef XX + PROTOLAYER_EVENT_COUNT +}; -enum protolayer_cb_data_type { - PROTOLAYER_CB_DATA_NULL = 0, - PROTOLAYER_CB_DATA_BUFFER, - PROTOLAYER_CB_DATA_IOVEC, +extern char *protolayer_event_names[]; + +/** Event, with optional auxiliary data. */ +struct protolayer_event { + enum protolayer_event_type type; + union { + void *ptr; + char raw[sizeof(void *)]; + } data; /**< Optional data supplied with the event. + * May be used by a layer. */ +}; + +#define PROTOLAYER_PAYLOAD_MAP(XX) \ + XX(BUFFER, "Buffer") \ + XX(IOVEC, "IOVec") \ + XX(EVENT, "Event") \ + XX(WIRE_BUF, "Wire buffer") + +/** Defines whether the data for a `struct protolayer_cb_ctx` is represented + * by a single buffer, an array of `struct iovec`, or an `enum protolayer_event`. */ +enum protolayer_payload_type { + PROTOLAYER_PAYLOAD_NULL = 0, +#define XX(cid, name) PROTOLAYER_PAYLOAD_##cid, + PROTOLAYER_PAYLOAD_MAP(XX) +#undef XX + PROTOLAYER_PAYLOAD_COUNT +}; + +extern char *protolayer_payload_names[]; + +/** Data processed by the sequence of layers. 
All pointed-to memory is always + * owned by its creator. It is also the layer (group) implementor's + * responsibility to keep data compatible in between layers. No payload memory + * is ever (de-)allocated by the protolayer manager! */ +struct protolayer_payload { + enum protolayer_payload_type type; + union { + /** Only valid if `type` is `_BUFFER`. */ + struct { + char *buf; + size_t len; + } buffer; + + /** Only valid if `type` is `_IOVEC`. */ + struct { + struct iovec *iov; + int cnt; + } iovec; + + /** Only valid if `type` is `_EVENT`. */ + struct protolayer_event event; + + /** Only valid if `type` is `_WIRE_BUF`. */ + struct wire_buf *wire_buf; + }; }; /** Context for protocol layer callbacks, containing buffer data and internal * information for protocol layer manager. */ struct protolayer_cb_ctx { /* read-write */ + /** The payload */ + struct protolayer_payload payload; + /** Transport information (e.g. UDP sender address). May be `NULL`. */ + const void *target; + /** Communication information. Typically written into by one of the + * first layers facilitating transport protocol processing. + * Zero-initialized in the beginning. */ + struct comm_info comm; - /** Data processed by the sequence of layers. All the data is always - * owned by its creator. It is also the layer (group) implementor's - * responsibility to keep data compatible in between layers. No data is - * ever (de-)allocated by the protolayer manager! */ - struct { - enum protolayer_cb_data_type type; - union { - /** Only valid if `type` is `_BUFFER`. */ - struct { - char *buf; - size_t len; - } buffer; - - /** Only valid if `type` is `_IOVEC`. */ - struct { - struct iovec *iov; - int cnt; - } iovec; - }; - /** Always valid; may be `NULL`. 
*/ - void *target; - } data; + /* callback for when the layer iteration has ended - read-only */ + protolayer_finished_cb finished_cb; + const void *finished_cb_target; + void *finished_cb_baton; + struct wire_buf *converted_wire_buf; - /* internal manager information - private */ + /* internal information for the manager - private */ enum protolayer_direction direction; bool async_mode; unsigned int layer_ix; struct protolayer_manager *manager; int status; - enum protolayer_cb_result result; - - /* callback for when the layer iteration has ended - read-only */ - protolayer_finished_cb finished_cb; - void *finished_cb_target; - void *finished_cb_baton; + enum protolayer_cb_action action; }; -/** Convenience function to put a buffer pointer to the specified context. */ -static inline void protolayer_set_buffer(struct protolayer_cb_ctx *ctx, - char *buf, size_t len) +/** Convenience function to get a buffer-type payload. */ +static inline struct protolayer_payload protolayer_buffer(char *buf, size_t len) +{ + return (struct protolayer_payload){ + .type = PROTOLAYER_PAYLOAD_BUFFER, + .buffer = { + .buf = buf, + .len = len + } + }; +} + +/** Convenience function to get an iovec-type payload. */ +static inline struct protolayer_payload protolayer_iovec( + struct iovec *iov, int iovcnt) { - ctx->data.type = PROTOLAYER_CB_DATA_BUFFER; - ctx->data.buffer.buf = buf; - ctx->data.buffer.len = len; + return (struct protolayer_payload){ + .type = PROTOLAYER_PAYLOAD_IOVEC, + .iovec = { + .iov = iov, + .cnt = iovcnt + } + }; } -/** Convenience function to put an iovec pointer to the specified context. */ -static inline void protolayer_set_iovec(struct protolayer_cb_ctx *ctx, - struct iovec *iov, int iovcnt) +/** Convenience function to get an event-type payload. 
*/ +static inline struct protolayer_payload protolayer_event(struct protolayer_event event) { - ctx->data.type = PROTOLAYER_CB_DATA_IOVEC; - ctx->data.iovec.iov = iov; - ctx->data.iovec.cnt = iovcnt; + return (struct protolayer_payload){ + .type = PROTOLAYER_PAYLOAD_EVENT, + .event = event + }; } +/** Convenience function to get an event-type payload without auxiliary data. */ +static inline struct protolayer_payload protolayer_event_nd(enum protolayer_event_type event) +{ + return (struct protolayer_payload){ + .type = PROTOLAYER_PAYLOAD_EVENT, + .event = { + .type = event + } + }; +} -/** Common header for per-session layer-specific data. When implementing - * a new layer, this is to be put at the beginning of the struct. */ -#define PROTOLAYER_DATA_HEADER struct {\ - enum protolayer_protocol protocol;\ - size_t size; /**< Size of the entire struct (incl. header) */\ - bool processed; /**< Safeguard so that the layer does not get executed - * multiple times. */\ +/** Convenience function to get a wire-buf-type payload. */ +static inline struct protolayer_payload protolayer_wire_buf(struct wire_buf *wire_buf) +{ + return (struct protolayer_payload){ + .type = PROTOLAYER_PAYLOAD_WIRE_BUF, + .wire_buf = wire_buf + }; } +/** Convenience function to represent the specified payload as a buffer-type. + * Supports only `_BUFFER` and `_WIRE_BUF` on the input, otherwise returns + * `_NULL` type or aborts on assertion if allowed. */ +struct protolayer_payload protolayer_as_buffer(const struct protolayer_payload *payload); + + /** Per-session layer-specific data - generic struct. */ struct protolayer_data { - PROTOLAYER_DATA_HEADER; - uint8_t data[]; + enum protolayer_protocol protocol; + bool processed : 1; /**< Internal safeguard so that the layer does not + * get executed multiple times on the same buffer. */ + size_t sess_size; /**< Size of the session data (aligned). */ + size_t iter_size; /**< Size of the iteration data (aligned). 
*/ + uint8_t data[]; /**< Memory for the layer-specific structs. */ }; -typedef void (*protolayer_cb)(struct protolayer_data *layer, - struct protolayer_cb_ctx *ctx); -typedef int (*protolayer_data_cb)(struct protolayer_manager *manager, - struct protolayer_data *layer); +/** Get a pointer to the session data of the layer. This data shares + * its lifetime with a session. */ +static inline void *protolayer_sess_data(struct protolayer_data *d) +{ + return d->data; +} -/** The default implementation for the `struct protolayer_globals::reset` - * callback. Simply calls the `deinit` and `init` callbacks. */ -int protolayer_data_reset_default(struct protolayer_manager *manager, - struct protolayer_data *layer); +/** Gets a pointer to the iteration data of the layer. This data shares its + * lifetime with an iteration through layers; it is also kept intact when + * an iteration ends with a `_WAIT` action. */ +static inline void *protolayer_iter_data(struct protolayer_data *d) +{ + return d->data + d->sess_size; +} +/** Return value of `protolayer_cb` callbacks. To be generated by continuation + * functions, never returned directly. */ +enum protolayer_cb_result { + PROTOLAYER_CB_RESULT_MAGIC = 0x364F392E, +}; + +typedef enum protolayer_cb_result (*protolayer_cb)( + struct protolayer_data *layer, struct protolayer_cb_ctx *ctx); +typedef int (*protolayer_data_cb)(struct protolayer_manager *manager, + struct protolayer_data *layer); /** A collection of protocol layers and their layer-specific data. */ struct protolayer_manager { enum protolayer_grp grp; + bool iter_data_inited : 1; /**< True: layers' iteration data is + * initialized (e.g. from a previous + * iteration). */ struct session2 *session; size_t num_layers; char data[]; @@ -207,19 +358,26 @@ void protolayer_manager_free(struct protolayer_manager *m); /** Global data for a specific layered protocol. */ struct protolayer_globals { - size_t data_size; /**< Size of the layer-specific data struct. 
*/ - protolayer_data_cb init; /**< Initializes the layer-specific data struct. */ - protolayer_data_cb deinit; /**< De-initializes the layer-specific data struct. */ - protolayer_data_cb reset; /**< Resets the layer-specific data struct - * after finishing a sequence. Default - * implementation is available as - * `protolayer_data_reset_default`. */ - protolayer_cb unwrap; /**< Strips the buffer of protocol-specific - * data. E.g. a HTTP layer removes HTTP - * status and headers. */ - protolayer_cb wrap; /**< Wraps the buffer into protocol-specific - * data. E.g. a HTTP layer adds HTTP status - * and headers. */ + size_t sess_size; /**< Size of the layer-specific session data struct. */ + size_t iter_size; /**< Size of the layer-specific iteration data struct. */ + protolayer_data_cb sess_init; /**< Called upon session creation to + * initialize layer-specific session + * data. */ + protolayer_data_cb sess_deinit; /**< Called upon session destruction to + * deinitialize layer-specific session + * data. */ + protolayer_data_cb iter_init; /**< Called at the beginning of a layer + * sequence to initialize layer-specific + * iteration data. */ + protolayer_data_cb iter_deinit; /**< Called at the end of a layer + * sequence to deinitialize + * layer-specific iteration data. */ + protolayer_cb unwrap; /**< Strips the buffer of protocol-specific + * data. E.g. a HTTP layer removes HTTP + * status and headers. */ + protolayer_cb wrap; /**< Wraps the buffer into protocol-specific + * data. E.g. a HTTP layer adds HTTP status + * and headers. */ }; /** Global data about layered protocols. Indexed by `enum protolayer_protocol`. */ @@ -227,81 +385,162 @@ extern struct protolayer_globals protolayer_globals[PROTOLAYER_PROTOCOL_COUNT]; /** *Continuation function* - signals the protolayer manager to continue * processing the next layer. 
*/ -void protolayer_continue(struct protolayer_cb_ctx *ctx); +enum protolayer_cb_result protolayer_continue(struct protolayer_cb_ctx *ctx); /** *Continuation function* - signals that the layer needs more data to produce * a new buffer for the next layer. */ -void protolayer_wait(struct protolayer_cb_ctx *ctx); +enum protolayer_cb_result protolayer_wait(struct protolayer_cb_ctx *ctx); /** *Continuation function* - signals that the layer wants to stop processing * of the buffer and clean up, possibly due to an error (indicated by * `status`). * * `status` must be 0 or a negative integer. */ -void protolayer_break(struct protolayer_cb_ctx *ctx, int status); +enum protolayer_cb_result protolayer_break(struct protolayer_cb_ctx *ctx, int status); /** *Continuation function* - pushes data to the session's transport and * signals that the layer wants to stop processing of the buffer and clean up. * - * `target` is the target data for the transport - in most cases, it will be - * unused and may be `NULL`; except for UDP, where it must point to a `struct - * sockaddr_*` to indicate the target address. - * * This function is meant to be called by the `wrap` callback of first layer in * the sequence. */ -void protolayer_pushv(struct protolayer_cb_ctx *ctx, - struct iovec *iov, int iovcnt, void *target); +enum protolayer_cb_result protolayer_push(struct protolayer_cb_ctx *ctx); -/** *Continuation function* - pushes data to the session's transport and - * signals that the layer wants to stop processing of the buffer and clean up. +static inline enum protolayer_cb_result protolayer_async() +{ + return PROTOLAYER_CB_RESULT_MAGIC; +} + + +/** Wire buffer. * - * `target` is the target data for the transport - in most cases, it will be - * unused and may be `NULL`; except for UDP, where it must point to a `struct - * sockaddr_*` to indicate the target address. + * May be initialized via `wire_buf_init` or to zero (ZII), then reserved via + * `wire_buf_reserve`. 
*/ +struct wire_buf { + char *buf; /**< Buffer memory. */ + size_t size; /**< Current size of the buffer memory. */ + size_t start; /**< Index at which the valid data of the buffer starts (inclusive). */ + size_t end; /**< Index at which the valid data of the buffer ends (exclusive). */ + bool error; /**< Whether there has been an error. */ +}; + +/** Allocates the wire buffer with the specified `initial_size`. */ +int wire_buf_init(struct wire_buf *wb, size_t initial_size); + +/** De-allocates the wire buffer. */ +void wire_buf_deinit(struct wire_buf *wb); + +/** Ensures that the wire buffer's size is at least `size`. `*wb` must be + * initialized, either to zero or via `wire_buf_init`. */ +int wire_buf_reserve(struct wire_buf *wb, size_t size); + +/** Adds `length` to the end index of the valid data, marking `length` more + * bytes as valid. * - * This function is meant to be called by the `wrap` callback of first layer in - * the sequence. */ -void protolayer_push(struct protolayer_cb_ctx *ctx, char *buf, size_t buf_len, - void *target); + * Returns 0 on success. + * Returns `kr_error(EINVAL)` if the end index would exceed the + * buffer size. */ +int wire_buf_consume(struct wire_buf *wb, size_t length); + +/** Adds `length` to the start index of the valid data, marking `length` less + * bytes as valid. + * + * Returns 0 on success. + * Returns `kr_error(EINVAL)` if the start index would exceed + * the end index. */ +int wire_buf_trim(struct wire_buf *wb, size_t length); + +/** Moves the valid bytes of the buffer to the buffer's beginning. */ +int wire_buf_movestart(struct wire_buf *wb); + +/** Resets the valid bytes of the buffer to zero, as well as the error flag. 
*/ +int wire_buf_reset(struct wire_buf *wb); + +static void *wire_buf_data(const struct wire_buf *wb) +{ + return &wb->buf[wb->start]; +} + +static size_t wire_buf_data_length(const struct wire_buf *wb) +{ + return wb->end - wb->start; +} + +static void *wire_buf_free_space(const struct wire_buf *wb) +{ + return &wb->buf[wb->end]; +} + +static size_t wire_buf_free_space_length(const struct wire_buf *wb) +{ + return wb->size - wb->end; +} /** Indicates how a session sends data in the `wrap` direction and receives * data in the `unwrap` direction. */ enum session2_transport_type { SESSION2_TRANSPORT_NULL = 0, - SESSION2_TRANSPORT_HANDLE, + SESSION2_TRANSPORT_IO, SESSION2_TRANSPORT_PARENT, }; struct session2 { + /** Data for sending data out in the `wrap` direction and receiving new + * data in the `unwrap` direction. */ struct { - enum session2_transport_type type; + enum session2_transport_type type; /**< See `enum session2_transport_type` */ union { - void *ctx; - uv_handle_t *handle; + /** For `_IO` type transport. Contains a libuv handle + * and session-related addresses. */ + struct { + uv_handle_t *handle; + union kr_sockaddr peer; + union kr_sockaddr sockname; + } io; + + /** For `_PARENT` type transport. */ struct session2 *parent; }; } transport; - struct protolayer_manager *layers; + struct protolayer_manager *layers; /**< Protocol layers of this session. */ + knot_mm_t pool; + + uv_timer_t timer; + enum protolayer_direction timer_direction; /**< Timeout event direction. */ + + trie_t *tasks; /**< list of tasks associated with given session. */ + queue_t(struct qr_task *) waiting; /**< list of tasks waiting for sending to upstream. */ + + struct wire_buf wire_buf; + + uint64_t last_activity; /**< Time of last IO activity (if any occurs). + * Otherwise session creation time. */ + + bool closing : 1; + bool throttled : 1; bool outgoing : 1; + bool secure : 1; /**< Whether encryption takes place in this session. 
+ * Layers may use this to determine whether padding + * should be applied. */ }; /** Allocates and initializes a new session with the specified protocol layer * group, and the provided transport context. */ struct session2 *session2_new(enum session2_transport_type transport_type, - void *transport_ctx, enum protolayer_grp layer_grp, bool outgoing); /** Allocates and initializes a new session with the specified protocol layer * group, using a *libuv handle* as its transport. */ -static inline struct session2 *session2_new_handle(uv_handle_t *handle, - enum protolayer_grp layer_grp, - bool outgoing) +static inline struct session2 *session2_new_io(uv_handle_t *handle, + enum protolayer_grp layer_grp, + bool outgoing) { - return session2_new(SESSION2_TRANSPORT_HANDLE, handle, layer_grp, - outgoing); + struct session2 *s = session2_new(SESSION2_TRANSPORT_IO, layer_grp, outgoing); + s->transport.io.handle = handle; + handle->data = s; + return s; } /** Allocates and initializes a new session with the specified protocol layer @@ -310,16 +549,96 @@ static inline struct session2 *session2_new_child(struct session2 *parent, enum protolayer_grp layer_grp, bool outgoing) { - return session2_new(SESSION2_TRANSPORT_PARENT, parent, layer_grp, - outgoing); + struct session2 *s = session2_new(SESSION2_TRANSPORT_PARENT, layer_grp, outgoing); + s->transport.parent = parent; + return s; } /** De-allocates the session. */ void session2_free(struct session2 *s); -/** Sends the specified buffer to be processed in the `unwrap` direction by the - * session's protocol layers. The `target` parameter may contain a pointer to - * transport-specific data, e.g. for UDP, it shall contain a pointer to the +/** Start reading from the underlying transport. */ +int session2_start_read(struct session2 *session); + +/** Stop reading from the underlying transport. 
*/ +int session2_stop_read(struct session2 *session); + +/** Gets the peer address from the specified session, iterating through the + * session hierarchy (child-to-parent) until an `_IO` session is found if + * needed. + * + * May return `NULL` if no peer is set. */ +struct sockaddr *session2_get_peer(struct session2 *s); + +/** Gets the sockname from the specified session, iterating through the + * session hierarchy (child-to-parent) until an `_IO` session is found if + * needed. + * + * May return `NULL` if no peer is set. */ +struct sockaddr *session2_get_sockname(struct session2 *s); + +/** Gets the libuv handle from the specified session, iterating through the + * session hierarchy (child-to-parent) until an `_IO` session is found if + * needed. + * + * May return `NULL` if no peer is set. */ +uv_handle_t *session2_get_handle(struct session2 *s); + +/** Start the session timer. When the timer ends, a `_TIMEOUT` event is sent + * in the specified `direction`. */ +int session2_timer_start(struct session2 *s, uint64_t timeout, uint64_t repeat, + enum protolayer_direction direction); + +/** Restart the session timer without changing any of its parameters. */ +int session2_timer_restart(struct session2 *s); + +/** Stop the session timer. 
*/ +int session2_timer_stop(struct session2 *s); + +int session2_tasklist_add(struct session2 *session, struct qr_task *task); +int session2_tasklist_del(struct session2 *session, struct qr_task *task); +struct qr_task *session2_tasklist_get_first(struct session2 *session); +struct qr_task *session2_tasklist_del_first(struct session2 *session, bool deref); +struct qr_task *session2_tasklist_find_msgid(const struct session2 *session, uint16_t msg_id); +struct qr_task *session2_tasklist_del_msgid(const struct session2 *session, uint16_t msg_id); +void session2_tasklist_finalize(struct session2 *session, int status); +int session2_tasklist_finalize_expired(struct session2 *session); + +static inline size_t session2_tasklist_get_len(const struct session2 *session) +{ + return trie_weight(session->tasks); +} + +static inline bool session2_tasklist_is_empty(const struct session2 *session) +{ + return session2_tasklist_get_len(session) == 0; +} + +int session2_waitinglist_push(struct session2 *session, struct qr_task *task); +struct qr_task *session2_waitinglist_get(const struct session2 *session); +struct qr_task *session2_waitinglist_pop(struct session2 *session, bool deref); +void session2_waitinglist_retry(struct session2 *session, bool increase_timeout_cnt); +void session2_waitinglist_finalize(struct session2 *session, int status); + +static inline size_t session2_waitinglist_get_len(const struct session2 *session) +{ + return queue_len(session->waiting); +} + +static inline bool session2_waitinglist_is_empty(const struct session2 *session) +{ + return session2_waitinglist_get_len(session) == 0; +} + +static inline bool session2_is_empty(const struct session2 *session) +{ + return session2_tasklist_is_empty(session) && + session2_waitinglist_is_empty(session); +} + +/** Sends the specified `payload` to be processed in the `unwrap` direction by + * the session's protocol layers. The `target` parameter may contain a pointer + * to transport-specific data, e.g. 
for UDP, it shall contain a pointer to the * sender's `struct sockaddr_*`. * * Once all layers are processed, `cb` is called with `baton` passed as one @@ -328,10 +647,10 @@ void session2_free(struct session2 *s); * * Returns one of `enum protolayer_ret` or a negative number * indicating an error. */ -int session2_unwrap(struct session2 *s, char *buf, size_t buf_len, void *target, - protolayer_finished_cb cb, void *baton); +int session2_unwrap(struct session2 *s, struct protolayer_payload payload, + const void *target, protolayer_finished_cb cb, void *baton); -/** Sends the specified buffer to be processed in the `wrap` direction by the +/** Sends the specified `payload` to be processed in the `wrap` direction by the * session's protocol layers. The `target` parameter may contain a pointer to * some data specific to the producer-consumer layer of this session. * @@ -341,5 +660,15 @@ int session2_unwrap(struct session2 *s, char *buf, size_t buf_len, void *target, * * Returns one of `enum protolayer_ret` or a negative number * indicating an error. */ -int session2_wrap(struct session2 *s, char *buf, size_t buf_len, void *target, - protolayer_finished_cb cb, void *baton); +int session2_wrap(struct session2 *s, struct protolayer_payload payload, + const void *target, protolayer_finished_cb cb, void *baton); + +/** Removes the specified request task from the session's tasklist. The session + * must be outgoing. If the session is UDP, a signal to close is also sent to it. */ +void session2_kill_ioreq(struct session2 *session, struct qr_task *task); + +/** Update `last_activity` to the current timestamp. 
*/ +static inline void session2_touch(struct session2 *session) +{ + session->last_activity = kr_now(); +} diff --git a/daemon/worker.c b/daemon/worker.c index 432ebe5aa..f946fc761 100644 --- a/daemon/worker.c +++ b/daemon/worker.c @@ -28,7 +28,7 @@ #include "daemon/engine.h" #include "daemon/io.h" #include "daemon/proxyv2.h" -#include "daemon/session.h" +#include "daemon/session2.h" #include "daemon/tls.h" #include "daemon/http.h" #include "daemon/udp_queue.h" @@ -51,7 +51,7 @@ struct request_ctx struct qr_task *task; struct { /** NULL if the request didn't come over network. */ - struct session *session; + struct session2 *session; /** Requestor's address; separate because of UDP session "sharing". */ union kr_sockaddr addr; /** Request communication address; if not from a proxy, same as addr. */ @@ -70,7 +70,7 @@ struct qr_task struct request_ctx *ctx; knot_pkt_t *pktbuf; qr_tasklist_t waiting; - struct session *pending[MAX_PENDING]; + struct session2 *pending[MAX_PENDING]; uint16_t pending_count; uint16_t timeouts; uint16_t iter_count; @@ -100,11 +100,11 @@ static void qr_task_free(struct qr_task *task); static int qr_task_step(struct qr_task *task, const struct sockaddr *packet_source, knot_pkt_t *packet); -static int qr_task_send(struct qr_task *task, struct session *session, +static int qr_task_send(struct qr_task *task, struct session2 *session, const struct sockaddr *addr, knot_pkt_t *pkt); static int qr_task_finalize(struct qr_task *task, int state); static void qr_task_complete(struct qr_task *task); -static int worker_add_tcp_waiting(const struct sockaddr* addr, struct session *session); +static int worker_add_tcp_waiting(const struct sockaddr* addr, struct session2 *session); static void on_tcp_connect_timeout(uv_timer_t *timer); static void on_udp_timeout(uv_timer_t *timer); static void subreq_finalize(struct qr_task *task, const struct sockaddr *packet_source, knot_pkt_t *pkt); @@ -115,8 +115,8 @@ struct worker_ctx *the_worker = NULL; /*! 
@internal Create a UDP/TCP handle for an outgoing AF_INET* connection. * socktype is SOCK_* */ -static uv_handle_t *ioreq_spawn(int socktype, sa_family_t family, bool has_tls, - bool has_http) +static uv_handle_t *ioreq_spawn(int socktype, sa_family_t family, + enum protolayer_grp grp) { bool precond = (socktype == SOCK_DGRAM || socktype == SOCK_STREAM) && (family == AF_INET || family == AF_INET6); @@ -131,7 +131,7 @@ static uv_handle_t *ioreq_spawn(int socktype, sa_family_t family, bool has_tls, if (!handle) { return NULL; } - int ret = io_create(the_worker->loop, handle, socktype, family, has_tls, has_http); + int ret = io_create(the_worker->loop, handle, socktype, family, grp, true); if (ret) { if (ret == UV_EMFILE) { the_worker->too_many_open = true; @@ -168,8 +168,8 @@ static uv_handle_t *ioreq_spawn(int socktype, sa_family_t family, bool has_tls, } /* Set current handle as a subrequest type. */ - struct session *session = handle->data; - session_flags(session)->outgoing = true; + struct session2 *session = handle->data; + session->outgoing = true; /* Connect or issue query datagram */ return handle; } @@ -177,7 +177,7 @@ static uv_handle_t *ioreq_spawn(int socktype, sa_family_t family, bool has_tls, static void ioreq_kill_pending(struct qr_task *task) { for (uint16_t i = 0; i < task->pending_count; ++i) { - session_kill_ioreq(task->pending[i], task); + session2_kill_ioreq(task->pending[i], task); } task->pending_count = 0; } @@ -214,7 +214,7 @@ static uint8_t *alloc_wire_cb(struct kr_request *req, uint16_t *maxlen) return NULL; struct request_ctx *ctx = (struct request_ctx *)req; /* We know it's an AF_XDP socket; otherwise this CB isn't assigned. 
*/ - uv_handle_t *handle = session_get_handle(ctx->source.session); + uv_handle_t *handle = session2_get_handle(ctx->source.session); if (kr_fails_assert(handle->type == UV_POLL)) return NULL; xdp_handle_data_t *xhd = handle->data; @@ -249,7 +249,7 @@ static void free_wire(const struct request_ctx *ctx) if (likely(ans->wire == NULL)) /* sent most likely */ return; /* We know it's an AF_XDP socket; otherwise alloc_wire_cb isn't assigned. */ - uv_handle_t *handle = session_get_handle(ctx->source.session); + uv_handle_t *handle = session2_get_handle(ctx->source.session); if (kr_fails_assert(handle->type == UV_POLL)) return; xdp_handle_data_t *xhd = handle->data; @@ -269,11 +269,11 @@ static void free_wire(const struct request_ctx *ctx) } #endif /* Helper functions for transport selection */ -static inline bool is_tls_capable(struct sockaddr *address) { - tls_client_param_t *tls_entry = tls_client_param_get( - the_network->tls_client_params, address); - return tls_entry; -} +//static inline bool is_tls_capable(struct sockaddr *address) { +// tls_client_param_t *tls_entry = tls_client_param_get( +// the_network->tls_client_params, address); +// return tls_entry; +//} static inline bool is_tcp_connected(struct sockaddr *address) { return worker_find_tcp_connected(address); @@ -288,8 +288,8 @@ static inline bool is_tcp_waiting(struct sockaddr *address) { * session and addr point to the source of the request, and they are NULL * in case the request didn't come from network. 
*/ -static struct request_ctx *request_create(struct session *session, - struct io_comm_data *comm, +static struct request_ctx *request_create(struct session2 *session, + struct comm_info *comm, const uint8_t *eth_from, const uint8_t *eth_to, uint32_t uid) @@ -307,7 +307,7 @@ static struct request_ctx *request_create(struct session *session, } /* TODO Relocate pool to struct request */ - if (session && kr_fails_assert(session_flags(session)->outgoing == false)) { + if (session && kr_fails_assert(session->outgoing == false)) { pool_release(pool.ctx); return NULL; } @@ -348,9 +348,9 @@ static struct request_ctx *request_create(struct session *session, const struct sockaddr *dst_addr = comm->dst_addr; const struct proxy_result *proxy = comm->proxy; - req->qsource.comm_flags.tcp = session_get_handle(session)->type == UV_TCP; - req->qsource.comm_flags.tls = session_flags(session)->has_tls; - req->qsource.comm_flags.http = session_flags(session)->has_http; + req->qsource.comm_flags.tcp = session2_get_handle(session)->type == UV_TCP; + req->qsource.comm_flags.tls = session->secure; +// req->qsource.comm_flags.http = session->has_http; /* TODO */ req->qsource.flags = req->qsource.comm_flags; if (proxy) { @@ -359,18 +359,20 @@ static struct request_ctx *request_create(struct session *session, } req->qsource.stream_id = -1; -#if ENABLE_DOH2 - if (req->qsource.comm_flags.http) { - struct http_ctx *http_ctx = session_http_get_server_ctx(session); - struct http_stream stream = queue_head(http_ctx->streams); - req->qsource.stream_id = stream.id; - if (stream.headers) { - req->qsource.headers = *stream.headers; - free(stream.headers); - stream.headers = NULL; - } - } -#endif + + /* TODO: http */ +//#if ENABLE_DOH2 +// if (req->qsource.comm_flags.http) { +// struct http_ctx *http_ctx = session_http_get_server_ctx(session); +// struct http_stream stream = queue_head(http_ctx->streams); +// req->qsource.stream_id = stream.id; +// if (stream.headers) { +// req->qsource.headers = 
*stream.headers; +// free(stream.headers); +// stream.headers = NULL; +// } +// } +//#endif /* We need to store a copy of peer address. */ memcpy(&ctx->source.addr.ip, src_addr, kr_sockaddr_len(src_addr)); req->qsource.addr = &ctx->source.addr.ip; @@ -381,12 +383,13 @@ static struct request_ctx *request_create(struct session *session, req->qsource.comm_addr = &ctx->source.comm_addr.ip; if (!dst_addr) /* We wouldn't have to copy in this case, but for consistency. */ - dst_addr = session_get_sockname(session); + dst_addr = session2_get_sockname(session); memcpy(&ctx->source.dst_addr.ip, dst_addr, kr_sockaddr_len(dst_addr)); req->qsource.dst_addr = &ctx->source.dst_addr.ip; } - req->selection_context.is_tls_capable = is_tls_capable; +// req->selection_context.is_tls_capable = is_tls_capable; + req->selection_context.is_tls_capable = false; req->selection_context.is_tcp_connected = is_tcp_connected; req->selection_context.is_tcp_waiting = is_tcp_waiting; array_init(req->selection_context.forwarding_targets); @@ -518,12 +521,12 @@ static void qr_task_free(struct qr_task *task) } /*@ Register new qr_task within session. */ -static int qr_task_register(struct qr_task *task, struct session *session) +static int qr_task_register(struct qr_task *task, struct session2 *session) { - if (kr_fails_assert(!session_flags(session)->outgoing && session_get_handle(session)->type == UV_TCP)) + if (kr_fails_assert(!session->outgoing && session2_get_handle(session)->type == UV_TCP)) return kr_error(EINVAL); - session_tasklist_add(session, task); + session2_tasklist_add(session, task); struct request_ctx *ctx = task->ctx; if (kr_fails_assert(ctx && (ctx->source.session == NULL || ctx->source.session == session))) @@ -534,10 +537,10 @@ static int qr_task_register(struct qr_task *task, struct session *session) * an in effect shrink TCP window size. To get more precise throttling, * we would need to copy remainder of the unread buffer and reassemble * when resuming reading. This is NYI. 
*/ - if (session_tasklist_get_len(session) >= the_worker->tcp_pipeline_max && - !session_flags(session)->throttled && !session_flags(session)->closing) { - session_stop_read(session); - session_flags(session)->throttled = true; + if (session2_tasklist_get_len(session) >= the_worker->tcp_pipeline_max && + !session->throttled && !session->closing) { + session2_stop_read(session); + session->throttled = true; } return 0; @@ -552,11 +555,11 @@ static void qr_task_complete(struct qr_task *task) kr_require(task->waiting.len == 0); kr_require(task->leading == false); - struct session *s = ctx->source.session; + struct session2 *s = ctx->source.session; if (s) { - kr_require(!session_flags(s)->outgoing && session_waitinglist_is_empty(s)); + kr_require(!s->outgoing && session2_waitinglist_is_empty(s)); ctx->source.session = NULL; - session_tasklist_del(s, task); + session2_tasklist_del(s, task); } /* Release primary reference to task. */ @@ -576,9 +579,9 @@ int qr_task_on_send(struct qr_task *task, const uv_handle_t *handle, int status) if (!handle || kr_fails_assert(handle->data)) return status; - struct session* s = handle->data; + struct session2* s = handle->data; - if (handle->type == UV_UDP && session_flags(s)->outgoing) { + if (handle->type == UV_UDP && s->outgoing) { // This should ensure that we are only dealing with our question to upstream if (kr_fails_assert(!knot_wire_get_qr(task->pktbuf->wire))) return status; @@ -587,7 +590,7 @@ int qr_task_on_send(struct qr_task *task, const uv_handle_t *handle, int status) if (kr_fails_assert(qry && task->transport)) return status; size_t timeout = task->transport->timeout; - int ret = session_timer_start(s, on_udp_timeout, timeout, 0); + int ret = session2_timer_start(s, timeout, 0, PROTOLAYER_UNWRAP); /* Start next step with timeout, fatal if can't start a timer. 
*/ if (ret != 0) { subreq_finalize(task, &task->transport->address.ip, task->pktbuf); @@ -600,7 +603,7 @@ int qr_task_on_send(struct qr_task *task, const uv_handle_t *handle, int status) const struct kr_request *req = &task->ctx->req; if (kr_log_is_debug(WORKER, req)) { const char *peer_str = NULL; - if (!session_flags(s)->outgoing) { + if (!s->outgoing) { peer_str = "hidden"; // avoid logging downstream IPs } else if (task->transport) { peer_str = kr_straddr(&task->transport->address.ip); @@ -615,15 +618,15 @@ int qr_task_on_send(struct qr_task *task, const uv_handle_t *handle, int status) return status; } - if (session_flags(s)->outgoing || session_flags(s)->closing) + if (s->outgoing || s->closing) return status; - if (session_flags(s)->throttled && - session_tasklist_get_len(s) < the_worker->tcp_pipeline_max/2) { + if (s->throttled && + session2_tasklist_get_len(s) < the_worker->tcp_pipeline_max/2) { /* Start reading again if the session is throttled and * the number of outgoing requests is below watermark. 
*/ - session_start_read(s); - session_flags(s)->throttled = false; + session2_start_read(s); + s->throttled = false; } } @@ -648,32 +651,40 @@ static void on_write(uv_write_t *req, int status) free(req); } -static int qr_task_send(struct qr_task *task, struct session *session, +static void qr_task_wrap_finished(int status, struct session2 *session, const void *target, void *baton) +{ + struct qr_task *task = baton; + qr_task_on_send(task, NULL, status); + qr_task_unref(task); + wire_buf_reset(&session->wire_buf); +} + +static int qr_task_send(struct qr_task *task, struct session2 *session, const struct sockaddr *addr, knot_pkt_t *pkt) { if (!session) return qr_task_on_send(task, NULL, kr_error(EIO)); int ret = 0; - struct request_ctx *ctx = task->ctx; + //struct request_ctx *ctx = task->ctx; /* TODO */ - uv_handle_t *handle = session_get_handle(session); + uv_handle_t *handle = session2_get_handle(session); if (kr_fails_assert(handle && handle->data == session)) return qr_task_on_send(task, NULL, kr_error(EINVAL)); const bool is_stream = handle->type == UV_TCP; kr_require(is_stream || handle->type == UV_UDP); if (addr == NULL) - addr = session_get_peer(session); + addr = session2_get_peer(session); if (pkt == NULL) pkt = worker_task_get_pktbuf(task); - if (session_flags(session)->outgoing && handle->type == UV_TCP) { - size_t try_limit = session_tasklist_get_len(session) + 1; + if (session->outgoing && handle->type == UV_TCP) { + size_t try_limit = session2_tasklist_get_len(session) + 1; uint16_t msg_id = knot_wire_get_id(pkt->wire); size_t try_count = 0; - while (session_tasklist_find_msgid(session, msg_id) && + while (session2_tasklist_find_msgid(session, msg_id) && try_count <= try_limit) { ++msg_id; ++try_count; @@ -687,73 +698,76 @@ static int qr_task_send(struct qr_task *task, struct session *session, task->send_time = kr_now(); task->recv_time = 0; // task structure is being reused so we have to zero this out here /* Send using given protocol */ - if 
(kr_fails_assert(!session_flags(session)->closing)) + if (kr_fails_assert(!session->closing)) return qr_task_on_send(task, NULL, kr_error(EIO)); - uv_handle_t *ioreq = malloc(is_stream ? sizeof(uv_write_t) : sizeof(uv_udp_send_t)); - if (!ioreq) - return qr_task_on_send(task, handle, kr_error(ENOMEM)); - /* Pending ioreq on current task */ qr_task_ref(task); - if (session_flags(session)->has_http) { -#if ENABLE_DOH2 - uv_write_t *write_req = (uv_write_t *)ioreq; - write_req->data = task; - ret = http_write(write_req, handle, pkt, ctx->req.qsource.stream_id, &on_write); -#else - ret = kr_error(ENOPROTOOPT); -#endif - } else if (session_flags(session)->has_tls) { - uv_write_t *write_req = (uv_write_t *)ioreq; - write_req->data = task; - ret = tls_write(write_req, handle, pkt, &on_write); - } else if (handle->type == UV_UDP) { - uv_udp_send_t *send_req = (uv_udp_send_t *)ioreq; - uv_buf_t buf = { (char *)pkt->wire, pkt->size }; - send_req->data = task; - ret = uv_udp_send(send_req, (uv_udp_t *)handle, &buf, 1, addr, &on_send); - } else if (handle->type == UV_TCP) { - uv_write_t *write_req = (uv_write_t *)ioreq; - /* We need to write message length in native byte order, - * but we don't have a convenient place to store those bytes. - * The problem is that all memory referenced from buf[] MUST retain - * its contents at least until on_write() is called, and I currently - * can't see any convenient place outside the `pkt` structure. - * So we use directly the *individual* bytes in pkt->size. - * The call to htonl() and the condition will probably be inlinable. 
*/ - int lsbi, slsbi; /* (second) least significant byte index */ - if (htonl(1) == 1) { /* big endian */ - lsbi = sizeof(pkt->size) - 1; - slsbi = sizeof(pkt->size) - 2; - } else { - lsbi = 0; - slsbi = 1; - } - uv_buf_t buf[3] = { - { (char *)&pkt->size + slsbi, 1 }, - { (char *)&pkt->size + lsbi, 1 }, - { (char *)pkt->wire, pkt->size }, - }; - write_req->data = task; - ret = uv_write(write_req, (uv_stream_t *)handle, buf, 3, &on_write); - } else { - kr_assert(false); - } - - if (ret == 0) { - session_touch(session); - if (session_flags(session)->outgoing) { - session_tasklist_add(session, task); + /* TODO */ +// if (session_flags(session)->has_http) { +//#if ENABLE_DOH2 +// uv_write_t *write_req = (uv_write_t *)ioreq; +// write_req->data = task; +// ret = http_write(write_req, handle, pkt, ctx->req.qsource.stream_id, &on_write); +//#else +// ret = kr_error(ENOPROTOOPT); +//#endif +// } else if (session_flags(session)->has_tls) { +// uv_write_t *write_req = (uv_write_t *)ioreq; +// write_req->data = task; +// ret = tls_write(write_req, handle, pkt, &on_write); +// } else if (handle->type == UV_UDP) { +// uv_udp_send_t *send_req = (uv_udp_send_t *)ioreq; +// uv_buf_t buf = { (char *)pkt->wire, pkt->size }; +// send_req->data = task; +// ret = uv_udp_send(send_req, (uv_udp_t *)handle, &buf, 1, addr, &on_send); +// } else if (handle->type == UV_TCP) { +// uv_write_t *write_req = (uv_write_t *)ioreq; +// /* We need to write message length in native byte order, +// * but we don't have a convenient place to store those bytes. +// * The problem is that all memory referenced from buf[] MUST retain +// * its contents at least until on_write() is called, and I currently +// * can't see any convenient place outside the `pkt` structure. +// * So we use directly the *individual* bytes in pkt->size. +// * The call to htonl() and the condition will probably be inlinable. 
*/ +// int lsbi, slsbi; /* (second) least significant byte index */ +// if (htonl(1) == 1) { /* big endian */ +// lsbi = sizeof(pkt->size) - 1; +// slsbi = sizeof(pkt->size) - 2; +// } else { +// lsbi = 0; +// slsbi = 1; +// } +// uv_buf_t buf[3] = { +// { (char *)&pkt->size + slsbi, 1 }, +// { (char *)&pkt->size + lsbi, 1 }, +// { (char *)pkt->wire, pkt->size }, +// }; +// write_req->data = task; +// ret = uv_write(write_req, (uv_stream_t *)handle, buf, 3, &on_write); +// } else { +// kr_assert(false); +// } + + /* Pending '_finished' callback on current task */ + qr_task_ref(task); + ret = session2_wrap(session, + protolayer_buffer((char *)pkt->wire, pkt->size), + addr, qr_task_wrap_finished, task); + + if (ret >= 0) { + session2_touch(session); + if (session->outgoing) { + session2_tasklist_add(session, task); } if (the_worker->too_many_open && the_worker->stats.rconcurrent < the_worker->rconcurrent_highwatermark - 10) { the_worker->too_many_open = false; } + ret = kr_ok(); } else { - free(ioreq); qr_task_unref(task); if (ret == UV_EMFILE) { the_worker->too_many_open = true; @@ -761,9 +775,11 @@ static int qr_task_send(struct qr_task *task, struct session *session, ret = kr_error(UV_EMFILE); } - if (session_flags(session)->has_http) - the_worker->stats.err_http += 1; - else if (session_flags(session)->has_tls) + /* TODO */ +// if (session_flags(session)->has_http) +// the_worker->stats.err_http += 1; +// else + if (session->secure) the_worker->stats.err_tls += 1; else if (handle->type == UV_UDP) the_worker->stats.err_udp += 1; @@ -772,8 +788,8 @@ static int qr_task_send(struct qr_task *task, struct session *session, } /* Update outgoing query statistics */ - if (session_flags(session)->outgoing && addr) { - if (session_flags(session)->has_tls) + if (session->outgoing && addr) { + if (session->secure) the_worker->stats.tls += 1; else if (handle->type == UV_UDP) the_worker->stats.udp += 1; @@ -797,129 +813,132 @@ static struct kr_query 
*task_get_last_pending_query(struct qr_task *task) return array_tail(task->ctx->req.rplan.pending); } -static int session_tls_hs_cb(struct session *session, int status) -{ - if (kr_fails_assert(session_flags(session)->outgoing)) - return kr_error(EINVAL); - struct sockaddr *peer = session_get_peer(session); - int deletion_res = worker_del_tcp_waiting(peer); - int ret = kr_ok(); - - if (status) { - struct qr_task *task = session_waitinglist_get(session); - if (task) { - // TLS handshake failed, report it to server selection - struct kr_query *qry = array_tail(task->ctx->req.rplan.pending); - qry->server_selection.error(qry, task->transport, KR_SELECTION_TLS_HANDSHAKE_FAILED); - } -#ifndef NDEBUG - else { - /* Task isn't in the list of tasks - * waiting for connection to upstream. - * So that it MUST be unsuccessful rehandshake. - * Check it. */ - kr_require(deletion_res != 0); - struct kr_sockaddr_key_storage key; - ssize_t keylen = kr_sockaddr_key(&key, peer); - if (keylen < 0) - return keylen; - trie_val_t *val; - kr_require((val = trie_get_try(the_worker->tcp_connected, key.bytes, keylen)) && *val); - } -#endif - return ret; - } - - /* handshake was completed successfully */ - struct tls_client_ctx *tls_client_ctx = session_tls_get_client_ctx(session); - tls_client_param_t *tls_params = tls_client_ctx->params; - gnutls_session_t tls_session = tls_client_ctx->c.tls_session; - if (gnutls_session_is_resumed(tls_session) != 0) { - kr_log_debug(TLSCLIENT, "TLS session has resumed\n"); - } else { - kr_log_debug(TLSCLIENT, "TLS session has not resumed\n"); - /* session wasn't resumed, delete old session data ... */ - if (tls_params->session_data.data != NULL) { - gnutls_free(tls_params->session_data.data); - tls_params->session_data.data = NULL; - tls_params->session_data.size = 0; - } - /* ... 
and get the new session data */ - gnutls_datum_t tls_session_data = { NULL, 0 }; - ret = gnutls_session_get_data2(tls_session, &tls_session_data); - if (ret == 0) { - tls_params->session_data = tls_session_data; - } - } - - struct session *s = worker_find_tcp_connected(peer); - ret = kr_ok(); - if (deletion_res == kr_ok()) { - /* peer was in the waiting list, add to the connected list. */ - if (s) { - /* Something went wrong, - * peer already is in the connected list. */ - ret = kr_error(EINVAL); - } else { - ret = worker_add_tcp_connected(peer, session); - } - } else { - /* peer wasn't in the waiting list. - * It can be - * 1) either successful rehandshake; in this case peer - * must be already in the connected list. - * 2) or successful handshake with session, which was timed out - * by on_tcp_connect_timeout(); after successful tcp connection; - * in this case peer isn't in the connected list. - **/ - if (!s || s != session) { - ret = kr_error(EINVAL); - } - } - if (ret == kr_ok()) { - while (!session_waitinglist_is_empty(session)) { - struct qr_task *t = session_waitinglist_get(session); - ret = qr_task_send(t, session, NULL, NULL); - if (ret != 0) { - break; - } - session_waitinglist_pop(session, true); - } - } else { - ret = kr_error(EINVAL); - } - - if (ret != kr_ok()) { - /* Something went wrong. - * Either addition to the list of connected sessions - * or write to upstream failed. 
*/ - worker_del_tcp_connected(peer); - session_waitinglist_finalize(session, KR_STATE_FAIL); - session_tasklist_finalize(session, KR_STATE_FAIL); - session_close(session); - } else { - session_timer_stop(session); - session_timer_start(session, tcp_timeout_trigger, - MAX_TCP_INACTIVITY, MAX_TCP_INACTIVITY); - } - return kr_ok(); -} - -static int send_waiting(struct session *session) +/* TODO: tls */ +//static int session_tls_hs_cb(struct session2 *session, int status) +//{ +// if (kr_fails_assert(session->outgoing)) +// return kr_error(EINVAL); +// struct sockaddr *peer = session2_get_peer(session); +// int deletion_res = worker_del_tcp_waiting(peer); +// int ret = kr_ok(); +// +// if (status) { +// struct qr_task *task = session2_waitinglist_get(session); +// if (task) { +// // TLS handshake failed, report it to server selection +// struct kr_query *qry = array_tail(task->ctx->req.rplan.pending); +// qry->server_selection.error(qry, task->transport, KR_SELECTION_TLS_HANDSHAKE_FAILED); +// } +//#ifndef NDEBUG +// else { +// /* Task isn't in the list of tasks +// * waiting for connection to upstream. +// * So that it MUST be unsuccessful rehandshake. +// * Check it. 
*/ +// kr_require(deletion_res != 0); +// struct kr_sockaddr_key_storage key; +// ssize_t keylen = kr_sockaddr_key(&key, peer); +// if (keylen < 0) +// return keylen; +// trie_val_t *val; +// kr_require((val = trie_get_try(the_worker->tcp_connected, key.bytes, keylen)) && *val); +// } +//#endif +// return ret; +// } +// +// /* handshake was completed successfully */ +// struct tls_client_ctx *tls_client_ctx = session_tls_get_client_ctx(session); +// tls_client_param_t *tls_params = tls_client_ctx->params; +// gnutls_session_t tls_session = tls_client_ctx->c.tls_session; +// if (gnutls_session_is_resumed(tls_session) != 0) { +// kr_log_debug(TLSCLIENT, "TLS session has resumed\n"); +// } else { +// kr_log_debug(TLSCLIENT, "TLS session has not resumed\n"); +// /* session wasn't resumed, delete old session data ... */ +// if (tls_params->session_data.data != NULL) { +// gnutls_free(tls_params->session_data.data); +// tls_params->session_data.data = NULL; +// tls_params->session_data.size = 0; +// } +// /* ... and get the new session data */ +// gnutls_datum_t tls_session_data = { NULL, 0 }; +// ret = gnutls_session_get_data2(tls_session, &tls_session_data); +// if (ret == 0) { +// tls_params->session_data = tls_session_data; +// } +// } +// +// struct session2 *s = worker_find_tcp_connected(peer); +// ret = kr_ok(); +// if (deletion_res == kr_ok()) { +// /* peer was in the waiting list, add to the connected list. */ +// if (s) { +// /* Something went wrong, +// * peer already is in the connected list. */ +// ret = kr_error(EINVAL); +// } else { +// ret = worker_add_tcp_connected(peer, session); +// } +// } else { +// /* peer wasn't in the waiting list. +// * It can be +// * 1) either successful rehandshake; in this case peer +// * must be already in the connected list. +// * 2) or successful handshake with session, which was timed out +// * by on_tcp_connect_timeout(); after successful tcp connection; +// * in this case peer isn't in the connected list. 
+// **/ +// if (!s || s != session) { +// ret = kr_error(EINVAL); +// } +// } +// if (ret == kr_ok()) { +// while (!session_waitinglist_is_empty(session)) { +// struct qr_task *t = session_waitinglist_get(session); +// ret = qr_task_send(t, session, NULL, NULL); +// if (ret != 0) { +// break; +// } +// session_waitinglist_pop(session, true); +// } +// } else { +// ret = kr_error(EINVAL); +// } +// +// if (ret != kr_ok()) { +// /* Something went wrong. +// * Either addition to the list of connected sessions +// * or write to upstream failed. */ +// worker_del_tcp_connected(peer); +// session_waitinglist_finalize(session, KR_STATE_FAIL); +// session_tasklist_finalize(session, KR_STATE_FAIL); +// session_close(session); +// } else { +// session_timer_stop(session); +// session_timer_start(session, tcp_timeout_trigger, +// MAX_TCP_INACTIVITY, MAX_TCP_INACTIVITY); +// } +// return kr_ok(); +//} + +static int send_waiting(struct session2 *session) { int ret = 0; - while (!session_waitinglist_is_empty(session)) { - struct qr_task *t = session_waitinglist_get(session); + while (!session2_waitinglist_is_empty(session)) { + struct qr_task *t = session2_waitinglist_get(session); ret = qr_task_send(t, session, NULL, NULL); if (ret != 0) { - struct sockaddr *peer = session_get_peer(session); - session_waitinglist_finalize(session, KR_STATE_FAIL); - session_tasklist_finalize(session, KR_STATE_FAIL); + struct sockaddr *peer = session2_get_peer(session); + session2_waitinglist_finalize(session, KR_STATE_FAIL); + session2_tasklist_finalize(session, KR_STATE_FAIL); worker_del_tcp_connected(peer); - session_close(session); + session2_unwrap(session, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); break; } - session_waitinglist_pop(session, true); + session2_waitinglist_pop(session, true); } return ret; } @@ -928,16 +947,16 @@ static void on_connect(uv_connect_t *req, int status) { kr_require(the_worker); uv_stream_t *handle = req->handle; - struct session *session 
= handle->data; - struct sockaddr *peer = session_get_peer(session); + struct session2 *session = handle->data; + struct sockaddr *peer = session2_get_peer(session); free(req); - if (kr_fails_assert(session_flags(session)->outgoing)) + if (kr_fails_assert(session->outgoing)) return; - if (session_flags(session)->closing) { + if (session->closing) { worker_del_tcp_waiting(peer); - kr_assert(session_is_empty(session)); + kr_assert(session2_is_empty(session)); return; } @@ -947,7 +966,7 @@ static void on_connect(uv_connect_t *req, int status) * If no, most likely this is timed out connection * which was removed from waiting list by * on_tcp_connect_timeout() callback. */ - struct session *s = worker_find_tcp_waiting(peer); + struct session2 *s = worker_find_tcp_waiting(peer); if (!s || s != session) { /* session isn't on the waiting list. * it's timed out session. */ @@ -957,9 +976,11 @@ static void on_connect(uv_connect_t *req, int status) "is already timed out, close\n", peer_str ? peer_str : ""); } - kr_assert(session_tasklist_is_empty(session)); - session_waitinglist_retry(session, false); - session_close(session); + kr_assert(session2_tasklist_is_empty(session)); + session2_waitinglist_retry(session, false); + session2_unwrap(session, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); return; } @@ -974,9 +995,11 @@ static void on_connect(uv_connect_t *req, int status) "is already connected, close\n", peer_str ? peer_str : ""); } - kr_assert(session_tasklist_is_empty(session)); - session_waitinglist_retry(session, false); - session_close(session); + kr_assert(session2_tasklist_is_empty(session)); + session2_waitinglist_retry(session, false); + session2_unwrap(session, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); return; } @@ -987,7 +1010,7 @@ static void on_connect(uv_connect_t *req, int status) peer_str ? 
peer_str : "", uv_strerror(status)); } worker_del_tcp_waiting(peer); - struct qr_task *task = session_waitinglist_get(session); + struct qr_task *task = session2_waitinglist_get(session); if (task && status != UV_ETIMEDOUT) { /* Penalize upstream. * In case of UV_ETIMEDOUT upstream has been @@ -995,21 +1018,25 @@ static void on_connect(uv_connect_t *req, int status) struct kr_query *qry = array_tail(task->ctx->req.rplan.pending); qry->server_selection.error(qry, task->transport, KR_SELECTION_TCP_CONNECT_FAILED); } - kr_assert(session_tasklist_is_empty(session)); - session_waitinglist_retry(session, false); - session_close(session); + kr_assert(session2_tasklist_is_empty(session)); + session2_waitinglist_retry(session, false); + session2_unwrap(session, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); return; } - if (!session_flags(session)->has_tls) { + if (!session->secure) { /* if there is a TLS, session still waiting for handshake, * otherwise remove it from waiting list */ if (worker_del_tcp_waiting(peer) != 0) { /* session isn't in list of waiting queries, * * something gone wrong */ - session_waitinglist_finalize(session, KR_STATE_FAIL); - kr_assert(session_tasklist_is_empty(session)); - session_close(session); + session2_waitinglist_finalize(session, KR_STATE_FAIL); + kr_assert(session2_tasklist_is_empty(session)); + session2_unwrap(session, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); return; } } @@ -1019,46 +1046,48 @@ static void on_connect(uv_connect_t *req, int status) kr_log_debug(WORKER, "=> connected to '%s'\n", peer_str ? 
peer_str : ""); } - session_flags(session)->connected = true; - session_start_read(session); + /* TODO */ +// session->connected = true; + session2_start_read(session); int ret = kr_ok(); - if (session_flags(session)->has_tls) { - struct tls_client_ctx *tls_ctx = session_tls_get_client_ctx(session); - ret = tls_client_connect_start(tls_ctx, session, session_tls_hs_cb); - if (ret == kr_error(EAGAIN)) { - session_timer_stop(session); - session_timer_start(session, tcp_timeout_trigger, - MAX_TCP_INACTIVITY, MAX_TCP_INACTIVITY); - return; - } - } else { +// if (session->secure) { +// struct tls_client_ctx *tls_ctx = session_tls_get_client_ctx(session); +// ret = tls_client_connect_start(tls_ctx, session, session_tls_hs_cb); +// if (ret == kr_error(EAGAIN)) { +// session_timer_stop(session); +// session_timer_start(session, tcp_timeout_trigger, +// MAX_TCP_INACTIVITY, MAX_TCP_INACTIVITY); +// return; +// } +// } else { worker_add_tcp_connected(peer, session); - } +// } ret = send_waiting(session); if (ret != 0) { return; } - session_timer_stop(session); - session_timer_start(session, tcp_timeout_trigger, - MAX_TCP_INACTIVITY, MAX_TCP_INACTIVITY); + session2_timer_stop(session); + session2_timer_start(session, + MAX_TCP_INACTIVITY, MAX_TCP_INACTIVITY, + PROTOLAYER_UNWRAP); } static void on_tcp_connect_timeout(uv_timer_t *timer) { - struct session *session = timer->data; + struct session2 *session = timer->data; uv_timer_stop(timer); kr_require(the_worker); - kr_assert(session_tasklist_is_empty(session)); + kr_assert(session2_tasklist_is_empty(session)); - struct sockaddr *peer = session_get_peer(session); + struct sockaddr *peer = session2_get_peer(session); worker_del_tcp_waiting(peer); - struct qr_task *task = session_waitinglist_get(session); + struct qr_task *task = session2_waitinglist_get(session); if (!task) { /* Normally shouldn't happen. 
*/ const char *peer_str = kr_straddr(peer); @@ -1076,9 +1105,9 @@ static void on_tcp_connect_timeout(uv_timer_t *timer) qry->server_selection.error(qry, task->transport, KR_SELECTION_TCP_CONNECT_TIMEOUT); - the_worker->stats.timeout += session_waitinglist_get_len(session); - session_waitinglist_retry(session, true); - kr_assert(session_tasklist_is_empty(session)); + the_worker->stats.timeout += session2_waitinglist_get_len(session); + session2_waitinglist_retry(session, true); + kr_assert(session2_tasklist_is_empty(session)); /* uv_cancel() doesn't support uv_connect_t request, * so that we can't cancel it. * There still exists possibility of successful connection @@ -1092,14 +1121,14 @@ static void on_tcp_connect_timeout(uv_timer_t *timer) /* This is called when I/O timeouts */ static void on_udp_timeout(uv_timer_t *timer) { - struct session *session = timer->data; - kr_assert(session_get_handle(session)->data == session); - kr_assert(session_tasklist_get_len(session) == 1); - kr_assert(session_waitinglist_is_empty(session)); + struct session2 *session = timer->data; + kr_assert(session2_get_handle(session)->data == session); + kr_assert(session2_tasklist_get_len(session) == 1); + kr_assert(session2_waitinglist_is_empty(session)); uv_timer_stop(timer); - struct qr_task *task = session_tasklist_get_first(session); + struct qr_task *task = session2_tasklist_get_first(session); if (!task) return; @@ -1133,24 +1162,26 @@ static uv_handle_t *transmit(struct qr_task *task) if (kr_resolve_checkout(&ctx->req, NULL, transport, task->pktbuf) != 0) { return ret; } - ret = ioreq_spawn(SOCK_DGRAM, choice->sin6_family, false, false); + ret = ioreq_spawn(SOCK_DGRAM, choice->sin6_family, PROTOLAYER_GRP_DOUDP); if (!ret) { return ret; } struct sockaddr *addr = (struct sockaddr *)choice; - struct session *session = ret->data; - struct sockaddr *peer = session_get_peer(session); - kr_assert(peer->sa_family == AF_UNSPEC && session_flags(session)->outgoing); + struct session2 *session 
= ret->data; + struct sockaddr *peer = session2_get_peer(session); + kr_assert(peer->sa_family == AF_UNSPEC && session->outgoing); kr_require(addr->sa_family == AF_INET || addr->sa_family == AF_INET6); memcpy(peer, addr, kr_sockaddr_len(addr)); if (qr_task_send(task, session, (struct sockaddr *)choice, task->pktbuf) != 0) { - session_close(session); + session2_unwrap(session, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); ret = NULL; } else { task->pending[task->pending_count] = session; task->pending_count += 1; - session_start_read(session); /* Start reading answer */ + session2_start_read(session); /* Start reading answer */ } } return ret; @@ -1299,7 +1330,7 @@ static int qr_task_finalize(struct qr_task *task, int state) return kr_ok(); } struct request_ctx *ctx = task->ctx; - struct session *source_session = ctx->source.session; + struct session2 *source_session = ctx->source.session; kr_resolve_finish(&ctx->req, state); task->finished = true; @@ -1317,7 +1348,7 @@ static int qr_task_finalize(struct qr_task *task, int state) return kr_ok(); } - if (session_flags(source_session)->closing || + if (source_session->closing || ctx->source.addr.ip.sa_family == AF_UNSPEC) return kr_error(EINVAL); @@ -1326,7 +1357,8 @@ static int qr_task_finalize(struct qr_task *task, int state) /* Send back answer */ int ret; - const uv_handle_t *src_handle = session_get_handle(source_session); + const uv_handle_t *src_handle = session2_get_handle(source_session); + /* TODO: this should probably just be a _wrap? */ if (kr_fails_assert(src_handle->type == UV_UDP || src_handle->type == UV_TCP || src_handle->type == UV_POLL)) { ret = kr_error(EINVAL); @@ -1346,8 +1378,8 @@ static int qr_task_finalize(struct qr_task *task, int state) if (ret != kr_ok()) { (void) qr_task_on_send(task, NULL, kr_error(EIO)); /* Since source session is erroneous detach all tasks. 
*/ - while (!session_tasklist_is_empty(source_session)) { - struct qr_task *t = session_tasklist_del_first(source_session, false); + while (!session2_tasklist_is_empty(source_session)) { + struct qr_task *t = session2_tasklist_del_first(source_session, false); struct request_ctx *c = t->ctx; kr_assert(c->source.session == source_session); c->source.session = NULL; @@ -1356,7 +1388,9 @@ static int qr_task_finalize(struct qr_task *task, int state) * (ie. task->leading is true) */ worker_task_unref(t); } - session_close(source_session); + session2_unwrap(source_session, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); } qr_task_unref(task); @@ -1388,22 +1422,22 @@ static int udp_task_step(struct qr_task *task, return kr_ok(); } -static int tcp_task_waiting_connection(struct session *session, struct qr_task *task) +static int tcp_task_waiting_connection(struct session2 *session, struct qr_task *task) { - if (kr_fails_assert(session_flags(session)->outgoing && !session_flags(session)->closing)) + if (kr_fails_assert(session->outgoing && !session->closing)) return kr_error(EINVAL); /* Add task to the end of list of waiting tasks. * It will be notified in on_connect() or qr_task_on_send(). */ - int ret = session_waitinglist_push(session, task); + int ret = session2_waitinglist_push(session, task); if (ret < 0) { return kr_error(EINVAL); } return kr_ok(); } -static int tcp_task_existing_connection(struct session *session, struct qr_task *task) +static int tcp_task_existing_connection(struct session2 *session, struct qr_task *task) { - if (kr_fails_assert(session_flags(session)->outgoing && !session_flags(session)->closing)) + if (kr_fails_assert(session->outgoing && !session->closing)) return kr_error(EINVAL); /* If there are any unsent queries, send it first. */ @@ -1413,7 +1447,7 @@ static int tcp_task_existing_connection(struct session *session, struct qr_task } /* No unsent queries at that point. 
*/ - if (session_tasklist_get_len(session) >= the_worker->tcp_pipeline_max) { + if (session2_tasklist_get_len(session) >= the_worker->tcp_pipeline_max) { /* Too many outstanding queries, answer with SERVFAIL, */ return kr_error(EINVAL); } @@ -1423,9 +1457,11 @@ static int tcp_task_existing_connection(struct session *session, struct qr_task if (ret != 0) { /* Error, finalize task with SERVFAIL and * close connection to upstream. */ - session_tasklist_finalize(session, KR_STATE_FAIL); - worker_del_tcp_connected(session_get_peer(session)); - session_close(session); + session2_tasklist_finalize(session, KR_STATE_FAIL); + worker_del_tcp_connected(session2_get_peer(session)); + session2_unwrap(session, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); return kr_error(EINVAL); } @@ -1435,63 +1471,70 @@ static int tcp_task_existing_connection(struct session *session, struct qr_task static int tcp_task_make_connection(struct qr_task *task, const struct sockaddr *addr) { /* Check if there must be TLS */ - struct tls_client_ctx *tls_ctx = NULL; - tls_client_param_t *entry = tls_client_param_get( - the_network->tls_client_params, addr); - if (entry) { - /* Address is configured to be used with TLS. - * We need to allocate auxiliary data structure. */ - tls_ctx = tls_client_ctx_new(entry); - if (!tls_ctx) { - return kr_error(EINVAL); - } - } +// struct tls_client_ctx *tls_ctx = NULL; +// tls_client_param_t *entry = tls_client_param_get( +// the_network->tls_client_params, addr); +// if (entry) { +// /* Address is configured to be used with TLS. +// * We need to allocate auxiliary data structure. 
*/ +// tls_ctx = tls_client_ctx_new(entry); +// if (!tls_ctx) { +// return kr_error(EINVAL); +// } +// } uv_connect_t *conn = malloc(sizeof(uv_connect_t)); if (!conn) { - tls_client_ctx_free(tls_ctx); +// tls_client_ctx_free(tls_ctx); return kr_error(EINVAL); } - bool has_http = false; - bool has_tls = (tls_ctx != NULL); - uv_handle_t *client = ioreq_spawn(SOCK_STREAM, addr->sa_family, has_tls, has_http); - if (!client) { - tls_client_ctx_free(tls_ctx); - free(conn); - return kr_error(EINVAL); - } - struct session *session = client->data; - if (kr_fails_assert(session_flags(session)->has_tls == has_tls)) { - tls_client_ctx_free(tls_ctx); - free(conn); - return kr_error(EINVAL); - } - if (has_tls) { - tls_client_ctx_set_session(tls_ctx, session); - session_tls_set_client_ctx(session, tls_ctx); - } +// bool has_tls = (tls_ctx != NULL); +// uv_handle_t *client = ioreq_spawn(SOCK_STREAM, addr->sa_family, +// (has_tls) ? PROTOLAYER_GRP_DOTLS : PROTOLAYER_GRP_DOTCP); + uv_handle_t *client = ioreq_spawn(SOCK_STREAM, addr->sa_family, + PROTOLAYER_GRP_DOTCP); +// if (!client) { +// tls_client_ctx_free(tls_ctx); +// free(conn); +// return kr_error(EINVAL); +// } + struct session2 *session = client->data; + /* TODO: tls */ +// if (kr_fails_assert(session->secure == has_tls)) { +// tls_client_ctx_free(tls_ctx); +// free(conn); +// return kr_error(EINVAL); +// } +// if (has_tls) { +// tls_client_ctx_set_session(tls_ctx, session); +// session_tls_set_client_ctx(session, tls_ctx); +// } /* Add address to the waiting list. * Now it "is waiting to be connected to." */ int ret = worker_add_tcp_waiting(addr, session); if (ret < 0) { free(conn); - session_close(session); + session2_unwrap(session, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); return kr_error(EINVAL); } conn->data = session; /* Store peer address for the session. 
*/ - struct sockaddr *peer = session_get_peer(session); + struct sockaddr *peer = session2_get_peer(session); memcpy(peer, addr, kr_sockaddr_len(addr)); /* Start watchdog to catch eventual connection timeout. */ - ret = session_timer_start(session, on_tcp_connect_timeout, - KR_CONN_RTT_MAX, 0); + ret = session2_timer_start(session, + KR_CONN_RTT_MAX, 0, PROTOLAYER_UNWRAP); if (ret != 0) { worker_del_tcp_waiting(addr); free(conn); - session_close(session); + session2_unwrap(session, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); return kr_error(EINVAL); } @@ -1504,22 +1547,26 @@ static int tcp_task_make_connection(struct qr_task *task, const struct sockaddr /* Start connection process to upstream. */ ret = uv_tcp_connect(conn, (uv_tcp_t *)client, addr , on_connect); if (ret != 0) { - session_timer_stop(session); + session2_timer_stop(session); worker_del_tcp_waiting(addr); free(conn); - session_close(session); + session2_unwrap(session, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); qry->server_selection.error(qry, task->transport, KR_SELECTION_TCP_CONNECT_FAILED); return kr_error(EAGAIN); } /* Add task to the end of list of waiting tasks. * Will be notified either in on_connect() or in qr_task_on_send(). */ - ret = session_waitinglist_push(session, task); + ret = session2_waitinglist_push(session, task); if (ret < 0) { - session_timer_stop(session); + session2_timer_stop(session); worker_del_tcp_waiting(addr); free(conn); - session_close(session); + session2_unwrap(session, + protolayer_event_nd(PROTOLAYER_EVENT_CLOSE), + NULL, NULL, NULL); return kr_error(EINVAL); } @@ -1550,7 +1597,7 @@ static int tcp_task_step(struct qr_task *task, return qr_task_finalize(task, KR_STATE_FAIL); } int ret; - struct session* session = NULL; + struct session2* session = NULL; if ((session = worker_find_tcp_waiting(addr)) != NULL) { /* Connection is in the list of waiting connections. 
* It means that connection establishing is coming right now. */ @@ -1665,18 +1712,39 @@ static int qr_task_step(struct qr_task *task, } } -int worker_submit(struct session *session, struct io_comm_data *comm, +static int parse_packet(knot_pkt_t *query) +{ + if (!query){ + return kr_error(EINVAL); + } + + /* Parse query packet. */ + int ret = knot_pkt_parse(query, 0); + if (ret == KNOT_ETRAIL) { + /* Extra data after message end. */ + ret = kr_error(EMSGSIZE); + } else if (ret != KNOT_EOK) { + /* Malformed query. */ + ret = kr_error(EPROTO); + } else { + ret = kr_ok(); + } + + return ret; +} + +int worker_submit(struct session2 *session, struct comm_info *comm, const uint8_t *eth_from, const uint8_t *eth_to, knot_pkt_t *pkt) { if (!session || !pkt) return kr_error(EINVAL); - uv_handle_t *handle = session_get_handle(session); + uv_handle_t *handle = session2_get_handle(session); if (!handle || !handle->loop->data) return kr_error(EINVAL); const bool is_query = (knot_wire_get_qr(pkt->wire) == 0); - const bool is_outgoing = session_flags(session)->outgoing; + const bool is_outgoing = session->outgoing; int ret = knot_pkt_parse(pkt, 0); if (ret == KNOT_ETRAIL && is_outgoing && !kr_fails_assert(pkt->parsed < pkt->size)) @@ -1684,13 +1752,14 @@ int worker_submit(struct session *session, struct io_comm_data *comm, struct http_ctx *http_ctx = NULL; #if ENABLE_DOH2 - http_ctx = session_http_get_server_ctx(session); - - /* Badly formed query when using DoH leads to a Bad Request */ - if (http_ctx && !is_outgoing && ret) { - http_send_status(session, HTTP_STATUS_BAD_REQUEST); - return kr_error(ret); - } + /* TODO: devise a way to do this... 
don't know yet */ +// http_ctx = session_http_get_server_ctx(session); +// +// /* Badly formed query when using DoH leads to a Bad Request */ +// if (http_ctx && !is_outgoing && ret) { +// http_send_status(session, HTTP_STATUS_BAD_REQUEST); +// return ret; +// } #endif if (!is_outgoing && http_ctx && queue_len(http_ctx->streams) <= 0) @@ -1737,31 +1806,31 @@ int worker_submit(struct session *session, struct io_comm_data *comm, } } else { /* response from upstream */ const uint16_t id = knot_wire_get_id(pkt->wire); - task = session_tasklist_del_msgid(session, id); + task = session2_tasklist_del_msgid(session, id); if (task == NULL) { VERBOSE_MSG(NULL, "=> ignoring packet with mismatching ID %d\n", (int)id); return kr_error(ENOENT); } - if (kr_fails_assert(!session_flags(session)->closing)) + if (kr_fails_assert(!session->closing)) return kr_error(EINVAL); addr = (comm) ? comm->src_addr : NULL; /* Note receive time for RTT calculation */ task->recv_time = kr_now(); } - if (kr_fails_assert(!uv_is_closing(session_get_handle(session)))) + if (kr_fails_assert(!uv_is_closing(session2_get_handle(session)))) return kr_error(EINVAL); /* Packet was successfully parsed. * Task was created (found). */ - session_touch(session); + session2_touch(session); /* Consume input and produce next message */ return qr_task_step(task, addr, pkt); } static int trie_add_tcp_session(trie_t *trie, const struct sockaddr *addr, - struct session *session) + struct session2 *session) { if (kr_fails_assert(trie && addr)) return kr_error(EINVAL); @@ -1788,7 +1857,7 @@ static int trie_del_tcp_session(trie_t *trie, const struct sockaddr *addr) return ret ? kr_error(ENOENT) : kr_ok(); } -static struct session *trie_find_tcp_session(trie_t *trie, +static struct session2 *trie_find_tcp_session(trie_t *trie, const struct sockaddr *addr) { if (kr_fails_assert(trie && addr)) @@ -1801,7 +1870,7 @@ static struct session *trie_find_tcp_session(trie_t *trie, return val ? 
*val : NULL; } -int worker_add_tcp_connected(const struct sockaddr* addr, struct session *session) +int worker_add_tcp_connected(const struct sockaddr* addr, struct session2 *session) { return trie_add_tcp_session(the_worker->tcp_connected, addr, session); } @@ -1811,13 +1880,13 @@ int worker_del_tcp_connected(const struct sockaddr* addr) return trie_del_tcp_session(the_worker->tcp_connected, addr); } -struct session* worker_find_tcp_connected(const struct sockaddr* addr) +struct session2* worker_find_tcp_connected(const struct sockaddr* addr) { return trie_find_tcp_session(the_worker->tcp_connected, addr); } static int worker_add_tcp_waiting(const struct sockaddr* addr, - struct session *session) + struct session2 *session) { return trie_add_tcp_session(the_worker->tcp_waiting, addr, session); } @@ -1827,41 +1896,28 @@ int worker_del_tcp_waiting(const struct sockaddr* addr) return trie_del_tcp_session(the_worker->tcp_waiting, addr); } -struct session* worker_find_tcp_waiting(const struct sockaddr* addr) +struct session2* worker_find_tcp_waiting(const struct sockaddr* addr) { return trie_find_tcp_session(the_worker->tcp_waiting, addr); } -int worker_end_tcp(struct session *session) +int worker_end_tcp(struct session2 *session) { if (!session) return kr_error(EINVAL); - session_timer_stop(session); + session2_timer_stop(session); - struct sockaddr *peer = session_get_peer(session); + struct sockaddr *peer = session2_get_peer(session); worker_del_tcp_waiting(peer); worker_del_tcp_connected(peer); - session_flags(session)->connected = false; - - struct tls_client_ctx *tls_client_ctx = session_tls_get_client_ctx(session); - if (tls_client_ctx) { - /* Avoid gnutls_bye() call */ - tls_set_hs_state(&tls_client_ctx->c, TLS_HS_NOT_STARTED); - } - struct tls_ctx *tls_ctx = session_tls_get_server_ctx(session); - if (tls_ctx) { - /* Avoid gnutls_bye() call */ - tls_set_hs_state(&tls_ctx->c, TLS_HS_NOT_STARTED); - } - - while (!session_waitinglist_is_empty(session)) { - struct 
qr_task *task = session_waitinglist_pop(session, false); + while (!session2_waitinglist_is_empty(session)) { + struct qr_task *task = session2_waitinglist_pop(session, false); kr_assert(task->refs > 1); - session_tasklist_del(session, task); - if (session_flags(session)->outgoing) { + session2_tasklist_del(session, task); + if (session->outgoing) { if (task->ctx->req.options.FORWARD) { /* We are in TCP_FORWARD mode. * To prevent failing at kr_resolve_consume() @@ -1879,9 +1935,9 @@ int worker_end_tcp(struct session *session) } worker_task_unref(task); } - while (!session_tasklist_is_empty(session)) { - struct qr_task *task = session_tasklist_del_first(session, false); - if (session_flags(session)->outgoing) { + while (!session2_tasklist_is_empty(session)) { + struct qr_task *task = session2_tasklist_del_first(session, false); + if (session->outgoing) { if (task->ctx->req.options.FORWARD) { struct kr_request *req = &task->ctx->req; struct kr_rplan *rplan = &req->rplan; @@ -1895,8 +1951,25 @@ int worker_end_tcp(struct session *session) } worker_task_unref(task); } - session_close(session); + return kr_ok(); + +// session_flags(session)->connected = false; +// +// struct tls_client_ctx *tls_client_ctx = session_tls_get_client_ctx(session); +// if (tls_client_ctx) { +// /* Avoid gnutls_bye() call */ +// tls_set_hs_state(&tls_client_ctx->c, TLS_HS_NOT_STARTED); +// } +// +// struct tls_ctx *tls_ctx = session_tls_get_server_ctx(session); +// if (tls_ctx) { +// /* Avoid gnutls_bye() call */ +// tls_set_hs_state(&tls_ctx->c, TLS_HS_NOT_STARTED); +// } +// +// session_close(session); +// return kr_ok(); } knot_pkt_t *worker_resolve_mk_pkt_dname(knot_dname_t *qname, uint16_t qtype, uint16_t qclass, @@ -2044,7 +2117,7 @@ struct request_ctx *worker_task_get_request(struct qr_task *task) return task->ctx; } -struct session *worker_request_get_source_session(const struct kr_request *req) +struct session2 *worker_request_get_source_session(const struct kr_request *req) { 
static_assert(offsetof(struct request_ctx, req) == 0, "Bad struct request_ctx definition."); @@ -2112,12 +2185,253 @@ void worker_deinit(void) the_worker = NULL; } +static inline knot_pkt_t *produce_packet_dgram(char *buf, size_t buf_len) +{ + return knot_pkt_new(buf, buf_len, &the_worker->pkt_pool); +} + +static enum protolayer_cb_result pl_dns_dgram_unwrap( + struct protolayer_data *layer, struct protolayer_cb_ctx *ctx) +{ + struct session2 *session = ctx->manager->session; + + if (ctx->payload.type == PROTOLAYER_PAYLOAD_EVENT) { + /* pass thru */ + return protolayer_continue(ctx); + } + + if (ctx->payload.type == PROTOLAYER_PAYLOAD_IOVEC) { + int ret = kr_ok(); + for (int i = 0; i < ctx->payload.iovec.cnt; i++) { + struct iovec *iov = &ctx->payload.iovec.iov[i]; + knot_pkt_t *pkt = produce_packet_dgram( + iov->iov_base, iov->iov_len); + if (!pkt) { + ret = KNOT_EMALF; + break; + } + + ret = worker_submit(session, &ctx->comm, NULL, NULL, pkt); + if (ret) + break; + } + + return protolayer_break(ctx, ret); + } else if (ctx->payload.type == PROTOLAYER_PAYLOAD_BUFFER) { + knot_pkt_t *pkt = produce_packet_dgram( + ctx->payload.buffer.buf, + ctx->payload.buffer.len); + if (!pkt) + return protolayer_break(ctx, KNOT_EMALF); + + int ret = worker_submit(session, &ctx->comm, NULL, NULL, pkt); + return protolayer_break(ctx, ret); + } else if (ctx->payload.type == PROTOLAYER_PAYLOAD_WIRE_BUF) { + knot_pkt_t *pkt = produce_packet_dgram( + wire_buf_data(ctx->payload.wire_buf), + wire_buf_data_length(ctx->payload.wire_buf)); + if (!pkt) + return protolayer_break(ctx, KNOT_EMALF); + + int ret = worker_submit(session, &ctx->comm, NULL, NULL, pkt); + wire_buf_reset(ctx->payload.wire_buf); + return protolayer_break(ctx, ret); + } else { + kr_assert(false && "Invalid payload"); + return protolayer_break(ctx, kr_error(EINVAL)); + } +} + +struct pl_dns_stream_sess_data { + bool single : 1; /**< True: Stream only allows a single packet */ + bool produced : 1; /**< True: At least one 
packet has been produced */ +}; + +struct pl_dns_stream_iter_data { + struct { + knot_mm_t *pool; + void *mem; + } sent; +}; + +static void pl_dns_stream_sess_init_common(struct pl_dns_stream_sess_data *stream, + bool single) +{ + *stream = (struct pl_dns_stream_sess_data){ + .single = single + }; +} + +static int pl_dns_mstream_sess_init(struct protolayer_manager *manager, + struct protolayer_data *layer) +{ + struct pl_dns_stream_sess_data *stream = protolayer_sess_data(layer); + pl_dns_stream_sess_init_common(stream, false); + return kr_ok(); +} + +static int pl_dns_sstream_sess_init(struct protolayer_manager *manager, + struct protolayer_data *layer) +{ + struct pl_dns_stream_sess_data *stream = protolayer_sess_data(layer); + pl_dns_stream_sess_init_common(stream, true); + return kr_ok(); +} + +static int pl_dns_stream_iter_init(struct protolayer_manager *manager, + struct protolayer_data *layer) +{ + struct pl_dns_stream_iter_data *stream = protolayer_iter_data(layer); + *stream = (struct pl_dns_stream_iter_data){0}; + return kr_ok(); +} + +static int pl_dns_stream_iter_deinit(struct protolayer_manager *manager, + struct protolayer_data *layer) +{ + struct pl_dns_stream_iter_data *stream = protolayer_iter_data(layer); + mm_free(stream->sent.pool, stream->sent.mem); + return kr_ok(); +} + +static enum protolayer_cb_result pl_dns_stream_unwrap( + struct protolayer_data *layer, struct protolayer_cb_ctx *ctx) +{ + if (ctx->payload.type == PROTOLAYER_PAYLOAD_EVENT) { + /* pass thru */ + return protolayer_continue(ctx); + } + + if (kr_fails_assert(ctx->payload.type == PROTOLAYER_PAYLOAD_WIRE_BUF)) { + /* DNS stream only works with a wire buffer */ + return protolayer_break(ctx, kr_error(EINVAL)); + } + + struct pl_dns_stream_sess_data *stream = protolayer_sess_data(layer); + + if (stream->single && stream->produced) { + if (kr_log_is_debug(WORKER, NULL)) { + kr_log_debug(WORKER, "Unexpected extra data from %s\n", + kr_straddr(ctx->comm.src_addr)); + } + return 
protolayer_break(ctx, KNOT_EMALF); + } + + struct wire_buf *wb = ctx->payload.wire_buf; + size_t pkt_len = ntohs(*(uint16_t *)wire_buf_data(wb)); + if (wire_buf_data_length(wb) < pkt_len + sizeof(uint16_t)) + return protolayer_wait(ctx); + + wire_buf_trim(wb, sizeof(uint16_t)); + knot_pkt_t *pkt = produce_packet_dgram(wire_buf_data(wb), pkt_len); + wire_buf_trim(wb, pkt_len); + stream->produced = true; + if (!pkt) + return protolayer_break(ctx, KNOT_EMALF); + + int ret = worker_submit(ctx->manager->session, &ctx->comm, NULL, NULL, pkt); + return protolayer_break(ctx, ret); +} + +struct sized_iovs { + uint16_t nlen; + struct iovec iovs[]; +}; + +static enum protolayer_cb_result pl_dns_stream_wrap( + struct protolayer_data *layer, struct protolayer_cb_ctx *ctx) +{ + if (ctx->payload.type == PROTOLAYER_PAYLOAD_EVENT) { + /* pass thru */ + return protolayer_continue(ctx); + } + + struct pl_dns_stream_iter_data *stream = protolayer_iter_data(layer); + struct session2 *s = ctx->manager->session; + + if (kr_fails_assert(!stream->sent.mem)) + return protolayer_break(ctx, kr_error(EINVAL)); + + if (ctx->payload.type == PROTOLAYER_PAYLOAD_BUFFER) { + if (kr_fails_assert(ctx->payload.buffer.len <= UINT16_MAX)) + return protolayer_break(ctx, kr_error(EMSGSIZE)); + + const int iovcnt = 2; + struct sized_iovs *siov = mm_alloc(&s->pool, + sizeof(*siov) + iovcnt * sizeof(struct iovec)); + kr_require(siov); + siov->nlen = htons(ctx->payload.buffer.len); + siov->iovs[0] = (struct iovec){ + .iov_base = &siov->nlen, + .iov_len = sizeof(siov->nlen) + }; + siov->iovs[1] = (struct iovec){ + .iov_base = ctx->payload.buffer.buf, + .iov_len = ctx->payload.buffer.len + }; + + stream->sent.mem = siov; + stream->sent.pool = &s->pool; + + ctx->payload = protolayer_iovec(siov->iovs, iovcnt); + return protolayer_continue(ctx); + } else if (ctx->payload.type == PROTOLAYER_PAYLOAD_IOVEC) { + const int iovcnt = 1 + ctx->payload.iovec.cnt; + struct sized_iovs *siov = mm_alloc(&s->pool, + 
sizeof(*siov) + iovcnt * sizeof(struct iovec)); + kr_require(siov); + + size_t total_len = 0; + for (int i = 0; i < ctx->payload.iovec.cnt; i++) { + const struct iovec *iov = &ctx->payload.iovec.iov[i]; + total_len += iov->iov_len; + siov->iovs[i + 1] = *iov; + } + + if (kr_fails_assert(total_len <= UINT16_MAX)) + return protolayer_break(ctx, kr_error(EMSGSIZE)); + siov->nlen = htons(total_len); + siov->iovs[0] = (struct iovec){ + .iov_base = &siov->nlen, + .iov_len = sizeof(siov->nlen) + }; + + stream->sent.mem = siov; + stream->sent.pool = &s->pool; + + ctx->payload = protolayer_iovec(siov->iovs, iovcnt); + return protolayer_continue(ctx); + } else { + kr_assert(false && "Invalid payload"); + return protolayer_break(ctx, kr_error(EINVAL)); + } +} + + int worker_init(void) { if (kr_fails_assert(the_worker == NULL)) return kr_error(EINVAL); kr_bindings_register(the_engine->L); // TODO move + /* DNS protocol layers */ + protolayer_globals[PROTOLAYER_DNS_DGRAM] = (struct protolayer_globals){ + .unwrap = pl_dns_dgram_unwrap, + }; + const struct protolayer_globals stream_common = { + .sess_size = sizeof(struct pl_dns_stream_sess_data), + .sess_init = NULL, /* replaced in specific layers below */ + .iter_size = sizeof(struct pl_dns_stream_iter_data), + .iter_init = pl_dns_stream_iter_init, + .iter_deinit = pl_dns_stream_iter_deinit, + .unwrap = pl_dns_stream_unwrap, + .wrap = pl_dns_stream_wrap + }; + protolayer_globals[PROTOLAYER_DNS_MSTREAM] = stream_common; + protolayer_globals[PROTOLAYER_DNS_MSTREAM].sess_init = pl_dns_mstream_sess_init; + protolayer_globals[PROTOLAYER_DNS_SSTREAM] = stream_common; + protolayer_globals[PROTOLAYER_DNS_SSTREAM].sess_init = pl_dns_sstream_sess_init; + /* Create main worker. */ the_worker = &the_worker_value; memset(the_worker, 0, sizeof(*the_worker)); diff --git a/daemon/worker.h b/daemon/worker.h index ee9677c44..40e1df0c2 100644 --- a/daemon/worker.h +++ b/daemon/worker.h @@ -11,14 +11,14 @@ /** Query resolution task (opaque).
*/ struct qr_task; -/** Worker state (opaque). */ +/** Worker state. */ struct worker_ctx; /** Transport session (opaque). */ -struct session; +struct session2; /** Zone import context (opaque). */ struct zone_import_ctx; /** Data about the communication (defined in io.h). */ -struct io_comm_data; +struct comm_info; /** Pointer to the singleton worker. NULL if not initialized. */ KR_EXPORT extern struct worker_ctx *the_worker; @@ -39,14 +39,14 @@ void worker_deinit(); * @param pkt the packet, or NULL (an error from the transport layer) * @return 0 or an error code */ -int worker_submit(struct session *session, struct io_comm_data *comm, +int worker_submit(struct session2 *session, struct comm_info *comm, const uint8_t *eth_from, const uint8_t *eth_to, knot_pkt_t *pkt); /** * End current DNS/TCP session, this disassociates pending tasks from this session * which may be freely closed afterwards. */ -int worker_end_tcp(struct session *session); +int worker_end_tcp(struct session2 *session); KR_EXPORT knot_pkt_t *worker_resolve_mk_pkt_dname(knot_dname_t *qname, uint16_t qtype, uint16_t qclass, const struct kr_qflags *options); @@ -93,17 +93,17 @@ void worker_task_unref(struct qr_task *task); void worker_task_timeout_inc(struct qr_task *task); -int worker_add_tcp_connected(const struct sockaddr *addr, struct session *session); +int worker_add_tcp_connected(const struct sockaddr *addr, struct session2 *session); int worker_del_tcp_connected(const struct sockaddr *addr); int worker_del_tcp_waiting(const struct sockaddr* addr); -struct session* worker_find_tcp_waiting(const struct sockaddr* addr); -struct session* worker_find_tcp_connected(const struct sockaddr* addr); +struct session2* worker_find_tcp_waiting(const struct sockaddr* addr); +struct session2* worker_find_tcp_connected(const struct sockaddr* addr); knot_pkt_t *worker_task_get_pktbuf(const struct qr_task *task); struct request_ctx *worker_task_get_request(struct qr_task *task); /** Note: source session is NULL
in case the request hasn't come over network. */ -KR_EXPORT struct session *worker_request_get_source_session(const struct kr_request *req); +KR_EXPORT struct session2 *worker_request_get_source_session(const struct kr_request *req); uint16_t worker_task_pkt_get_msgid(struct qr_task *task); void worker_task_pkt_set_msgid(struct qr_task *task, uint16_t msgid); @@ -163,8 +163,6 @@ struct worker_ctx { struct sockaddr_in out_addr4; struct sockaddr_in6 out_addr6; - uint8_t wire_buf[RECVMMSG_BATCH * KNOT_WIRE_MAX_PKTSIZE]; - struct worker_stats stats; bool too_many_open; diff --git a/modules/dnstap/dnstap.c b/modules/dnstap/dnstap.c index 757266728..ab52bca3d 100644 --- a/modules/dnstap/dnstap.c +++ b/modules/dnstap/dnstap.c @@ -10,7 +10,7 @@ #include "modules/dnstap/dnstap.pb-c.h" #include "contrib/cleanup.h" -#include "daemon/session.h" +#include "daemon/session2.h" #include "daemon/worker.h" #include "lib/layer.h" #include "lib/resolve.h" @@ -116,7 +116,7 @@ static int get_tcp_info(const struct kr_request *req, struct tcp_info *info) if (!req->qsource.dst_addr || !req->qsource.flags.tcp) /* not TCP-based */ return -abs(ENOENT); /* First obtain the file-descriptor. */ - uv_handle_t *h = session_get_handle(worker_request_get_source_session(req)); + uv_handle_t *h = session2_get_handle(worker_request_get_source_session(req)); uv_os_fd_t fd; int ret = uv_fileno(h, &fd); if (ret)