From 16a6bc5a7a5da2482d96f7dc43da360ceab1c320 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 7 Dec 2022 23:39:56 +0900 Subject: [PATCH] resolve: dedup entries in /etc/hosts This improves the performance of parsing the file and reduces memory pressure. Running 'fuzz-etc-hosts timeout-strv' with valgrind, Before: total heap usage: 321,020 allocs, 321,020 frees, 15,820,387,193 bytes allocated real 0m23.531s user 0m21.458s sys 0m1.961s After: total heap usage: 112,408 allocs, 112,408 frees, 7,297,480 bytes allocated real 0m8.664s user 0m8.545s sys 0m0.065s Hopefully fixes oss-fuzz#47708 (https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=47708). --- src/resolve/resolved-etc-hosts.c | 69 +++++++++++++++--------- src/resolve/resolved-etc-hosts.h | 8 +-- src/resolve/test-resolved-etc-hosts.c | 45 +++++++--------- test/fuzz/fuzz-etc-hosts/oss-fuzz-47708 | Bin 0 -> 314200 bytes 4 files changed, 64 insertions(+), 58 deletions(-) create mode 100644 test/fuzz/fuzz-etc-hosts/oss-fuzz-47708 diff --git a/src/resolve/resolved-etc-hosts.c b/src/resolve/resolved-etc-hosts.c index 2c002a3be40..334778706a9 100644 --- a/src/resolve/resolved-etc-hosts.c +++ b/src/resolve/resolved-etc-hosts.c @@ -22,7 +22,7 @@ static EtcHostsItemByAddress *etc_hosts_item_by_address_free(EtcHostsItemByAddre if (!item) return NULL; - strv_free(item->names); + set_free(item->names); return mfree(item); } @@ -41,7 +41,7 @@ static EtcHostsItemByName *etc_hosts_item_by_name_free(EtcHostsItemByName *item) return NULL; free(item->name); - free(item->addresses); + set_free(item->addresses); return mfree(item); } @@ -153,20 +153,21 @@ static int parse_line(EtcHosts *hosts, unsigned nr, const char *line) { continue; } - r = strv_extend_with_size(&item->names, &item->n_names, name); - if (r < 0) - return log_oom(); - bn = hashmap_get(hosts->by_name, name); if (!bn) { _cleanup_(etc_hosts_item_by_name_freep) EtcHostsItemByName *new_item = NULL; + _cleanup_free_ char *name_copy = NULL; + + name_copy = strdup(name); + if (!name_copy) + return log_oom(); new_item = new(EtcHostsItemByName, 1); if (!new_item) return log_oom(); *new_item = (EtcHostsItemByName) { - .name = TAKE_PTR(name), + .name = TAKE_PTR(name_copy), }; r = hashmap_ensure_put(&hosts->by_name, &by_name_hash_ops, new_item->name, new_item); @@ -176,10 +177,21 @@ static int parse_line(EtcHosts *hosts, unsigned nr, const char *line) { bn = TAKE_PTR(new_item); } - if (!GREEDY_REALLOC(bn->addresses, bn->n_addresses + 1)) - return log_oom(); + if (!set_contains(bn->addresses, &address)) { + _cleanup_free_ struct in_addr_data *address_copy = NULL; + + address_copy = newdup(struct in_addr_data, &address, 1); + if (!address_copy) + return log_oom(); - bn->addresses[bn->n_addresses++] = &item->address; + r = set_ensure_consume(&bn->addresses, &in_addr_data_hash_ops_free, TAKE_PTR(address_copy)); + if (r < 0) + return log_oom(); + } + + r = set_ensure_consume(&item->names, &dns_name_hash_ops_free, TAKE_PTR(name)); + if (r < 0) + return log_oom(); } if (!found) @@ -217,6 +229,7 @@ static void strip_localhost(EtcHosts *hosts) { for (size_t j = 0; j < ELEMENTSOF(local_in_addrs); j++) { bool all_localhost, all_local_address; EtcHostsItemByAddress *item; + const char *name; item = hashmap_get(hosts->by_address, local_in_addrs + j); if (!item) @@ -224,8 +237,8 @@ static void strip_localhost(EtcHosts *hosts) { /* Check whether all hostnames the loopback address points to are localhost ones */ all_localhost = true; - STRV_FOREACH(i, item->names) - if (!is_localhost(*i)) { + SET_FOREACH(name, item->names) + if (!is_localhost(name)) { all_localhost = false; break; } @@ -236,17 +249,18 @@ static void strip_localhost(EtcHosts *hosts) { /* Now check if the names listed for this address actually all point back just to this * address (or the other loopback address). If not, let's stay away from this too. */ all_local_address = true; - STRV_FOREACH(i, item->names) { + SET_FOREACH(name, item->names) { EtcHostsItemByName *n; + struct in_addr_data *a; - n = hashmap_get(hosts->by_name, *i); + n = hashmap_get(hosts->by_name, name); if (!n) /* No reverse entry? Then almost certainly the entry already got deleted from * the previous iteration of this loop, i.e. via the other protocol */ break; /* Now check if the addresses of this item are all localhost addresses */ - for (size_t m = 0; m < n->n_addresses; m++) - if (!in_addr_is_localhost(n->addresses[m]->family, &n->addresses[m]->address)) { + SET_FOREACH(a, n->addresses) + if (!in_addr_is_localhost(a->family, &a->address)) { all_local_address = false; break; } @@ -258,8 +272,8 @@ static void strip_localhost(EtcHosts *hosts) { if (!all_local_address) continue; - STRV_FOREACH(i, item->names) - etc_hosts_item_by_name_free(hashmap_remove(hosts->by_name, *i)); + SET_FOREACH(name, item->names) + etc_hosts_item_by_name_free(hashmap_remove(hosts->by_name, name)); assert_se(hashmap_remove(hosts->by_address, local_in_addrs + j) == item); etc_hosts_item_by_address_free(item); @@ -397,18 +411,20 @@ static int etc_hosts_lookup_by_address( } if (found_ptr) { - r = dns_answer_reserve(answer, item->n_names); + const char *n; + + r = dns_answer_reserve(answer, set_size(item->names)); if (r < 0) return r; - STRV_FOREACH(n, item->names) { + SET_FOREACH(n, item->names) { _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL; rr = dns_resource_record_new(found_ptr); if (!rr) return -ENOMEM; - rr->ptr.name = strdup(*n); + rr->ptr.name = strdup(n); if (!rr->ptr.name) return -ENOMEM; @@ -428,6 +444,7 @@ static int etc_hosts_lookup_by_name( DnsAnswer **answer) { bool found_a = false, found_aaaa = false; + const struct in_addr_data *a; EtcHostsItemByName *item; DnsResourceKey *t; int r; @@ -439,7 +456,7 @@ static int etc_hosts_lookup_by_name( item = hashmap_get(hosts->by_name, name); if (item) { - r = dns_answer_reserve(answer, item->n_addresses); + r = dns_answer_reserve(answer, set_size(item->addresses)); if (r < 0) return r; } else { @@ -469,14 +486,14 @@ static int etc_hosts_lookup_by_name( break; } - for (unsigned i = 0; item && i < item->n_addresses; i++) { + SET_FOREACH(a, item->addresses) { _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL; - if ((!found_a && item->addresses[i]->family == AF_INET) || - (!found_aaaa && item->addresses[i]->family == AF_INET6)) + if ((!found_a && a->family == AF_INET) || + (!found_aaaa && a->family == AF_INET6)) continue; - r = dns_resource_record_new_address(&rr, item->addresses[i]->family, &item->addresses[i]->address, item->name); + r = dns_resource_record_new_address(&rr, a->family, &a->address, item->name); if (r < 0) return r; diff --git a/src/resolve/resolved-etc-hosts.h b/src/resolve/resolved-etc-hosts.h index 55f884746fc..e1a7249f298 100644 --- a/src/resolve/resolved-etc-hosts.h +++ b/src/resolve/resolved-etc-hosts.h @@ -7,16 +7,12 @@ typedef struct EtcHostsItemByAddress { struct in_addr_data address; - - char **names; - size_t n_names; + Set *names; } EtcHostsItemByAddress; typedef struct EtcHostsItemByName { char *name; - - struct in_addr_data **addresses; - size_t n_addresses; + Set *addresses; } EtcHostsItemByName; int etc_hosts_parse(EtcHosts *hosts, FILE *f); diff --git a/src/resolve/test-resolved-etc-hosts.c b/src/resolve/test-resolved-etc-hosts.c index a0f712cbb64..19b2991a356 100644 --- a/src/resolve/test-resolved-etc-hosts.c +++ b/src/resolve/test-resolved-etc-hosts.c @@ -27,13 +27,11 @@ TEST(parse_etc_hosts_system) { assert_se(etc_hosts_parse(&hosts, f) == 0); } -#define address_equal_4(_addr, _address) \ - ((_addr)->family == AF_INET && \ - !memcmp(&(_addr)->address.in, &(struct in_addr) { .s_addr = (_address) }, 4)) +#define has_4(_set, _address_str) \ + set_contains(_set, &(struct in_addr_data) { .family = AF_INET, .address.in = { .s_addr = inet_addr(_address_str) } }) -#define address_equal_6(_addr, ...) \ - ((_addr)->family == AF_INET6 && \ - !memcmp(&(_addr)->address.in6, &(struct in6_addr) { .s6_addr = __VA_ARGS__}, 16) ) +#define has_6(_set, ...) \ + set_contains(_set, &(struct in_addr_data) { .family = AF_INET6, .address.in6 = { .s6_addr = __VA_ARGS__ } }) TEST(parse_etc_hosts) { _cleanup_(unlink_tempfilep) char @@ -72,33 +70,29 @@ TEST(parse_etc_hosts) { EtcHostsItemByName *bn; assert_se(bn = hashmap_get(hosts.by_name, "some.where")); - assert_se(bn->n_addresses == 3); - assert_se(MALLOC_ELEMENTSOF(bn->addresses) >= 3); - assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.4"))); - assert_se(address_equal_4(bn->addresses[1], inet_addr("1.2.3.5"))); - assert_se(address_equal_6(bn->addresses[2], {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5})); + assert_se(set_size(bn->addresses) == 3); + assert_se(has_4(bn->addresses, "1.2.3.4")); + assert_se(has_4(bn->addresses, "1.2.3.5")); + assert_se(has_6(bn->addresses, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5})); assert_se(bn = hashmap_get(hosts.by_name, "dash")); - assert_se(bn->n_addresses == 1); - assert_se(MALLOC_ELEMENTSOF(bn->addresses) >= 1); - assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.6"))); + assert_se(set_size(bn->addresses) == 1); + assert_se(has_4(bn->addresses, "1.2.3.6")); assert_se(bn = hashmap_get(hosts.by_name, "dash-dash.where-dash")); - assert_se(bn->n_addresses == 1); - assert_se(MALLOC_ELEMENTSOF(bn->addresses) >= 1); - assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.6"))); + assert_se(set_size(bn->addresses) == 1); + assert_se(has_4(bn->addresses, "1.2.3.6")); /* See https://tools.ietf.org/html/rfc1035#section-2.3.1 */ FOREACH_STRING(s, "bad-dash-", "-bad-dash", "-bad-dash.bad-") assert_se(!hashmap_get(hosts.by_name, s)); assert_se(bn = hashmap_get(hosts.by_name, "before.comment")); - assert_se(bn->n_addresses == 4); - assert_se(MALLOC_ELEMENTSOF(bn->addresses) >= 4); - assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.9"))); - assert_se(address_equal_4(bn->addresses[1], inet_addr("1.2.3.10"))); - assert_se(address_equal_4(bn->addresses[2], inet_addr("1.2.3.11"))); - assert_se(address_equal_4(bn->addresses[3], inet_addr("1.2.3.12"))); + assert_se(set_size(bn->addresses) == 4); + assert_se(has_4(bn->addresses, "1.2.3.9")); + assert_se(has_4(bn->addresses, "1.2.3.10")); + assert_se(has_4(bn->addresses, "1.2.3.11")); + assert_se(has_4(bn->addresses, "1.2.3.12")); assert_se(!hashmap_get(hosts.by_name, "within.comment")); assert_se(!hashmap_get(hosts.by_name, "within.comment2")); @@ -113,9 +107,8 @@ TEST(parse_etc_hosts) { assert_se(!set_contains(hosts.no_address, "multi.colon")); assert_se(bn = hashmap_get(hosts.by_name, "some.other")); - assert_se(bn->n_addresses == 1); - assert_se(MALLOC_ELEMENTSOF(bn->addresses) >= 1); - assert_se(address_equal_6(bn->addresses[0], {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5})); + assert_se(set_size(bn->addresses) == 1); + assert_se(has_6(bn->addresses, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5})); assert_se( set_contains(hosts.no_address, "some.where")); assert_se( set_contains(hosts.no_address, "some.other")); diff --git a/test/fuzz/fuzz-etc-hosts/oss-fuzz-47708 b/test/fuzz/fuzz-etc-hosts/oss-fuzz-47708 new file mode 100644 index 0000000000000000000000000000000000000000..c0f59de6c9481fb527dca38fb0b89c594b04882c GIT binary patch literal 314200 zc-rmUL245L0EOW!x@Z@u&}VOu&}VOu&}VOu&}VOu&}VOu&}VOu&^r~F*htM zEG#T6EG#T6EG+DEV7;3V78Vv378Vv37Iwo}JHo=k!otGB!otGB!otGB!otGB!otGB z!otGB!otGB!otGB!otGB!otGB!otGB!otGB!otGB!otGB!otGB!otGB!ooI-?OGNV z_OTNmSr!%+78Vv378Vv3c3DzwSy)(DSXfwCSlHHLOVh%_!otGB!otGB!otGBE_-4N z!NS7A!otGB!otGB!otGB!otGB!otGB!otGB!oqF~Ye!gESXfwCSXfwCSXfwCSXfwC zSXfwCSXfwCSXfwCSXfwCSXfwCSXfwCSXfwCSXfwCSXfwCSXfwCSXfwCSXfwCSXkI* zv0cl;!otGB!otGB!otGB!mf?2Eei{~CbpkCQ}e>Y!otGB?k~19Ei5c7EG#T6EG#T6 zEG#T6EG#T6EG#T6EG#T6EG#VSYFLZG!otGB!otGB!otGB!otGB!otGB!otGB!otGB z!otEH6t-0pi7mF+Vv8;IGyWul&OmH;c5|_7u|3Ac7F%qw#THv^vBef!Y_Y`_TWqn# z7F%qw#THv^vBef!Y_Y`_TWqn#7F%qwcOtg;guueW!otGB!otGB!otGB!Y;nI7%VI- zEG#T6EG#T6EG#T6ENrW=XPw1|We*8!v7NxymW73dg@uKMg@tV`wlpm)EG#T6EG#T6 zEG+DyV~;JnF|37PVPRomVPRomVPRomVPRomVPRomVPRomVPRomVYh{~BP=W|EG#T6 zEG#T6EG#T6EG#T6EG#T6EG#T6EG#T6EG#T6EG#T6EG#T6EG#T6EG#T6EG#T6EG#T6 zEG#T6EG#T6ENrvbu4Q3iVPRomVPRomVPRn##Fp^78Vv378Vv378Vv378Vv378Vv378Vv378Vv3_Mot> znn-N1#THv^v7hlL8FU6>yR(~%U5o88F1FZWi!HX;Vv8-d*kX$1Pcob3kwSi3kwSi3kwSi3kwSi3kwSi3kwSi3k$m~tQ}!tVPRomVPRomVPRom zVPRomVPRomVPRomVPRomVPRomVPRomVPRomVPRomVPRomVPRomVPRomVPRomVPRom zVPRpL#da+V3kwSi3kwSi3kwSi+c360W3>GmY}J)7w%B5eEwkTo>befNu-liG}?=JV;-=QnQW$HI0u|1Pq5pI96 z#TMJM-WB2Y7yH0s=gaS-FXaAzY_B>gJEaGJE8>yZTCaZAoK(GD`R8w=_Ud8(3n&yq ABLDyZ literal 0 Hc-jL100001 -- 2.47.3