]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
resolve: dedup entries in /etc/hosts 25658/head
authorYu Watanabe <watanabe.yu+github@gmail.com>
Wed, 7 Dec 2022 14:39:56 +0000 (23:39 +0900)
committerYu Watanabe <watanabe.yu+github@gmail.com>
Tue, 13 Dec 2022 11:37:48 +0000 (20:37 +0900)
This improves the performance of parsing the file and reduces memory pressure.

Running 'fuzz-etc-hosts timeout-strv' with valgrind,

Before:
total heap usage: 321,020 allocs, 321,020 frees, 15,820,387,193 bytes allocated
real    0m23.531s
user    0m21.458s
sys     0m1.961s

After:
total heap usage: 112,408 allocs, 112,408 frees, 7,297,480 bytes allocated
real    0m8.664s
user    0m8.545s
sys     0m0.065s

Hopefully fixes oss-fuzz#47708 (https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=47708).

src/resolve/resolved-etc-hosts.c
src/resolve/resolved-etc-hosts.h
src/resolve/test-resolved-etc-hosts.c
test/fuzz/fuzz-etc-hosts/oss-fuzz-47708 [new file with mode: 0644]

index 2c002a3be40dec1e408aa5203b21d39b657424f9..334778706a9a1c639084572be62bbc2d9b1e2937 100644 (file)
@@ -22,7 +22,7 @@ static EtcHostsItemByAddress *etc_hosts_item_by_address_free(EtcHostsItemByAddre
         if (!item)
                 return NULL;
 
-        strv_free(item->names);
+        set_free(item->names);
         return mfree(item);
 }
 
@@ -41,7 +41,7 @@ static EtcHostsItemByName *etc_hosts_item_by_name_free(EtcHostsItemByName *item)
                 return NULL;
 
         free(item->name);
-        free(item->addresses);
+        set_free(item->addresses);
         return mfree(item);
 }
 
@@ -153,20 +153,21 @@ static int parse_line(EtcHosts *hosts, unsigned nr, const char *line) {
                         continue;
                 }
 
-                r = strv_extend_with_size(&item->names, &item->n_names, name);
-                if (r < 0)
-                        return log_oom();
-
                 bn = hashmap_get(hosts->by_name, name);
                 if (!bn) {
                         _cleanup_(etc_hosts_item_by_name_freep) EtcHostsItemByName *new_item = NULL;
+                        _cleanup_free_ char *name_copy = NULL;
+
+                        name_copy = strdup(name);
+                        if (!name_copy)
+                                return log_oom();
 
                         new_item = new(EtcHostsItemByName, 1);
                         if (!new_item)
                                 return log_oom();
 
                         *new_item = (EtcHostsItemByName) {
-                                .name = TAKE_PTR(name),
+                                .name = TAKE_PTR(name_copy),
                         };
 
                         r = hashmap_ensure_put(&hosts->by_name, &by_name_hash_ops, new_item->name, new_item);
@@ -176,10 +177,21 @@ static int parse_line(EtcHosts *hosts, unsigned nr, const char *line) {
                         bn = TAKE_PTR(new_item);
                 }
 
-                if (!GREEDY_REALLOC(bn->addresses, bn->n_addresses + 1))
-                        return log_oom();
+                if (!set_contains(bn->addresses, &address)) {
+                        _cleanup_free_ struct in_addr_data *address_copy = NULL;
+
+                        address_copy = newdup(struct in_addr_data, &address, 1);
+                        if (!address_copy)
+                                return log_oom();
 
-                bn->addresses[bn->n_addresses++] = &item->address;
+                        r = set_ensure_consume(&bn->addresses, &in_addr_data_hash_ops_free, TAKE_PTR(address_copy));
+                        if (r < 0)
+                                return log_oom();
+                }
+
+                r = set_ensure_consume(&item->names, &dns_name_hash_ops_free, TAKE_PTR(name));
+                if (r < 0)
+                        return log_oom();
         }
 
         if (!found)
@@ -217,6 +229,7 @@ static void strip_localhost(EtcHosts *hosts) {
         for (size_t j = 0; j < ELEMENTSOF(local_in_addrs); j++) {
                 bool all_localhost, all_local_address;
                 EtcHostsItemByAddress *item;
+                const char *name;
 
                 item = hashmap_get(hosts->by_address, local_in_addrs + j);
                 if (!item)
@@ -224,8 +237,8 @@ static void strip_localhost(EtcHosts *hosts) {
 
                 /* Check whether all hostnames the loopback address points to are localhost ones */
                 all_localhost = true;
-                STRV_FOREACH(i, item->names)
-                        if (!is_localhost(*i)) {
+                SET_FOREACH(name, item->names)
+                        if (!is_localhost(name)) {
                                 all_localhost = false;
                                 break;
                         }
@@ -236,17 +249,18 @@ static void strip_localhost(EtcHosts *hosts) {
                 /* Now check if the names listed for this address actually all point back just to this
                  * address (or the other loopback address). If not, let's stay away from this too. */
                 all_local_address = true;
-                STRV_FOREACH(i, item->names) {
+                SET_FOREACH(name, item->names) {
                         EtcHostsItemByName *n;
+                        struct in_addr_data *a;
 
-                        n = hashmap_get(hosts->by_name, *i);
+                        n = hashmap_get(hosts->by_name, name);
                         if (!n) /* No reverse entry? Then almost certainly the entry already got deleted from
                                  * the previous iteration of this loop, i.e. via the other protocol */
                                 break;
 
                         /* Now check if the addresses of this item are all localhost addresses */
-                        for (size_t m = 0; m < n->n_addresses; m++)
-                                if (!in_addr_is_localhost(n->addresses[m]->family, &n->addresses[m]->address)) {
+                        SET_FOREACH(a, n->addresses)
+                                if (!in_addr_is_localhost(a->family, &a->address)) {
                                         all_local_address = false;
                                         break;
                                 }
@@ -258,8 +272,8 @@ static void strip_localhost(EtcHosts *hosts) {
                 if (!all_local_address)
                         continue;
 
-                STRV_FOREACH(i, item->names)
-                        etc_hosts_item_by_name_free(hashmap_remove(hosts->by_name, *i));
+                SET_FOREACH(name, item->names)
+                        etc_hosts_item_by_name_free(hashmap_remove(hosts->by_name, name));
 
                 assert_se(hashmap_remove(hosts->by_address, local_in_addrs + j) == item);
                 etc_hosts_item_by_address_free(item);
@@ -397,18 +411,20 @@ static int etc_hosts_lookup_by_address(
         }
 
         if (found_ptr) {
-                r = dns_answer_reserve(answer, item->n_names);
+                const char *n;
+
+                r = dns_answer_reserve(answer, set_size(item->names));
                 if (r < 0)
                         return r;
 
-                STRV_FOREACH(n, item->names) {
+                SET_FOREACH(n, item->names) {
                         _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
 
                         rr = dns_resource_record_new(found_ptr);
                         if (!rr)
                                 return -ENOMEM;
 
-                        rr->ptr.name = strdup(*n);
+                        rr->ptr.name = strdup(n);
                         if (!rr->ptr.name)
                                 return -ENOMEM;
 
@@ -428,6 +444,7 @@ static int etc_hosts_lookup_by_name(
                 DnsAnswer **answer) {
 
         bool found_a = false, found_aaaa = false;
+        const struct in_addr_data *a;
         EtcHostsItemByName *item;
         DnsResourceKey *t;
         int r;
@@ -439,7 +456,7 @@ static int etc_hosts_lookup_by_name(
 
         item = hashmap_get(hosts->by_name, name);
         if (item) {
-                r = dns_answer_reserve(answer, item->n_addresses);
+                r = dns_answer_reserve(answer, set_size(item->addresses));
                 if (r < 0)
                         return r;
         } else {
@@ -469,14 +486,14 @@ static int etc_hosts_lookup_by_name(
                         break;
         }
 
-        for (unsigned i = 0; item && i < item->n_addresses; i++) {
+        SET_FOREACH(a, item->addresses) {
                 _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
 
-                if ((!found_a && item->addresses[i]->family == AF_INET) ||
-                    (!found_aaaa && item->addresses[i]->family == AF_INET6))
+                if ((!found_a && a->family == AF_INET) ||
+                    (!found_aaaa && a->family == AF_INET6))
                         continue;
 
-                r = dns_resource_record_new_address(&rr, item->addresses[i]->family, &item->addresses[i]->address, item->name);
+                r = dns_resource_record_new_address(&rr, a->family, &a->address, item->name);
                 if (r < 0)
                         return r;
 
index 55f884746fc922b8716588a8673e39eb67e0fc1d..e1a7249f29866344239e35b8da353e50cbec3e29 100644 (file)
@@ -7,16 +7,12 @@
 
 typedef struct EtcHostsItemByAddress {
         struct in_addr_data address;
-
-        char **names;
-        size_t n_names;
+        Set *names;
 } EtcHostsItemByAddress;
 
 typedef struct EtcHostsItemByName {
         char *name;
-
-        struct in_addr_data **addresses;
-        size_t n_addresses;
+        Set *addresses;
 } EtcHostsItemByName;
 
 int etc_hosts_parse(EtcHosts *hosts, FILE *f);
index a0f712cbb64cb91bf70f7ec22beeab0ecead521d..19b2991a356ae26ec00b2c07ed40d2ee1332a21f 100644 (file)
@@ -27,13 +27,11 @@ TEST(parse_etc_hosts_system) {
         assert_se(etc_hosts_parse(&hosts, f) == 0);
 }
 
-#define address_equal_4(_addr, _address)                                \
-        ((_addr)->family == AF_INET &&                                  \
-         !memcmp(&(_addr)->address.in, &(struct in_addr) { .s_addr = (_address) }, 4))
+#define has_4(_set, _address_str)                                       \
+        set_contains(_set, &(struct in_addr_data) { .family = AF_INET, .address.in = { .s_addr = inet_addr(_address_str) } })
 
-#define address_equal_6(_addr, ...)                                     \
-        ((_addr)->family == AF_INET6 &&                                 \
-         !memcmp(&(_addr)->address.in6, &(struct in6_addr) { .s6_addr = __VA_ARGS__}, 16) )
+#define has_6(_set, ...)                                           \
+        set_contains(_set, &(struct in_addr_data) { .family = AF_INET6, .address.in6 = { .s6_addr = __VA_ARGS__ } })
 
 TEST(parse_etc_hosts) {
         _cleanup_(unlink_tempfilep) char
@@ -72,33 +70,29 @@ TEST(parse_etc_hosts) {
 
         EtcHostsItemByName *bn;
         assert_se(bn = hashmap_get(hosts.by_name, "some.where"));
-        assert_se(bn->n_addresses == 3);
-        assert_se(MALLOC_ELEMENTSOF(bn->addresses) >= 3);
-        assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.4")));
-        assert_se(address_equal_4(bn->addresses[1], inet_addr("1.2.3.5")));
-        assert_se(address_equal_6(bn->addresses[2], {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5}));
+        assert_se(set_size(bn->addresses) == 3);
+        assert_se(has_4(bn->addresses, "1.2.3.4"));
+        assert_se(has_4(bn->addresses, "1.2.3.5"));
+        assert_se(has_6(bn->addresses, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5}));
 
         assert_se(bn = hashmap_get(hosts.by_name, "dash"));
-        assert_se(bn->n_addresses == 1);
-        assert_se(MALLOC_ELEMENTSOF(bn->addresses) >= 1);
-        assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.6")));
+        assert_se(set_size(bn->addresses) == 1);
+        assert_se(has_4(bn->addresses, "1.2.3.6"));
 
         assert_se(bn = hashmap_get(hosts.by_name, "dash-dash.where-dash"));
-        assert_se(bn->n_addresses == 1);
-        assert_se(MALLOC_ELEMENTSOF(bn->addresses) >= 1);
-        assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.6")));
+        assert_se(set_size(bn->addresses) == 1);
+        assert_se(has_4(bn->addresses, "1.2.3.6"));
 
         /* See https://tools.ietf.org/html/rfc1035#section-2.3.1 */
         FOREACH_STRING(s, "bad-dash-", "-bad-dash", "-bad-dash.bad-")
                 assert_se(!hashmap_get(hosts.by_name, s));
 
         assert_se(bn = hashmap_get(hosts.by_name, "before.comment"));
-        assert_se(bn->n_addresses == 4);
-        assert_se(MALLOC_ELEMENTSOF(bn->addresses) >= 4);
-        assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.9")));
-        assert_se(address_equal_4(bn->addresses[1], inet_addr("1.2.3.10")));
-        assert_se(address_equal_4(bn->addresses[2], inet_addr("1.2.3.11")));
-        assert_se(address_equal_4(bn->addresses[3], inet_addr("1.2.3.12")));
+        assert_se(set_size(bn->addresses) == 4);
+        assert_se(has_4(bn->addresses, "1.2.3.9"));
+        assert_se(has_4(bn->addresses, "1.2.3.10"));
+        assert_se(has_4(bn->addresses, "1.2.3.11"));
+        assert_se(has_4(bn->addresses, "1.2.3.12"));
 
         assert_se(!hashmap_get(hosts.by_name, "within.comment"));
         assert_se(!hashmap_get(hosts.by_name, "within.comment2"));
@@ -113,9 +107,8 @@ TEST(parse_etc_hosts) {
         assert_se(!set_contains(hosts.no_address, "multi.colon"));
 
         assert_se(bn = hashmap_get(hosts.by_name, "some.other"));
-        assert_se(bn->n_addresses == 1);
-        assert_se(MALLOC_ELEMENTSOF(bn->addresses) >= 1);
-        assert_se(address_equal_6(bn->addresses[0], {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5}));
+        assert_se(set_size(bn->addresses) == 1);
+        assert_se(has_6(bn->addresses, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5}));
 
         assert_se( set_contains(hosts.no_address, "some.where"));
         assert_se( set_contains(hosts.no_address, "some.other"));
diff --git a/test/fuzz/fuzz-etc-hosts/oss-fuzz-47708 b/test/fuzz/fuzz-etc-hosts/oss-fuzz-47708
new file mode 100644 (file)
index 0000000..c0f59de
Binary files /dev/null and b/test/fuzz/fuzz-etc-hosts/oss-fuzz-47708 differ