From: Nick Alcock
Date: Tue, 3 Jun 2025 11:01:45 +0000 (+0100)
Subject: libctf, dedup: reclaim space wasted by duplicate hidden types
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d4e9d956572a7bbf96a5500d70ddfe5a33f23ced;p=thirdparty%2Fbinutils-gdb.git

libctf, dedup: reclaim space wasted by duplicate hidden types

In normal deduplicating links, we insert every type (identified by its
unique hash) precisely once.  But conflicting types appear in multiple
dicts, so for those, we loop, inserting them into every target dict in
turn (each corresponding to an input dict that type appears in).  But in
cu-mapped links, some of those dicts may have been merged into one: now
that we are hiding duplicate conflicting types more aggressively in such
links, we are getting duplicate identical hidden types turning up in
large numbers.

Fix this by eliminating them in cu-mapping phase 1 (the phase in which
this merging takes place), by checking to see if a type with this hash
has already been inserted in this dict and skipping it if so.  The check
is redundant and a waste of time in other cu-mapping phases and in normal
links, but in kernel-sized cu-mapped links it saves a few tens to
hundreds of kilobytes.

libctf/
	PR libctf/33047
	* ctf-dedup.c (ctf_dedup_emit_type): Check for already-emitted
	types in cu-mapping phase 1.
--- diff --git a/libctf/ctf-dedup.c b/libctf/ctf-dedup.c index 0b1bfc27c40..c7c2eddc2b2 100644 --- a/libctf/ctf-dedup.c +++ b/libctf/ctf-dedup.c @@ -2773,6 +2773,34 @@ ctf_dedup_emit_type (const char *hval, ctf_dict_t *output, ctf_dict_t **inputs, output_num = input_num; } + if (!target->ctf_dedup.cd_output_emission_hashes) + if ((target->ctf_dedup.cd_output_emission_hashes + = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string, + NULL, NULL)) == NULL) + goto oom_hash; + + if (!target->ctf_dedup.cd_output_emission_conflicted_forwards) + if ((target->ctf_dedup.cd_output_emission_conflicted_forwards + = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string, + NULL, NULL)) == NULL) + goto oom_hash; + + /* When cu-mapping mode is turned on, we merge types derived from multiple CUs + into one target dict: in phase 1, by merging them according to the mapping; + in phase 2, as a consequence of taking the merged results from phase 1. + Any given type appears only once in the type mapping, but in + ctf_dedup_rwalk_output_mapping we loop inserting conflicting types into a + child dict corresponding to every input dict they came from. This means + that if those dicts are mapped together, in phase 1 we can attempt to + insert them *multiple times* into the same dict, which then causes them to + be duplicated in phase 2 as well. Avoid this by making sure this hval + isn't already present in the emission hash in phase 1: if it is, we in + effect already visited this type, and can return as we did above. */ + + if (cu_mapping_phase == 1 + && ctf_dynhash_lookup (target->ctf_dedup.cd_output_emission_hashes, hval)) + return 0; + real_input = input; if ((tp = ctf_lookup_by_id (&real_input, type)) == NULL) { @@ -2841,18 +2869,6 @@ ctf_dedup_emit_type (const char *hval, ctf_dict_t *output, ctf_dict_t **inputs, ctf_dprintf ("%i: Emitting type with hash %s (%s), into target %i/%p\n", depth, hval, name ? 
name : "", input_num, (void *) target); - if (!target->ctf_dedup.cd_output_emission_hashes) - if ((target->ctf_dedup.cd_output_emission_hashes - = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string, - NULL, NULL)) == NULL) - goto oom_hash; - - if (!target->ctf_dedup.cd_output_emission_conflicted_forwards) - if ((target->ctf_dedup.cd_output_emission_conflicted_forwards - = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string, - NULL, NULL)) == NULL) - goto oom_hash; - switch (kind) { case CTF_K_UNKNOWN: