From: Nick Alcock Date: Sun, 16 Feb 2025 19:39:41 +0000 (+0000) Subject: libctf: move string deduplication into ctf-archive X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=beccf36b88069571698d9f0c51055b3aaee4a380;p=thirdparty%2Fbinutils-gdb.git libctf: move string deduplication into ctf-archive This means that any archive containing dicts can get its strings dedupped together, rather than only those that are ctf_linked. (For now, we are still constrained to ctf_linked archives, since fixing that requires further changes to ctf_dedup_strings: but this gives us the first half of what is necessary.) libctf/ * ctf-link.c (ctf_link_write): Move string dedup into... * ctf-archive.c (ctf_arc_preserialize): ... this new function. (ctf_arc_write_fd): Call it. --- diff --git a/libctf/ctf-api.c b/libctf/ctf-api.c index b3064887814..479dcf6a350 100644 --- a/libctf/ctf-api.c +++ b/libctf/ctf-api.c @@ -152,7 +152,7 @@ static ctf_list_t open_errors; open errors list if NULL): if ERR is nonzero it is the errno to report to the debug stream instead of that recorded on fp. */ _libctf_printflike_ (4, 5) -extern void +void ctf_err_warn (ctf_dict_t *fp, int is_warning, int err, const char *format, ...) { @@ -203,6 +203,25 @@ ctf_err_warn_to_open (ctf_dict_t *fp) ctf_list_splice (&open_errors, &fp->ctf_errs_warnings); } +/* Copy all the errors/warnings from one fp to another one, and the error code + as well. Copying a dict onto itself does nothing: ctf_err_warn presumably + appends to DEST's list, so walking that same list while copying would never + reach its end. The stored text is passed through "%s" so that any '%' in + it is not reinterpreted as a conversion. */ +void +ctf_err_copy (ctf_dict_t *dest, ctf_dict_t *src) +{ + ctf_err_warning_t *cew; + + if (dest == src) + return; + + for (cew = ctf_list_next (&src->ctf_errs_warnings); cew != NULL; + cew = ctf_list_next (cew)) + ctf_err_warn (dest, cew->cew_is_warning, 0, "%s", cew->cew_text); + ctf_set_errno (dest, ctf_errno (src)); +} + /* Error-warning reporting: an 'iterator' that returns errors and warnings from the error/warning list, in order of emission. Errors and warnings are popped after return: the caller must free the returned error-text pointer. 
diff --git a/libctf/ctf-archive.c b/libctf/ctf-archive.c index 739d0349fab..686fc72e886 100644 --- a/libctf/ctf-archive.c +++ b/libctf/ctf-archive.c @@ -51,6 +51,76 @@ static int ctf_arc_import_parent (const ctf_archive_t *arc, ctf_dict_t *fp, and ctfi_symnamedicts. Never initialized. */ static ctf_dict_t enosym; +/* Prepare to serialize everything. Members of archives have dependencies on + each other, because the strtabs and type IDs of children depend on the + parent: so we have to work over the archive as a whole to prepare for final + serialization. + + Returns zero on success, or an errno, or an ECTF_* value. + + Updates the first dict in the archive with the errno value. On failure, + every dict that was successfully preserialized is depreserialized again. */ + +static int +ctf_arc_preserialize (ctf_dict_t **ctf_dicts, ssize_t ctf_dict_cnt) +{ + uint64_t old_parent_strlen, all_strlens = 0; + ssize_t i, preserialized = 0; + int err; + + ctf_dprintf ("Preserializing dicts.\n"); + + /* Preserialize everything, doing everything but strtab generation and things + that depend on that. Track how many dicts got through, so the error path + knows how many to undo. */ + for (i = 0; i < ctf_dict_cnt; i++) + { + if (ctf_preserialize (ctf_dicts[i]) < 0) + goto err; + preserialized = i + 1; + } + + ctf_dprintf ("Deduplicating strings.\n"); + + for (i = 0; i < ctf_dict_cnt; i++) + all_strlens += ctf_dicts[i]->ctf_str[0].cts_len + + ctf_dicts[i]->ctf_str_prov_len; + + /* If linking, deduplicate strings against the children in every dict that has + any. (String deduplication is not yet implemented for non-linked dicts.) 
*/ + for (i = 0; i < ctf_dict_cnt; i++) + if (ctf_dicts[i]->ctf_flags & LCTF_LINKING && ctf_dicts[i]->ctf_link_outputs) + { + old_parent_strlen = ctf_dicts[i]->ctf_str[0].cts_len + + ctf_dicts[i]->ctf_str_prov_len; + + if (ctf_dedup_strings (ctf_dicts[i]) < 0) + goto err; + + ctf_dprintf ("Deduplicated strings in archive member %zi: " + "original parent strlen: %zu; original lengths: %zu; " + "final length: %zu.\n", i, (size_t) old_parent_strlen, + (size_t) all_strlens, + (size_t) ctf_dicts[i]->ctf_str_prov_len); + } + + return 0; + + err: + err = ctf_errno (ctf_dicts[i]); + + /* The first dict already holds its own errors: only copy from later ones. */ + if (i != 0) + ctf_err_copy (ctf_dicts[0], ctf_dicts[i]); + + /* Undo every completed preserialization, not only those before the failing + dict: a string deduplication failure happens after all dicts have been + preserialized. */ + for (i = preserialized - 1; i >= 0; i--) + ctf_depreserialize (ctf_dicts[i]); + return err; +} + /* Write out a CTF archive to the start of the file referenced by the passed-in fd. The entries in CTF_DICTS are referenced by name: the names are passed in the names array, which must have CTF_DICTS entries. @@ -70,8 +140,15 @@ ctf_arc_write_fd (int fd, ctf_dict_t **ctf_dicts, size_t ctf_dict_cnt, char *nametbl = NULL; /* The name table. */ char *np; off_t nameoffs; + int err; struct ctf_archive_modent *modent; + /* Prepare by serializing everything. Done first because it allocates a lot + of space and thus is more likely to fail. 
*/ + if (ctf_dict_cnt > 0 && + (err = ctf_arc_preserialize (ctf_dicts, ctf_dict_cnt)) < 0) + return err; + ctf_dprintf ("Writing CTF archive with %lu files\n", (unsigned long) ctf_dict_cnt); diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h index 62fe88d1b8b..095c0a1ecde 100644 --- a/libctf/ctf-impl.h +++ b/libctf/ctf-impl.h @@ -792,6 +792,7 @@ _libctf_printflike_ (4, 5) extern void ctf_err_warn (ctf_dict_t *, int is_warning, int err, const char *, ...); extern void ctf_err_warn_to_open (ctf_dict_t *); +extern void ctf_err_copy (ctf_dict_t *dest, ctf_dict_t *src); extern void ctf_assert_fail_internal (ctf_dict_t *, const char *, size_t, const char *); extern const char *ctf_link_input_name (ctf_dict_t *); diff --git a/libctf/ctf-link.c b/libctf/ctf-link.c index 2081b610daa..8292ec4bfb1 100644 --- a/libctf/ctf-link.c +++ b/libctf/ctf-link.c @@ -2052,7 +2052,6 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold) long fsize; const char *errloc; unsigned char *buf = NULL; - uint64_t old_parent_strlen, all_strlens = 0; memset (&arg, 0, sizeof (ctf_name_list_accum_cb_arg_t)); arg.fp = fp; @@ -2123,41 +2122,6 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold) memmove (&(arg.files[1]), arg.files, sizeof (ctf_dict_t *) * (arg.i)); arg.files[0] = fp; - /* Preserialize everything, doing everything but strtab generation and things that - depend on that. 
*/ - for (i = 0; i < arg.i + 1; i++) - { - if (ctf_preserialize (arg.files[i]) < 0) - { - errno = ctf_errno (arg.files[i]); - for (i--; i >= 0; i--) - ctf_depreserialize (arg.files[i]); - errloc = "preserialization"; - goto err_no; - } - } - - ctf_dprintf ("Deduplicating strings.\n"); - - for (i = 0; i < arg.i; i++) - all_strlens += arg.files[i]->ctf_str[0].cts_len - + arg.files[i]->ctf_str_prov_len; - old_parent_strlen = arg.files[0]->ctf_str[0].cts_len - + arg.files[0]->ctf_str_prov_len; - - if (ctf_dedup_strings (fp) < 0) - { - for (i = 0; i < arg.i + 1; i++) - ctf_depreserialize (arg.files[i]); - errloc = "string deduplication"; - goto err_str_dedup; - } - - ctf_dprintf ("Deduplicated strings: original parent strlen: %zu; " - "original lengths: %zu; final length: %zu.\n", - (size_t) old_parent_strlen, (size_t) all_strlens, - (size_t) arg.files[0]->ctf_str_prov_len); - if ((f = tmpfile ()) == NULL) { errloc = "tempfile creation"; @@ -2168,9 +2132,8 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold) (const char **) arg.names, threshold)) < 0) { - errloc = "archive writing"; - errno = err; - goto err_no; + errloc = NULL; /* errno is set for us. */ + goto err_set; } if (fseek (f, 0, SEEK_END) < 0) @@ -2229,7 +2192,7 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold) err_no: ctf_set_errno (fp, errno); - err_str_dedup: + err_set: /* Turn off the is-linking flag on all the dicts in this link, as above. */ for (i = 0; i < arg.i; i++) { @@ -2251,7 +2214,8 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold) free (arg.dynames[i]); free (arg.dynames); } - ctf_err_warn (fp, 0, 0, _("cannot write archive in link: %s failure"), - errloc); + if (errloc) + ctf_err_warn (fp, 0, 0, _("cannot write archive in link: %s failure"), + errloc); return NULL; }