From: Nick Alcock
Date: Fri, 25 Apr 2025 11:20:36 +0000 (+0100)
Subject: libctf, serialize: preparatory steps
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f782340ba5e0d94e7d74190ec0da7dc0e395e5d0;p=thirdparty%2Fbinutils-gdb.git

libctf, serialize: preparatory steps

The new serializer is quite a lot more customizable than the old one, because it can write out BTF as well as CTF: you can ask to write out BTF or fail, write out CTF if required to avoid information loss, otherwise BTF, or always write out CTF. Callers often need to find out whether a dict could be written out as BTF before deciding how to write it out (because a dict can never be written out as BTF if it is compressed, a caller might well want to ask if there is anything else that prevents BTF writeout -- say, slices, conflicting types, or CTF_K_BIG -- before deciding whether to compress it). GNU ld will do this whenever it is passed only BTF sections on the input. Figuring out whether a dict can be written out as BTF is quite expensive: we have to traverse all the types and check them, including every member of every struct. So we'd rather do that work only once. This means making a lot of state once private to ctf_preserialize public enough that another function can initialize it; and since the whole API is available after calling this function and before serializing, we should probably arrange that if we do things we know will invalidate the results of all this checking, we are forced to do it again. This commit does that, moving all the existing serialization state into a new ctf_serialize_t and adding to it.
Several functions grow force_ctf arguments that allow the caller to force CTF emission even if the type section looks BTFish: the writeout code and archive creation use this to force CTF emission if we are compressing, and archive creation uses it to force CTF emission if a CTF multi-member archive is in use, because BTF doesn't support archives at all so there's no point maintaining BTF compatibility in that case. The ctf_write* functions gain support for writing out BTF headers as well as CTF, depending on whether what was ultimately written out was actually BTF or not. Even more than most commits in this series, there is no way this is going to compile right now: we're in the middle of a major transition, completed in the next few commits. --- diff --git a/libctf/ctf-archive.c b/libctf/ctf-archive.c index 5a27570b9f7..e3c7a5f55a9 100644 --- a/libctf/ctf-archive.c +++ b/libctf/ctf-archive.c @@ -61,7 +61,8 @@ static ctf_dict_t enosym; Updates the first dict in the archive with the errno value. */ static int -ctf_arc_preserialize (ctf_dict_t **ctf_dicts, ssize_t ctf_dict_cnt) +ctf_arc_preserialize (ctf_dict_t **ctf_dicts, ssize_t ctf_dict_cnt, + size_t threshold) { uint64_t old_parent_strlen, all_strlens = 0; ssize_t i; @@ -72,7 +73,8 @@ ctf_arc_preserialize (ctf_dict_t **ctf_dicts, ssize_t ctf_dict_cnt) /* Preserialize everything, doing everything but strtab generation and things that depend on that. */ for (i = 0; i < ctf_dict_cnt; i++) - if (ctf_preserialize (ctf_dicts[i]) < 0) + if (ctf_preserialize (ctf_dicts[i], threshold != (size_t) -1 + || ctf_dict_cnt > 1) < 0) goto err; ctf_dprintf ("Deduplicating strings.\n"); @@ -134,7 +136,7 @@ ctf_arc_write_fd (int fd, ctf_dict_t **ctf_dicts, size_t ctf_dict_cnt, /* Prepare by serializing everything. Done first because it allocates a lot of space and thus is more likely to fail. 
*/ if (ctf_dict_cnt > 0 && - (err = ctf_arc_preserialize (ctf_dicts, ctf_dict_cnt)) < 0) + (err = ctf_arc_preserialize (ctf_dicts, ctf_dict_cnt, threshold)) < 0) return err; ctf_dprintf ("Writing CTF archive with %lu files\n", diff --git a/libctf/ctf-create.c b/libctf/ctf-create.c index b9859c57f05..5d1057eafd4 100644 --- a/libctf/ctf-create.c +++ b/libctf/ctf-create.c @@ -963,6 +963,8 @@ ctf_set_conflicting (ctf_dict_t *fp, ctf_id_t type, const char *cuname) dtd->dtd_vlen_size < 65536 ? dtd->dtd_vlen_size : 0); + fp->ctf_serialize.cs_initialized = 0; + return 0; } @@ -1844,6 +1846,8 @@ ctf_add_member_bitfield (ctf_dict_t *fp, ctf_id_t souid, const char *name, dtd->dtd_last_offset += bit_offset; + fp->ctf_serialize.cs_initialized = 0; + return 0; } @@ -2129,6 +2133,8 @@ ctf_add_funcobjt_sym (ctf_dict_t *fp, int is_function, const char *name, ctf_id_ if (ctf_lookup_by_sym_or_name (fp, 0, name, 0, is_function) != CTF_ERR) return (ctf_set_errno (fp, ECTF_DUPLICATE)); + fp->ctf_serialize.cs_initialized = 0; + return ctf_add_funcobjt_sym_forced (fp, is_function, name, id); } diff --git a/libctf/ctf-dedup.c b/libctf/ctf-dedup.c index f69c959e28e..9404e196584 100644 --- a/libctf/ctf-dedup.c +++ b/libctf/ctf-dedup.c @@ -3167,6 +3167,9 @@ ctf_dedup_strings (ctf_dict_t *fp) ctf_next_t *i = NULL; void *dict; + if (!fp->ctf_serialize.cs_initialized) + return ctf_set_errno (fp, ECTF_NOTSERIALIZED); + str_counts = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string, NULL, NULL); if (!str_counts) diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h index a90a672826c..df59a6107a8 100644 --- a/libctf/ctf-impl.h +++ b/libctf/ctf-impl.h @@ -356,6 +356,19 @@ typedef struct ctf_dedup ctf_dict_t *cd_output; } ctf_dedup_t; +/* Serializer state. + + This connects the various stages of serialization (ctf_link_output_is_btf, + ctf_preserialize, ctf_serialize). */ + +typedef struct ctf_serialize +{ + int cs_initialized; /* If 0, needs reinitialization. 
*/ + unsigned char *cs_buf; /* CTF buffer in mid-serialization. */ + size_t cs_buf_size; /* Length of that buffer. */ + int cs_is_btf; +} ctf_serialize_t; + /* The ctf_dict is the structure used to represent a CTF dictionary to library clients, who see it only as an opaque pointer. Modifications can therefore be made freely to this structure without regard to client versioning. The @@ -402,9 +415,7 @@ struct ctf_dict unsigned char *ctf_buf; /* Uncompressed CTF data buffer, including CTFv4 header portion. */ size_t ctf_size; /* Size of CTF header + uncompressed data. */ - unsigned char *ctf_serializing_buf; /* CTF buffer in mid-serialization. */ - size_t ctf_serializing_buf_size; /* Length of that buffer. */ - size_t ctf_serializing_nvars; /* Number of those vars. */ + ctf_serialize_t ctf_serialize; /* State internal to ctf-serialize.c. */ uint32_t *ctf_sxlate; /* Translation table for unindexed symtypetab entries. */ unsigned long ctf_nsyms; /* Number of entries in symtab xlate table. */ @@ -782,7 +793,7 @@ extern void ctf_str_purge_refs (ctf_dict_t *fp); extern void ctf_str_rollback (ctf_dict_t *, ctf_snapshot_id_t); extern const ctf_strs_writable_t *ctf_str_write_strtab (ctf_dict_t *); -extern int ctf_preserialize (ctf_dict_t *fp); +extern int ctf_preserialize (ctf_dict_t *fp, int force_ctf); extern void ctf_depreserialize (ctf_dict_t *fp); extern struct ctf_archive_internal * diff --git a/libctf/ctf-link.c b/libctf/ctf-link.c index b8f4d7e2f56..e0cbc3f3d68 100644 --- a/libctf/ctf-link.c +++ b/libctf/ctf-link.c @@ -1460,6 +1460,8 @@ ctf_link_add_strtab (ctf_dict_t *fp, ctf_link_strtab_string_f *add_string, err = iter_arg.err; } + fp->ctf_serialize.cs_initialized = 0; + if (err) ctf_set_errno (fp, err); @@ -1623,6 +1625,9 @@ ctf_link_shuffle_syms (ctf_dict_t *fp) ctf_err_warn (fp, 0, err, _("error iterating over shuffled symbols")); goto err; } + + fp->ctf_serialize.cs_initialized = 0; + return 0; err: diff --git a/libctf/ctf-open.c b/libctf/ctf-open.c index 
4dfa57807cb..6cf525b9659 100644 --- a/libctf/ctf-open.c +++ b/libctf/ctf-open.c @@ -2438,7 +2438,7 @@ ctf_dict_close (ctf_dict_t *fp) free (fp->ctf_txlate); free (fp->ctf_ptrtab); free (fp->ctf_pptrtab); - free (fp->ctf_serializing_buf); + free (fp->ctf_serialize.cs_buf); free (fp->ctf_header); free (fp); diff --git a/libctf/ctf-serialize.c b/libctf/ctf-serialize.c index e2d1bb02c31..1c516a888d1 100644 --- a/libctf/ctf-serialize.c +++ b/libctf/ctf-serialize.c @@ -76,7 +76,7 @@ typedef struct emit_symtypetab_state updated later on to change the type ID recorded in this location. The ref may not be emitted if the value is already known and cannot change. - All refs must point within the ctf_serializing_buf. */ + All refs must point within the ctf_serialize.cs_buf. */ static int ctf_type_add_ref (ctf_dict_t *fp, uint32_t *ref) @@ -92,9 +92,9 @@ ctf_type_add_ref (ctf_dict_t *fp, uint32_t *ref) if (!ctf_assert (fp, dtd)) return 0; - if (!ctf_assert (fp, fp->ctf_serializing_buf != NULL - && (unsigned char *) ref > fp->ctf_serializing_buf - && (unsigned char *) ref < fp->ctf_serializing_buf + fp->ctf_serializing_buf_size)) + if (!ctf_assert (fp, fp->ctf_serialize.cs_buf != NULL + && (unsigned char *) ref > fp->ctf_serialize.cs_buf + && (unsigned char *) ref < fp->ctf_serialize.cs_buf + fp->ctf_serialize.cs_buf_size)) return -1; /* Simple case: final ID different from what is recorded, but already known. @@ -1017,14 +1017,19 @@ ctf_emit_type_sect (ctf_dict_t *fp, unsigned char **tptr) /* Overall serialization. */ -/* Do all aspects of serialization up to strtab writeout and variable table - sorting, including final type ID assignment. The resulting dict will have - the LCTF_PRESERIALIZED flag on and must not be modified in any way before - serialization. (This is only lightly enforced, as this feature is internal- - only, employed by the linker machinery.) 
*/ int -ctf_preserialize (ctf_dict_t *fp) +/* Do all aspects of serialization up to strtab writeout, including final type + ID assignment. The resulting dict will have the LCTF_PRESERIALIZED flag on + and must not be modified in any way before serialization. (This is only + lightly enforced, as this feature is internal-only, employed by the linker + machinery.) + + If FORCE_CTF is enabled, always emit CTF in LIBCTF_BTM_POSSIBLE mode, and + error in LIBCTF_BTM_BTF mode. */ + +int +ctf_preserialize (ctf_dict_t *fp, int force_ctf) { ctf_header_t hdr, *hdrp; ctf_dvdef_t *dvd; @@ -1182,8 +1187,8 @@ ctf_preserialize (ctf_dict_t *fp) if ((buf = malloc (buf_size)) == NULL) return (ctf_set_errno (fp, EAGAIN)); - fp->ctf_serializing_buf = buf; - fp->ctf_serializing_buf_size = buf_size; + fp->ctf_serialize.cs_buf = buf; + fp->ctf_serialize.cs_buf_size = buf_size; memcpy (buf, &hdr, sizeof (ctf_header_t)); t = (unsigned char *) buf + sizeof (ctf_header_t) + hdr.cth_objtoff; @@ -1253,8 +1258,9 @@ ctf_preserialize (ctf_dict_t *fp) return 0; err: - fp->ctf_serializing_buf = NULL; - fp->ctf_serializing_buf_size = 0; + fp->ctf_serialize.cs_initialized = 0; + fp->ctf_serialize.cs_buf = NULL; + fp->ctf_serialize.cs_buf_size = 0; free (buf); ctf_str_purge_refs (fp); @@ -1270,11 +1276,10 @@ ctf_depreserialize (ctf_dict_t *fp) ctf_str_purge_refs (fp); ctf_type_purge_refs (fp); - free (fp->ctf_serializing_buf); - fp->ctf_serializing_buf = NULL; - fp->ctf_serializing_vars = NULL; - fp->ctf_serializing_buf_size = 0; - fp->ctf_serializing_nvars = 0; + fp->ctf_serialize.cs_initialized = 0; + free (fp->ctf_serialize.cs_buf); + fp->ctf_serialize.cs_buf = NULL; + fp->ctf_serialize.cs_buf_size = 0; fp->ctf_flags &= ~(LCTF_NO_STR | LCTF_NO_TYPE); } @@ -1289,11 +1294,11 @@ ctf_depreserialize (ctf_dict_t *fp) on visible operation). 
*/ static unsigned char * -ctf_serialize (ctf_dict_t *fp, size_t *bufsiz) +ctf_serialize (ctf_dict_t *fp, size_t *bufsiz, int force_ctf) { const ctf_strs_writable_t *strtab; unsigned char *buf, *newbuf; - ctf_header_t *hdrp; + ctf_btf_header_t *hdrp; /* Stop unstable file formats (subject to change) getting out into the wild. */ @@ -1308,13 +1313,11 @@ ctf_serialize (ctf_dict_t *fp, size_t *bufsiz) /* Preserialize, if we need to. */ - if (!fp->ctf_serializing_buf) - if (ctf_preserialize (fp) < 0) + if (!fp->ctf_serialize.cs_buf) + if (ctf_preserialize (fp, force_ctf) < 0) return NULL; /* errno is set for us. */ - /* UPTODO: prevent writing of BTF dicts when upgrading from CTFv3. */ - - /* Allow string lookup again, now we need it to sort the vartab. */ + /* Allow string lookup again. */ fp->ctf_flags &= ~LCTF_NO_STR; /* Construct the final string table and fill out all the string refs with the @@ -1327,33 +1330,38 @@ ctf_serialize (ctf_dict_t *fp, size_t *bufsiz) if ((fp->ctf_flags & LCTF_LINKING) && fp->ctf_parent) fp->ctf_header->cth_parent_strlen = fp->ctf_parent->ctf_str[CTF_STRTAB_0].cts_len; - hdrp = (ctf_header_t *) fp->ctf_serializing_buf; - ctf_dprintf ("Writing strtab for %s\n", ctf_cuname (fp)); strtab = ctf_str_write_strtab (fp); if (strtab == NULL) goto err; - if ((newbuf = realloc (fp->ctf_serializing_buf, fp->ctf_serializing_buf_size + if ((newbuf = realloc (fp->ctf_serialize.cs_buf, fp->ctf_serialize.cs_buf_size + strtab->cts_len)) == NULL) goto oom; - fp->ctf_serializing_buf = newbuf; - memcpy (fp->ctf_serializing_buf + fp->ctf_serializing_buf_size, strtab->cts_strs, + fp->ctf_serialize.cs_buf = newbuf; + memcpy (fp->ctf_serialize.cs_buf + fp->ctf_serialize.cs_buf_size, strtab->cts_strs, strtab->cts_len); - hdrp = (ctf_header_t *) fp->ctf_serializing_buf; - hdrp->cth_strlen = strtab->cts_len; - hdrp->cth_parent_strlen = fp->ctf_header->cth_parent_strlen; - fp->ctf_serializing_buf_size += hdrp->cth_strlen; - *bufsiz = fp->ctf_serializing_buf_size; - 
- buf = fp->ctf_serializing_buf; - - fp->ctf_serializing_buf = NULL; - fp->ctf_serializing_vars = NULL; - fp->ctf_serializing_buf_size = 0; - fp->ctf_serializing_nvars = 0; + + hdrp = (ctf_btf_header_t *) fp->ctf_serialize.cs_buf; + hdrp->bth_str_len = strtab->cts_len; + fp->ctf_serialize.cs_buf_size += hdrp->bth_str_len; + + if (!fp->ctf_serialize.cs_is_btf) + { + ctf_header_t *ctf_hdrp; + + ctf_hdrp = (ctf_header_t *) (void *) hdrp; + ctf_hdrp->cth_parent_strlen = fp->ctf_header->cth_parent_strlen; + } + + *bufsiz = fp->ctf_serialize.cs_buf_size; + + buf = fp->ctf_serialize.cs_buf; + + fp->ctf_serialize.cs_buf = NULL; + fp->ctf_serialize.cs_buf_size = 0; fp->ctf_flags &= ~LCTF_NO_TYPE; return buf; @@ -1373,7 +1381,7 @@ err: the header uncompressed, and the CTF opening functions work on them without manual decompression.) - No support for (testing-only) endian-flipping. */ + No support for (testing-only) endian-flipping or pure BTF writing. */ int ctf_gzwrite (ctf_dict_t *fp, gzFile fd) { @@ -1382,7 +1390,7 @@ ctf_gzwrite (ctf_dict_t *fp, gzFile fd) size_t bufsiz; size_t len, written = 0; - if ((buf = ctf_serialize (fp, &bufsiz)) == NULL) + if ((buf = ctf_serialize (fp, &bufsiz, 1)) == NULL) return -1; /* errno is set for us. */ p = buf; @@ -1414,26 +1422,33 @@ ctf_write_mem (ctf_dict_t *fp, size_t *size, size_t threshold) unsigned char *src; size_t rawbufsiz; size_t alloc_len = 0; + size_t hdrlen; int uncompressed = 0; int flip_endian; int rc; flip_endian = getenv ("LIBCTF_WRITE_FOREIGN_ENDIAN") != NULL; - if ((rawbuf = ctf_serialize (fp, &rawbufsiz)) == NULL) + if ((rawbuf = ctf_serialize (fp, &rawbufsiz, + threshold != (size_t) -1)) == NULL) return NULL; /* errno is set for us. 
*/ - if (!ctf_assert (fp, rawbufsiz >= sizeof (ctf_header_t))) + if (fp->ctf_serialize.cs_is_btf) + hdrlen = sizeof (ctf_btf_header_t); + else + hdrlen = sizeof (ctf_header_t); + + if (!ctf_assert (fp, rawbufsiz >= hdrlen)) goto err; - if (rawbufsiz >= threshold) + if (rawbufsiz >= threshold && !fp->ctf_serialize.cs_is_btf) alloc_len = compressBound (rawbufsiz - sizeof (ctf_header_t)) + sizeof (ctf_header_t); /* Trivial operation if the buffer is too small to bother compressing, and we're not doing a forced write-time flip. */ - if (rawbufsiz < threshold) + if (rawbufsiz < threshold || fp->ctf_serialize.cs_is_btf) { alloc_len = rawbufsiz; uncompressed = 1; @@ -1455,28 +1470,31 @@ ctf_write_mem (ctf_dict_t *fp, size_t *size, size_t threshold) rawhp = (ctf_header_t *) rawbuf; hp = (ctf_header_t *) buf; - memcpy (hp, rawbuf, sizeof (ctf_header_t)); - bp = buf + sizeof (ctf_header_t); - *size = sizeof (ctf_header_t); - if (!uncompressed) + memcpy (hp, rawbuf, hdrlen); + bp = buf + hdrlen; + *size = hdrlen; + + if (!uncompressed && !fp->ctf_serialize.cs_is_btf) hp->cth_flags |= CTF_F_COMPRESS; - src = rawbuf + sizeof (ctf_header_t); + src = rawbuf + hdrlen; if (flip_endian) { - ctf_flip_header (hp); - if (ctf_flip (fp, rawhp, src, 1) < 0) + if (ctf_flip_header (hp, fp->ctf_serialize.cs_is_btf, 0) < 0) + goto err; /* errno is set for us. */ + if (ctf_flip (fp, rawhp, src, fp->ctf_serialize.cs_is_btf, 1) < 0) goto err; /* errno is set for us. */ } + /* Must be CTFv4. 
*/ if (!uncompressed) { size_t compress_len = alloc_len - sizeof (ctf_header_t); if ((rc = compress (bp, (uLongf *) &compress_len, - src, rawbufsiz - sizeof (ctf_header_t))) != Z_OK) + src, rawbufsiz - hdrlen)) != Z_OK) { ctf_set_errno (fp, ECTF_COMPRESS); ctf_err_warn (fp, 0, 0, _("zlib deflate err: %s"), zError (rc)); @@ -1486,8 +1504,8 @@ ctf_write_mem (ctf_dict_t *fp, size_t *size, size_t threshold) } else { - memcpy (bp, src, rawbufsiz - sizeof (ctf_header_t)); - *size += rawbufsiz - sizeof (ctf_header_t); + memcpy (bp, src, rawbufsiz - hdrlen); + *size += rawbufsiz - hdrlen; } free (rawbuf);