From: Joel Rosdahl Date: Sat, 8 Jun 2019 11:25:49 +0000 (+0200) Subject: Improve how <MD4, number of hashed bytes> is represented X-Git-Tag: v4.0~948 X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=10bce6078e74c19dfea9f19d15daff5ca7ab2a2c;p=thirdparty%2Fccache.git Improve how <MD4, number of hashed bytes> is represented Internally, the tuple <MD4, number of hashed bytes>, which is the key used for cached results and manifests, was represented as 16 bytes + 1 uint32_t. Externally, i.e. in file names, it was represented as <MD4>-<size>, with <MD4> being 32 hex digits and <size> being the number of hashed bytes in human-readable form. This commit changes the internal representation to 20 bytes, where the last 4 bytes are the number of hashed bytes in big-endian order. The external representation has been changed to match this, i.e. to be 40 hex digits. This makes the code slightly less complex and more consistent. Also, the code that converts the key into string form has been rewritten to not allocate on the heap but to just write the output into a buffer owned by the caller. struct file_hash (16 bytes + 1 uint32_t) has been renamed to struct digest (20 bytes) in order to emphasize that it represents the output of a hash algorithm that does not necessarily get file content as its input. The documentation of the manifest format has been updated to reflect the logical change of keys, even though the actual serialized content of manifest files hasn’t changed. While at it, reading of the obsolete “hash_size” and “reserved” fields has been removed. (Future changes in the manifest format will be handled by just stepping the version.) 
--- diff --git a/configure.ac b/configure.ac index 70fb38cb8..bb0f2be9e 100644 --- a/configure.ac +++ b/configure.ac @@ -82,14 +82,12 @@ AC_CHECK_TYPES(long long) AC_CHECK_HEADERS(ctype.h pwd.h stdlib.h string.h strings.h sys/time.h sys/mman.h) AC_CHECK_HEADERS(syslog.h) -AC_CHECK_HEADERS(arpa/inet.h) AC_CHECK_HEADERS(termios.h) AC_CHECK_FUNCS(gethostname) AC_CHECK_FUNCS(getopt_long) AC_CHECK_FUNCS(getpwuid) AC_CHECK_FUNCS(gettimeofday) -AC_CHECK_FUNCS(htonl) AC_CHECK_FUNCS(localtime_r) AC_CHECK_FUNCS(mkstemp) AC_CHECK_FUNCS(realpath) diff --git a/doc/MANUAL.adoc b/doc/MANUAL.adoc index 600d3c0df..219370f20 100644 --- a/doc/MANUAL.adoc +++ b/doc/MANUAL.adoc @@ -127,8 +127,10 @@ compiler options apply and you should refer to the compiler's documentation. *`--hash-file`*=_PATH_:: - Print the hash (in format `<MD4>-<size>`) of the file at PATH. This is only - useful when debugging ccache and its behavior. + Print the hash of the file at PATH in the format `<MD4><size>`, where + `<MD4>` is 32 hex digits and `<size>` is 8 hex digits (representing the + size of the file modulo 2^32). This is only useful when debugging ccache + and its behavior. 
*`-h, --help`*:: diff --git a/src/ccache.c b/src/ccache.c index d4d1e5ffa..c47c2f2de 100644 --- a/src/ccache.c +++ b/src/ccache.c @@ -83,7 +83,8 @@ static const char USAGE_TEXT[] = "Options for scripting or debugging:\n" " --dump-manifest=PATH dump manifest file at PATH in text format\n" " -k, --get-config=K print the value of configuration key K\n" - " --hash-file=PATH print the hash (-) of the file at PATH\n" + " --hash-file=PATH print the hash () of the\n" + " file at PATH\n" " --print-stats print statistics counter IDs and corresponding\n" " values in machine-parsable format\n" " -o, --set-config=K=V set configuration item K to value V\n" @@ -134,9 +135,9 @@ static const char *actual_language; static size_t arch_args_size = 0; static char *arch_args[MAX_ARCH_ARGS] = {NULL}; -// Name (represented as a struct file_hash) of the file containing the cached +// Name (represented as a struct digest) of the file containing the cached // object code. -static struct file_hash *cached_obj_hash; +static struct digest *cached_obj_hash; // Full path to the file containing the result // (cachedir/a/b/cdef[...]-size.result). @@ -150,8 +151,8 @@ static char *manifest_path; // compilation. time_t time_of_compilation; -// Files included by the preprocessor and their hashes/sizes. Key: file path. -// Value: struct file_hash. +// Files included by the preprocessor and their hashes. Key: file path. Value: +// struct digest. static struct hashtable *included_files = NULL; // Uses absolute path for some include files. @@ -692,9 +693,9 @@ remember_include_file(char *path, struct hash *cpp_hash, bool system, goto failure; } hash_delimiter(cpp_hash, using_pch_sum ? 
"pch_sum_hash" : "pch_hash"); - char *pch_hash_result = hash_result(fhash); - hash_string(cpp_hash, pch_hash_result); - free(pch_hash_result); + char pch_digest[DIGEST_STRING_BUFFER_SIZE]; + hash_result_as_string(fhash, pch_digest); + hash_string(cpp_hash, pch_digest); } if (conf->direct_mode) { @@ -718,17 +719,16 @@ remember_include_file(char *path, struct hash *cpp_hash, bool system, } } - struct file_hash *h = x_malloc(sizeof(*h)); - hash_result_as_bytes(fhash, h->hash); - h->hsize = hash_input_size(fhash); - hashtable_insert(included_files, path, h); + struct digest *d = x_malloc(sizeof(*d)); + hash_result_as_bytes(fhash, d); + hashtable_insert(included_files, path, d); path = NULL; // Ownership transferred to included_files. if (depend_mode_hash) { hash_delimiter(depend_mode_hash, "include"); - char *result = format_hash_as_string(h->hash, h->hsize); - hash_string(depend_mode_hash, result); - free(result); + char digest[DIGEST_STRING_BUFFER_SIZE]; + digest_as_string(d, digest); + hash_string(depend_mode_hash, digest); } } @@ -1109,7 +1109,7 @@ out: // Extract the used includes from the dependency file. Note that we cannot // distinguish system headers from other includes here. -static struct file_hash * +static struct digest * object_hash_from_depfile(const char *depfile, struct hash *hash) { FILE *f = fopen(depfile, "r"); @@ -1154,10 +1154,9 @@ object_hash_from_depfile(const char *depfile, struct hash *hash) print_included_files(stdout); } - struct file_hash *result = x_malloc(sizeof(*result)); - hash_result_as_bytes(hash, result->hash); - result->hsize = hash_input_size(hash); - return result; + struct digest *digest = x_malloc(sizeof(*digest)); + hash_result_as_bytes(hash, digest); + return digest; } // Send cached stderr, if any, to stderr. 
@@ -1204,14 +1203,13 @@ update_manifest_file(void) } static void -update_cached_result_globals(struct file_hash *hash) +update_cached_result_globals(struct digest *hash) { - char *object_name = format_hash_as_string(hash->hash, hash->hsize); + char object_name[DIGEST_STRING_BUFFER_SIZE]; + digest_as_string(hash, object_name); cached_obj_hash = hash; cached_result = get_path_in_cache(object_name, ".result"); - stats_file = format("%s/%c/stats", conf->cache_dir, object_name[0]); - free(object_name); } // Run the real compiler and put the result in cache. @@ -1351,7 +1349,7 @@ to_cache(struct args *args, struct hash *depend_mode_hash) } if (conf->depend_mode) { - struct file_hash *object_hash = + struct digest *object_hash = object_hash_from_depfile(output_dep, depend_mode_hash); if (!object_hash) { failed(); @@ -1456,7 +1454,7 @@ to_cache(struct args *args, struct hash *depend_mode_hash) // Find the object file name by running the compiler in preprocessor mode. // Returns the hash as a heap-allocated hex string. -static struct file_hash * +static struct digest * get_object_name_from_cpp(struct args *args, struct hash *hash) { time_of_compilation = time(NULL); @@ -1561,10 +1559,9 @@ get_object_name_from_cpp(struct args *args, struct hash *hash) hash_string(hash, "false"); } - struct file_hash *result = x_malloc(sizeof(*result)); - hash_result_as_bytes(hash, result->hash); - result->hsize = hash_input_size(hash); - return result; + struct digest *name = x_malloc(sizeof(*name)); + hash_result_as_bytes(hash, name); + return name; } // Hash mtime or content of a file, or the output of a command, according to @@ -1803,7 +1800,7 @@ calculate_common_hash(struct args *args, struct hash *hash) // Update a hash sum with information specific to the direct and preprocessor // modes and calculate the object hash. Returns the object hash on success, // otherwise NULL. Caller frees. 
-static struct file_hash * +static struct digest * calculate_object_hash(struct args *args, struct hash *hash, int direct_mode) { bool found_ccbin = false; @@ -2009,7 +2006,7 @@ calculate_object_hash(struct args *args, struct hash *hash, int direct_mode) hash_string(hash, arch_args[i]); } - struct file_hash *object_hash = NULL; + struct digest *object_hash = NULL; if (direct_mode) { // Hash environment variables that affect the preprocessor output. const char *envvars[] = { @@ -2046,11 +2043,11 @@ calculate_object_hash(struct args *args, struct hash *hash, int direct_mode) return NULL; } - char *manifest_name = hash_result(hash); + char manifest_name[DIGEST_STRING_BUFFER_SIZE]; + hash_result_as_string(hash, manifest_name); manifest_path = get_path_in_cache(manifest_name, ".manifest"); manifest_stats_file = format("%s/%c/stats", conf->cache_dir, manifest_name[0]); - free(manifest_name); cc_log("Looking for object file hash in %s", manifest_path); MTR_BEGIN("manifest", "manifest_get"); @@ -3740,8 +3737,8 @@ ccache(int argc, char *argv[]) direct_hash, output_obj, 'd', "DIRECT MODE", debug_text_file); bool put_object_in_manifest = false; - struct file_hash *object_hash = NULL; - struct file_hash *object_hash_from_manifest = NULL; + struct digest *object_hash = NULL; + struct digest *object_hash_from_manifest = NULL; if (conf->direct_mode) { cc_log("Trying direct lookup"); MTR_BEGIN("hash", "direct_hash"); @@ -3785,7 +3782,7 @@ ccache(int argc, char *argv[]) update_cached_result_globals(object_hash); if (object_hash_from_manifest - && !file_hashes_equal(object_hash_from_manifest, object_hash)) { + && !digests_equal(object_hash_from_manifest, object_hash)) { // The hash from manifest differs from the hash of the preprocessor // output. 
This could be because: // @@ -3889,9 +3886,9 @@ ccache_main_options(int argc, char *argv[]) } else { hash_file(hash, optarg); } - char *result = hash_result(hash); - puts(result); - free(result); + char digest[DIGEST_STRING_BUFFER_SIZE]; + hash_result_as_string(hash, digest); + puts(digest); hash_free(hash); break; } diff --git a/src/ccache.h b/src/ccache.h index c0acb6da4..4ab060868 100644 --- a/src/ccache.h +++ b/src/ccache.h @@ -156,12 +156,9 @@ int create_dir(const char *dir); int create_parent_dirs(const char *path); const char *get_hostname(void); const char *tmp_string(void); -char *format_hash_as_string(const unsigned char *hash, int size); -typedef uint32_t binary[5]; // 20 bytes: 16 for hash + 4 for size -void format_hash_as_binary(binary result, const unsigned char *hash, int size); int create_cachedirtag(const char *dir); char *format(const char *format, ...) ATTR_FORMAT(printf, 1, 2); -char *format_hex(unsigned char *data, size_t size); +void format_hex(const uint8_t *data, size_t size, char *buffer); void reformat(char **ptr, const char *format, ...) 
ATTR_FORMAT(printf, 2, 3); char *x_strdup(const char *s); char *x_strndup(const char *s, size_t n); diff --git a/src/hash.c b/src/hash.c index bf067620a..8f5efcedd 100644 --- a/src/hash.c +++ b/src/hash.c @@ -27,6 +27,18 @@ struct hash { FILE *debug_text; }; +void +digest_as_string(const struct digest *d, char *buffer) +{ + format_hex(d->bytes, DIGEST_SIZE, buffer); +} + +bool +digests_equal(const struct digest *d1, const struct digest *d2) +{ + return memcmp(d1->bytes, d2->bytes, DIGEST_SIZE) == 0; +} + static void do_hash_buffer(struct hash *hash, const void *s, size_t len) { @@ -83,12 +95,6 @@ void hash_enable_debug( do_debug_text(hash, " ===\n", 5); } -size_t -hash_input_size(struct hash *hash) -{ - return hash->md.totalN + hash->md.tail_len; -} - void hash_buffer(struct hash *hash, const void *s, size_t len) { @@ -96,29 +102,22 @@ hash_buffer(struct hash *hash, const void *s, size_t len) do_debug_text(hash, s, len); } -char * -hash_result(struct hash *hash) -{ - unsigned char sum[16]; - - hash_result_as_bytes(hash, sum); - return format_hash_as_string(sum, hash_input_size(hash)); -} - void -hash_result_as_bytes(struct hash *hash, unsigned char *out) +hash_result_as_bytes(struct hash *hash, struct digest *digest) { - mdfour_result(&hash->md, out); + mdfour_result(&hash->md, digest->bytes); + size_t input_size = hash->md.totalN + hash->md.tail_len; + for (size_t i = 0; i < 4; i++) { + digest->bytes[16 + i] = (input_size >> ((3 - i) * 8)) & 0xFF; + } } -bool -hash_equal(struct hash *hash1, struct hash *hash2) +void +hash_result_as_string(struct hash *hash, char *buffer) { - unsigned char sum1[16]; - hash_result_as_bytes(hash1, sum1); - unsigned char sum2[16]; - hash_result_as_bytes(hash2, sum2); - return memcmp(sum1, sum2, sizeof(sum1)) == 0; + struct digest d; + hash_result_as_bytes(hash, &d); + digest_as_string(&d, buffer); } void diff --git a/src/hash.h b/src/hash.h index 06a698a04..95793ba4f 100644 --- a/src/hash.h +++ b/src/hash.h @@ -1,4 +1,4 @@ -// 
Copyright (C) 2018 Joel Rosdahl +// Copyright (C) 2018-2019 Joel Rosdahl // // This program is free software; you can redistribute it and/or modify it // under the terms of the GNU General Public License as published by the Free @@ -19,15 +19,33 @@ #include "system.h" +#define DIGEST_SIZE 20 +#define DIGEST_STRING_BUFFER_SIZE (2 * DIGEST_SIZE + 1) + +// struct digest represents the binary form of the final digest (AKA hash or +// checksum) produced by the hash algorithm. +struct digest +{ + uint8_t bytes[DIGEST_SIZE]; +}; + +// Format the digest as a NUL-terminated hex string. The string buffer must +// contain at least DIGEST_STRING_BUFFER_SIZE bytes. +void digest_as_string(const struct digest *d, char *buffer); + +// Return true if d1 and d2 are equal, else false. +bool digests_equal(const struct digest *d1, const struct digest *d2); + +// struct hash represents the hash algorithm's inner state. struct hash; -// Create a new hash. +// Create a new hash state. struct hash *hash_init(void); -// Create a new hash from an existing hash state. +// Create a new hash state from an existing hash state. struct hash *hash_copy(struct hash *hash); -// Free a hash created by hash_init or hash_copy. +// Free a hash state created by hash_init or hash_copy. void hash_free(struct hash *hash); // Enable debug logging of hashed input to a binary and a text file. @@ -35,17 +53,12 @@ void hash_enable_debug( struct hash *hash, const char *section_name, FILE *debug_binary, FILE *debug_text); -// Return how many bytes have been hashed. -size_t hash_input_size(struct hash *hash); - -// Return the hash result as a hex string. Caller frees. -char *hash_result(struct hash *hash); - -// Return the hash result as 16 binary bytes. -void hash_result_as_bytes(struct hash *hash, unsigned char *out); +// Retrieve the digest as bytes. +void hash_result_as_bytes(struct hash *hash, struct digest *digest); -// Return whether hash1 and hash2 are equal. 
-bool hash_equal(struct hash *hash1, struct hash *hash2); +// Retrieve the digest as a NUL-terminated hex string. The string buffer must +// contain at least DIGEST_STRING_BUFFER_SIZE bytes. +void hash_result_as_string(struct hash *hash, char *buffer); // Hash some data that is unlikely to occur in the input. The idea is twofold: // diff --git a/src/hashutil.c b/src/hashutil.c index ed69b1566..610070c42 100644 --- a/src/hashutil.c +++ b/src/hashutil.c @@ -37,12 +37,6 @@ strings_equal(void *str1, void *str2) return str_eq((const char *)str1, (const char *)str2); } -int -file_hashes_equal(struct file_hash *fh1, struct file_hash *fh2) -{ - return memcmp(fh1->hash, fh2->hash, 16) == 0 && fh1->hsize == fh2->hsize; -} - // Search for the strings "__DATE__" and "__TIME__" in str. // // Returns a bitmask with HASH_SOURCE_CODE_FOUND_DATE and diff --git a/src/hashutil.h b/src/hashutil.h index 5ff2d3912..6015d66e2 100644 --- a/src/hashutil.h +++ b/src/hashutil.h @@ -21,16 +21,9 @@ #include "hash.h" #include -struct file_hash -{ - uint8_t hash[16]; - uint32_t hsize; -}; - unsigned hash_from_string(void *str); unsigned hash_from_int(int i); int strings_equal(void *str1, void *str2); -int file_hashes_equal(struct file_hash *fh1, struct file_hash *fh2); #define HASH_SOURCE_CODE_OK 0 #define HASH_SOURCE_CODE_ERROR 1 diff --git a/src/manifest.c b/src/manifest.c index 6672f4027..1de248a45 100644 --- a/src/manifest.c +++ b/src/manifest.c @@ -24,45 +24,35 @@ // Sketchy specification of the manifest data format: // -// magic number (4 bytes: cCmF) -// file format version (1 byte unsigned int) -// size of the hash fields (in bytes) (1 byte unsigned int) -// reserved for future use (2 bytes) +// magic number (4 bytes: cCmF) +// file format version (1 byte unsigned int) +// not used (3 bytes) // ---------------------------------------------------------------------------- -// number of include file paths (4 bytes unsigned int) -// path to include file (NUL-terminated string, -// ... 
at most 1024 bytes) +// number of include file paths (4 bytes unsigned int) +// include file path (NUL-terminated string, +// ... at most 1024 bytes) // // ---------------------------------------------------------------------------- -// number of include file hash entries (4 bytes unsigned int) -// index of include file path (4 bytes unsigned int) -// hash of include file ( bytes) -// hashed bytes (4 bytes unsigned int) -// size of include file (8 bytes unsigned int) -// mtime of include file (8 bytes signed int) -// mtime of include file (8 bytes signed int) +// number of include file entries (4 bytes unsigned int) +// include file path index (4 bytes unsigned int) +// include file digest (DIGEST_SIZE bytes) +// include file size (8 bytes unsigned int) +// include file mtime (8 bytes signed int) +// include file ctime (8 bytes signed int) // ... // -// -// +// // // // // ---------------------------------------------------------------------------- -// number of object name entries (4 bytes unsigned int) -// number of include file hash indexes (4 bytes unsigned int) -// include file hash index (4 bytes unsigned int) +// number of result entries (4 bytes unsigned int) +// number of include file entry indexes (4 bytes unsigned int) +// include file entry index (4 bytes unsigned int) // ... // -// hash part of object name ( bytes) -// size part of object name (4 bytes unsigned int) +// result name (DIGEST_SIZE bytes) // ... -// number of include file hash indexes -// include file hash index -// ... -// -// -// static const uint32_t MAGIC = 0x63436d46U; // cCmF static const uint32_t MAX_MANIFEST_ENTRIES = 100; @@ -74,10 +64,8 @@ static const uint32_t MAX_MANIFEST_FILE_INFO_ENTRIES = 10000; struct file_info { // Index to n_files. uint32_t index; - // Hash of referenced file. - uint8_t hash[16]; - // Hashed content byte count. - uint32_t hsize; + // Digest of referenced file. + struct digest digest; // Size of referenced file. uint64_t fsize; // mtime of referenced file. 
@@ -91,20 +79,14 @@ struct object { uint32_t n_file_info_indexes; // Indexes to file_infos. uint32_t *file_info_indexes; - // Hash of the object itself. - struct file_hash hash; + // Name of the object itself. + struct digest name; }; struct manifest { // Version of decoded file. uint8_t version; - // Reserved for future use. - uint16_t reserved; - - // Size of hash fields (in bytes). - uint8_t hash_size; - // Referenced include files. uint32_t n_files; char **files; @@ -137,8 +119,7 @@ file_infos_equal(void *key1, void *key2) struct file_info *fi1 = (struct file_info *)key1; struct file_info *fi2 = (struct file_info *)key2; return fi1->index == fi2->index - && memcmp(fi1->hash, fi2->hash, 16) == 0 - && fi1->hsize == fi2->hsize + && digests_equal(&fi1->digest, &fi2->digest) && fi1->fsize == fi2->fsize && fi1->mtime == fi2->mtime && fi1->ctime == fi2->ctime; @@ -217,7 +198,6 @@ static struct manifest * create_empty_manifest(void) { struct manifest *mf = x_malloc(sizeof(*mf)); - mf->hash_size = 16; mf->n_files = 0; mf->files = NULL; mf->n_file_infos = 0; @@ -250,15 +230,9 @@ read_manifest(gzFile f, char **errmsg) goto error; } - READ_BYTE(mf->hash_size); - if (mf->hash_size != 16) { - // Temporary measure until we support different hash algorithms. - *errmsg = - format("Manifest file has unsupported hash size %u", mf->hash_size); - goto error; - } - - READ_INT(2, mf->reserved); + char dummy[3]; // Legacy "hash size" + "reserved". TODO: Remove. 
+ READ_BYTES(3, dummy); + (void)dummy; READ_INT(4, mf->n_files); mf->files = x_calloc(mf->n_files, sizeof(*mf->files)); @@ -270,8 +244,7 @@ read_manifest(gzFile f, char **errmsg) mf->file_infos = x_calloc(mf->n_file_infos, sizeof(*mf->file_infos)); for (uint32_t i = 0; i < mf->n_file_infos; i++) { READ_INT(4, mf->file_infos[i].index); - READ_BYTES(mf->hash_size, mf->file_infos[i].hash); - READ_INT(4, mf->file_infos[i].hsize); + READ_BYTES(DIGEST_SIZE, mf->file_infos[i].digest.bytes); READ_INT(8, mf->file_infos[i].fsize); READ_INT(8, mf->file_infos[i].mtime); READ_INT(8, mf->file_infos[i].ctime); @@ -287,8 +260,7 @@ read_manifest(gzFile f, char **errmsg) for (uint32_t j = 0; j < mf->objects[i].n_file_info_indexes; j++) { READ_INT(4, mf->objects[i].file_info_indexes[j]); } - READ_BYTES(mf->hash_size, mf->objects[i].hash.hash); - READ_INT(4, mf->objects[i].hash.hsize); + READ_BYTES(DIGEST_SIZE, mf->objects[i].name.bytes); } return mf; @@ -336,8 +308,8 @@ write_manifest(gzFile f, const struct manifest *mf) { WRITE_INT(4, MAGIC); WRITE_INT(1, MANIFEST_VERSION); - WRITE_INT(1, 16); - WRITE_INT(2, 0); + WRITE_INT(1, 16); // Legacy hash size field. TODO: Remove. + WRITE_INT(2, 0); // Legacy "reserved" field. TODO: Remove. 
WRITE_INT(4, mf->n_files); for (uint32_t i = 0; i < mf->n_files; i++) { @@ -347,8 +319,7 @@ write_manifest(gzFile f, const struct manifest *mf) WRITE_INT(4, mf->n_file_infos); for (uint32_t i = 0; i < mf->n_file_infos; i++) { WRITE_INT(4, mf->file_infos[i].index); - WRITE_BYTES(mf->hash_size, mf->file_infos[i].hash); - WRITE_INT(4, mf->file_infos[i].hsize); + WRITE_BYTES(DIGEST_SIZE, mf->file_infos[i].digest.bytes); WRITE_INT(8, mf->file_infos[i].fsize); WRITE_INT(8, mf->file_infos[i].mtime); WRITE_INT(8, mf->file_infos[i].ctime); @@ -360,8 +331,7 @@ write_manifest(gzFile f, const struct manifest *mf) for (uint32_t j = 0; j < mf->objects[i].n_file_info_indexes; j++) { WRITE_INT(4, mf->objects[i].file_info_indexes[j]); } - WRITE_BYTES(mf->hash_size, mf->objects[i].hash.hash); - WRITE_INT(4, mf->objects[i].hash.hsize); + WRITE_BYTES(DIGEST_SIZE, mf->objects[i].name.bytes); } return 1; @@ -423,7 +393,7 @@ verify_object(struct conf *conf, struct manifest *mf, struct object *obj, } } - struct file_hash *actual = hashtable_search(hashed_files, path); + struct digest *actual = hashtable_search(hashed_files, path); if (!actual) { struct hash *hash = hash_init(); int result = hash_source_code_file(conf, hash, path); @@ -436,15 +406,14 @@ verify_object(struct conf *conf, struct manifest *mf, struct object *obj, hash_free(hash); return 0; } - actual = x_malloc(sizeof(*actual)); - hash_result_as_bytes(hash, actual->hash); - actual->hsize = hash_input_size(hash); + actual = malloc(sizeof(*actual)); + hash_result_as_bytes(hash, actual); + hashtable_insert(hashed_files, x_strdup(path), actual); hash_free(hash); } - if (memcmp(fi->hash, actual->hash, mf->hash_size) != 0 - || fi->hsize != actual->hsize) { - return 0; + if (!digests_equal(&fi->digest, actual)) { + return false; } } @@ -498,14 +467,13 @@ get_include_file_index(struct manifest *mf, char *path, static uint32_t get_file_hash_index(struct manifest *mf, char *path, - struct file_hash *file_hash, + struct digest *digest, 
struct hashtable *mf_files, struct hashtable *mf_file_infos) { struct file_info fi; fi.index = get_include_file_index(mf, path, mf_files); - memcpy(fi.hash, file_hash->hash, sizeof(fi.hash)); - fi.hsize = file_hash->hsize; + fi.digest = *digest; // file_stat.st_{m,c}time has a resolution of 1 second, so we can cache the // file's mtime and ctime only if they're at least one second older than @@ -527,7 +495,7 @@ get_file_hash_index(struct manifest *mf, } else { fi.mtime = -1; fi.ctime = -1; - fi.fsize = file_hash->hsize; + fi.fsize = 0; } uint32_t *fi_index = hashtable_search(mf_file_infos, &fi); @@ -560,8 +528,8 @@ add_file_info_indexes(uint32_t *indexes, uint32_t size, uint32_t i = 0; do { char *path = hashtable_iterator_key(iter); - struct file_hash *file_hash = hashtable_iterator_value(iter); - indexes[i] = get_file_hash_index(mf, path, file_hash, mf_files, + struct digest *digest = hashtable_iterator_value(iter); + indexes[i] = get_file_hash_index(mf, path, digest, mf_files, mf_file_infos); i++; } while (hashtable_iterator_advance(iter)); @@ -573,7 +541,7 @@ add_file_info_indexes(uint32_t *indexes, uint32_t size, static void add_object_entry(struct manifest *mf, - struct file_hash *object_hash, + struct digest *object_hash, struct hashtable *included_files) { uint32_t n_objs = mf->n_objects; @@ -585,20 +553,19 @@ add_object_entry(struct manifest *mf, obj->n_file_info_indexes = n_fii; obj->file_info_indexes = x_malloc(n_fii * sizeof(*obj->file_info_indexes)); add_file_info_indexes(obj->file_info_indexes, n_fii, mf, included_files); - memcpy(obj->hash.hash, object_hash->hash, mf->hash_size); - obj->hash.hsize = object_hash->hsize; + obj->name = *object_hash; } // Try to get the object hash from a manifest file. Caller frees. Returns NULL // on failure. 
-struct file_hash * +struct digest * manifest_get(struct conf *conf, const char *manifest_path) { gzFile f = NULL; struct manifest *mf = NULL; - struct hashtable *hashed_files = NULL; // path --> struct file_hash + struct hashtable *hashed_files = NULL; // path --> struct digest struct hashtable *stated_files = NULL; // path --> struct file_stats - struct file_hash *fh = NULL; + struct digest *name = NULL; int fd = open(manifest_path, O_RDONLY | O_BINARY); if (fd == -1) { @@ -627,8 +594,8 @@ manifest_get(struct conf *conf, const char *manifest_path) for (uint32_t i = mf->n_objects; i > 0; i--) { if (verify_object(conf, mf, &mf->objects[i - 1], stated_files, hashed_files)) { - fh = x_malloc(sizeof(*fh)); - *fh = mf->objects[i - 1].hash; + name = x_malloc(sizeof(*name)); + *name = mf->objects[i - 1].name; goto out; } } @@ -646,17 +613,17 @@ out: if (mf) { free_manifest(mf); } - if (fh) { + if (name) { // Update modification timestamp to save files from LRU cleanup. update_mtime(manifest_path); } - return fh; + return name; } // Put the object name into a manifest file given a set of included files. // Returns true on success, otherwise false. 
bool -manifest_put(const char *manifest_path, struct file_hash *object_hash, +manifest_put(const char *manifest_path, struct digest *object_hash, struct hashtable *included_files) { int ret = 0; @@ -784,38 +751,32 @@ manifest_dump(const char *manifest_path, FILE *stream) (MAGIC >> 8) & 0xFF, MAGIC & 0xFF); fprintf(stream, "Version: %u\n", mf->version); - fprintf(stream, "Hash size: %u\n", (unsigned)mf->hash_size); - fprintf(stream, "Reserved field: %u\n", (unsigned)mf->reserved); fprintf(stream, "File paths (%u):\n", (unsigned)mf->n_files); for (unsigned i = 0; i < mf->n_files; ++i) { fprintf(stream, " %u: %s\n", i, mf->files[i]); } fprintf(stream, "File infos (%u):\n", (unsigned)mf->n_file_infos); for (unsigned i = 0; i < mf->n_file_infos; ++i) { - char *hash; + char digest[DIGEST_STRING_BUFFER_SIZE]; fprintf(stream, " %u:\n", i); fprintf(stream, " Path index: %u\n", mf->file_infos[i].index); - hash = format_hash_as_string(mf->file_infos[i].hash, -1); - fprintf(stream, " Hash: %s\n", hash); - free(hash); - fprintf(stream, " Hashed bytes: %u\n", mf->file_infos[i].hsize); + digest_as_string(&mf->file_infos[i].digest, digest); + fprintf(stream, " Hash: %s\n", digest); fprintf(stream, " File size: %" PRIu64 "\n", mf->file_infos[i].fsize); fprintf(stream, " Mtime: %lld\n", (long long)mf->file_infos[i].mtime); fprintf(stream, " Ctime: %lld\n", (long long)mf->file_infos[i].ctime); } fprintf(stream, "Results (%u):\n", (unsigned)mf->n_objects); for (unsigned i = 0; i < mf->n_objects; ++i) { - char *hash; + char name[DIGEST_STRING_BUFFER_SIZE]; fprintf(stream, " %u:\n", i); fprintf(stream, " File info indexes:"); for (unsigned j = 0; j < mf->objects[i].n_file_info_indexes; ++j) { fprintf(stream, " %u", mf->objects[i].file_info_indexes[j]); } fprintf(stream, "\n"); - hash = format_hash_as_string(mf->objects[i].hash.hash, -1); - fprintf(stream, " Hash: %s\n", hash); - free(hash); - fprintf(stream, " Size: %u\n", (unsigned)mf->objects[i].hash.hsize); + 
digest_as_string(&mf->objects[i].name, name); + fprintf(stream, " Hash: %s\n", name); } ret = true; diff --git a/src/manifest.h b/src/manifest.h index 98022d1ab..ff2aca622 100644 --- a/src/manifest.h +++ b/src/manifest.h @@ -7,8 +7,8 @@ #define MANIFEST_VERSION 2 -struct file_hash *manifest_get(struct conf *conf, const char *manifest_path); -bool manifest_put(const char *manifest_path, struct file_hash *object_hash, +struct digest *manifest_get(struct conf *conf, const char *manifest_path); +bool manifest_put(const char *manifest_path, struct digest *object_hash, struct hashtable *included_files); bool manifest_dump(const char *manifest_path, FILE *stream); diff --git a/src/util.c b/src/util.c index 23f56a966..07cf48ae6 100644 --- a/src/util.c +++ b/src/util.c @@ -27,10 +27,6 @@ #include #endif -#ifdef HAVE_ARPA_INET_H -#include <arpa/inet.h> -#endif - #ifdef _WIN32 #include #include @@ -512,37 +508,6 @@ tmp_string(void) return ret; } -// Return the hash result as a hex string. Size -1 means don't include size -// suffix. Caller frees. -char * -format_hash_as_string(const unsigned char *hash, int size) -{ - int i; - char *ret = x_malloc(53); - for (i = 0; i < 16; i++) { - sprintf(&ret[i*2], "%02x", (unsigned) hash[i]); - } - if (size >= 0) { - sprintf(&ret[i*2], "-%d", size); - } - return ret; -} - -// Return the hash result in binary. -void format_hash_as_binary(binary result, const unsigned char *hash, int size) -{ - memcpy(result, hash, 16); -#ifdef HAVE_HTONL - result[4] = htonl(size); // network byte order -#else - uint32_t i = size; - unsigned char *bytes = (unsigned char *) result; - for (int j = 0; j < 4; j++) { - bytes[16 + j] = (i >> ((3 - j) * 8)) & 0xff; // (big endian) - } -#endif -} - static char const CACHEDIR_TAG[] = "Signature: 8a477f597d28d172789f06886806bc55\n" "# This file is a cache directory tag created by ccache.\n" @@ -599,17 +564,15 @@ format(const char *format, ...) return ptr; } -// Construct a string representing data. 
Caller frees -char * -format_hex(unsigned char *data, size_t size) +// Construct a hexadecimal string representing binary data. The buffer must +// hold at least 2 * size + 1 bytes. +void +format_hex(const uint8_t *data, size_t size, char *buffer) { - size_t i; - char *ret = x_malloc(2 * size + 1); - for (i = 0; i < size; i++) { - sprintf(&ret[i*2], "%02x", (unsigned) data[i]); + for (size_t i = 0; i < size; i++) { + sprintf(&buffer[i*2], "%02x", (unsigned)data[i]); } - ret[2 * size] = '\0'; - return ret; + buffer[2 * size] = '\0'; } // This is like strdup() but dies if the malloc fails. diff --git a/test/suites/base.bash b/test/suites/base.bash index dc02939a0..bafb876a5 100644 --- a/test/suites/base.bash +++ b/test/suites/base.bash @@ -1058,12 +1058,11 @@ EOF # ------------------------------------------------------------------------- TEST "--hash-file" - >empty - $CCACHE --hash-file empty > hash.out + $CCACHE --hash-file /dev/null > hash.out printf "a" | $CCACHE --hash-file - >> hash.out - if grep '31d6cfe0d16ae931b73c59d7e0c089c0-0' hash.out >/dev/null 2>&1 && \ - grep 'bde52cb31de33e46245e05fbdbd6fb24-1' hash.out >/dev/null 2>&1; then + if grep '31d6cfe0d16ae931b73c59d7e0c089c000000000' hash.out >/dev/null && \ + grep 'bde52cb31de33e46245e05fbdbd6fb2400000001' hash.out >/dev/null; then : OK else test_failed "Unexpected output of --hash-file" diff --git a/test/suites/direct.bash b/test/suites/direct.bash index e3765f672..71810e933 100644 --- a/test/suites/direct.bash +++ b/test/suites/direct.bash @@ -883,9 +883,9 @@ EOF manifest=`find $CCACHE_DIR -name '*.manifest'` $CCACHE --dump-manifest $manifest >manifest.dump - if grep 'Hash: d4de2f956b4a386c6660990a7a1ab13f' manifest.dump >/dev/null 2>&1 && \ - grep 'Hash: e94ceb9f1b196c387d098a5f1f4fe862' manifest.dump >/dev/null 2>&1 && \ - grep 'Hash: ba753bebf9b5eb99524bb7447095e2e6' manifest.dump >/dev/null 2>&1; then + if grep 'Hash: d4de2f956b4a386c6660990a7a1ab13f0000001e' manifest.dump >/dev/null && \ + grep 
'Hash: e94ceb9f1b196c387d098a5f1f4fe8620000000b' manifest.dump >/dev/null && \ + grep 'Hash: ba753bebf9b5eb99524bb7447095e2e60000000b' manifest.dump >/dev/null; then : OK else test_failed "Unexpected output of --dump-manifest" diff --git a/unittest/framework.c b/unittest/framework.c index 9e4a67df6..3939fd84b 100644 --- a/unittest/framework.c +++ b/unittest/framework.c @@ -210,23 +210,21 @@ cct_check_int_eq(const char *file, int line, const char *expression, } bool cct_check_data_eq(const char *file, int line, const char *expression, - void *expected, void *actual, size_t size) + void *expected, void *actual, size_t size) { - bool result; - - if (expected && actual && memcmp(actual, expected, size) == 0) { + if (memcmp(actual, expected, size) == 0) { cct_check_passed(file, line, expression); - result = true; + return true; } else { - char *exp_str = expected ? format_hex((unsigned char *) expected, size) : x_strdup("(null)"); - char *act_str = actual ? format_hex((unsigned char *) actual, size) : x_strdup("(null)"); + char *exp_str = x_malloc(2 * size + 1); + char *act_str = x_malloc(2 * size + 1); + format_hex(expected, size, exp_str); + format_hex(actual, size, act_str); cct_check_failed(file, line, expression, exp_str, act_str); free(exp_str); free(act_str); - result = false; + return false; } - - return result; } bool diff --git a/unittest/test_hash.c b/unittest/test_hash.c index 2e9502dfd..7e7df5c5e 100644 --- a/unittest/test_hash.c +++ b/unittest/test_hash.c @@ -24,24 +24,29 @@ TEST_SUITE(mdfour) TEST(test_vectors_from_rfc_1320_should_be_correct) { + char d[DIGEST_STRING_BUFFER_SIZE]; + { struct hash *h = hash_init(); hash_string(h, ""); - CHECK_STR_EQ_FREE2("31d6cfe0d16ae931b73c59d7e0c089c0-0", hash_result(h)); + hash_result_as_string(h, d); + CHECK_STR_EQ("31d6cfe0d16ae931b73c59d7e0c089c000000000", d); hash_free(h); } { struct hash *h = hash_init(); hash_string(h, "a"); - CHECK_STR_EQ_FREE2("bde52cb31de33e46245e05fbdbd6fb24-1", hash_result(h)); + 
hash_result_as_string(h, d); + CHECK_STR_EQ("bde52cb31de33e46245e05fbdbd6fb2400000001", d); hash_free(h); } { struct hash *h = hash_init(); hash_string(h, "message digest"); - CHECK_STR_EQ_FREE2("d9130a8164549fe818874806e1c7014b-14", hash_result(h)); + hash_result_as_string(h, d); + CHECK_STR_EQ("d9130a8164549fe818874806e1c7014b0000000e", d); hash_free(h); } @@ -51,30 +56,49 @@ TEST(test_vectors_from_rfc_1320_should_be_correct) h, "12345678901234567890123456789012345678901234567890123456789012345678901" "234567890"); - CHECK_STR_EQ_FREE2("e33b4ddc9c38f2199c3e7b164fcc0536-80", hash_result(h)); + hash_result_as_string(h, d); + CHECK_STR_EQ("e33b4ddc9c38f2199c3e7b164fcc053600000050", d); hash_free(h); } } TEST(hash_result_should_not_alter_state) { + char d[DIGEST_STRING_BUFFER_SIZE]; struct hash *h = hash_init(); hash_string(h, "message"); - free(hash_result(h)); + hash_result_as_string(h, d); hash_string(h, " digest"); - CHECK_STR_EQ_FREE2("d9130a8164549fe818874806e1c7014b-14", hash_result(h)); + hash_result_as_string(h, d); + CHECK_STR_EQ("d9130a8164549fe818874806e1c7014b0000000e", d); hash_free(h); } TEST(hash_result_should_be_idempotent) { + char d[DIGEST_STRING_BUFFER_SIZE]; struct hash *h = hash_init(); - hash_string(h, ""); - CHECK_STR_EQ_FREE2("31d6cfe0d16ae931b73c59d7e0c089c0-0", hash_result(h)); - CHECK_STR_EQ_FREE2("31d6cfe0d16ae931b73c59d7e0c089c0-0", hash_result(h)); + hash_result_as_string(h, d); + CHECK_STR_EQ("31d6cfe0d16ae931b73c59d7e0c089c000000000", d); + hash_result_as_string(h, d); + CHECK_STR_EQ("31d6cfe0d16ae931b73c59d7e0c089c000000000", d); hash_free(h); } +TEST(hash_result_as_bytes) +{ + struct hash *h = hash_init(); + hash_string(h, "message digest"); + struct digest d; + hash_result_as_bytes(h, &d); + uint8_t expected[sizeof(d.bytes)] = { + 0xd9, 0x13, 0x0a, 0x81, 0x64, 0x54, 0x9f, 0xe8, 0x18, 0x87, 0x48, 0x06, + 0xe1, 0xc7, 0x01, 0x4b, 0x00, 0x00, 0x00, 0x0e + }; + CHECK_DATA_EQ(d.bytes, expected, sizeof(d.bytes)); + hash_free(h); +} + 
TEST_SUITE_END diff --git a/unittest/test_hashutil.c b/unittest/test_hashutil.c index d05769e5f..0583f235e 100644 --- a/unittest/test_hashutil.c +++ b/unittest/test_hashutil.c @@ -25,12 +25,17 @@ TEST_SUITE(hashutil) TEST(hash_command_output_simple) { + char d1[DIGEST_STRING_BUFFER_SIZE]; + char d2[DIGEST_STRING_BUFFER_SIZE]; + struct hash *h1 = hash_init(); struct hash *h2 = hash_init(); CHECK(hash_command_output(h1, "echo", "not used")); CHECK(hash_command_output(h2, "echo", "not used")); - CHECK(hash_equal(h1, h2)); + hash_result_as_string(h1, d1); + hash_result_as_string(h2, d2); + CHECK_STR_EQ(d1, d2); hash_free(h2); hash_free(h1); @@ -38,12 +43,17 @@ TEST(hash_command_output_simple) TEST(hash_command_output_space_removal) { + char d1[DIGEST_STRING_BUFFER_SIZE]; + char d2[DIGEST_STRING_BUFFER_SIZE]; + struct hash *h1 = hash_init(); struct hash *h2 = hash_init(); CHECK(hash_command_output(h1, "echo", "not used")); CHECK(hash_command_output(h2, " echo ", "not used")); - CHECK(hash_equal(h1, h2)); + hash_result_as_string(h1, d1); + hash_result_as_string(h2, d2); + CHECK_STR_EQ(d1, d2); hash_free(h2); hash_free(h1); @@ -51,12 +61,17 @@ TEST(hash_command_output_space_removal) TEST(hash_command_output_hash_inequality) { + char d1[DIGEST_STRING_BUFFER_SIZE]; + char d2[DIGEST_STRING_BUFFER_SIZE]; + struct hash *h1 = hash_init(); struct hash *h2 = hash_init(); CHECK(hash_command_output(h1, "echo foo", "not used")); CHECK(hash_command_output(h2, "echo bar", "not used")); - CHECK(!hash_equal(h1, h2)); + hash_result_as_string(h1, d1); + hash_result_as_string(h2, d2); + CHECK(!str_eq(d1, d2)); hash_free(h2); hash_free(h1); @@ -64,12 +79,17 @@ TEST(hash_command_output_hash_inequality) TEST(hash_command_output_compiler_substitution) { + char d1[DIGEST_STRING_BUFFER_SIZE]; + char d2[DIGEST_STRING_BUFFER_SIZE]; + struct hash *h1 = hash_init(); struct hash *h2 = hash_init(); CHECK(hash_command_output(h1, "echo foo", "not used")); CHECK(hash_command_output(h2, "%compiler% foo", 
"echo")); - CHECK(hash_equal(h1, h2)); + hash_result_as_string(h1, d1); + hash_result_as_string(h2, d2); + CHECK_STR_EQ(d1, d2); hash_free(h2); hash_free(h1); @@ -77,6 +97,9 @@ TEST(hash_command_output_compiler_substitution) TEST(hash_command_output_stdout_versus_stderr) { + char d1[DIGEST_STRING_BUFFER_SIZE]; + char d2[DIGEST_STRING_BUFFER_SIZE]; + struct hash *h1 = hash_init(); struct hash *h2 = hash_init(); @@ -90,7 +113,9 @@ TEST(hash_command_output_stdout_versus_stderr) CHECK(hash_command_output(h1, "echo foo", "not used")); CHECK(hash_command_output(h2, "stderr.bat", "not used")); #endif - CHECK(hash_equal(h1, h2)); + hash_result_as_string(h1, d1); + hash_result_as_string(h2, d2); + CHECK_STR_EQ(d1, d2); hash_free(h2); hash_free(h1); @@ -98,6 +123,9 @@ TEST(hash_command_output_stdout_versus_stderr) TEST(hash_multicommand_output) { + char d1[DIGEST_STRING_BUFFER_SIZE]; + char d2[DIGEST_STRING_BUFFER_SIZE]; + struct hash *h1 = hash_init(); struct hash *h2 = hash_init(); @@ -111,7 +139,9 @@ TEST(hash_multicommand_output) CHECK(hash_multicommand_output(h2, "echo foo; echo bar", "not used")); CHECK(hash_multicommand_output(h1, "foo.bat", "not used")); #endif - CHECK(hash_equal(h1, h2)); + hash_result_as_string(h1, d1); + hash_result_as_string(h2, d2); + CHECK_STR_EQ(d1, d2); hash_free(h2); hash_free(h1); diff --git a/unittest/test_util.c b/unittest/test_util.c index b8e7384d7..07bd85c3b 100644 --- a/unittest/test_util.c +++ b/unittest/test_util.c @@ -1,4 +1,4 @@ -// Copyright (C) 2010-2018 Joel Rosdahl +// Copyright (C) 2010-2019 Joel Rosdahl // // This program is free software; you can redistribute it and/or modify it // under the terms of the GNU General Public License as published by the Free @@ -85,50 +85,6 @@ TEST(get_relative_path) #endif } -TEST(format_hash_as_string) -{ - unsigned char hash[16] = { - "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" - }; - - CHECK_STR_EQ_FREE2("00000000000000000000000000000000", - 
format_hash_as_string(hash, -1)); - CHECK_STR_EQ_FREE2("00000000000000000000000000000000-0", - format_hash_as_string(hash, 0)); - hash[0] = 17; - hash[15] = 42; - CHECK_STR_EQ_FREE2("1100000000000000000000000000002a-12345", - format_hash_as_string(hash, 12345)); -} - -TEST(format_hash_as_binary) -{ - unsigned char hash[16] = { - "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" - }; - - unsigned char data[20] = { - "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00" - }; - - binary buf; - CHECK_INT_EQ(sizeof(buf), 20); - - format_hash_as_binary(buf, hash, 0); - CHECK_DATA_EQ(data, buf, 20); - hash[0] = 17; - hash[15] = 42; - format_hash_as_binary(buf, hash, 12345); - // data[0:16] = hash - data[0] = 0x11; - data[15] = 0x2a; - // 12345 = 0x3039 BE - data[18] = 0x30; - data[19] = 0x39; - CHECK_DATA_EQ(data, buf, 20); -} - TEST(subst_env_in_string) { char *errmsg; @@ -237,13 +193,19 @@ TEST(format_command) TEST(format_hex) { - unsigned char none[] = ""; - unsigned char text[4] = "foo"; // incl. NUL - unsigned char data[4] = "\x00\x01\x02\x03"; + uint8_t none[] = ""; + uint8_t text[4] = "foo"; // incl. NUL + uint8_t data[4] = "\x00\x01\x02\x03"; + char result[2 * sizeof(data) + 1] = "."; + + format_hex(none, 0, result); + CHECK_STR_EQ("", result); + + format_hex(text, sizeof(text), result); + CHECK_STR_EQ("666f6f00", result); - CHECK_STR_EQ_FREE2("", format_hex(none, 0)); - CHECK_STR_EQ_FREE2("666f6f00", format_hex(text, sizeof(text))); - CHECK_STR_EQ_FREE2("00010203", format_hex(data, sizeof(data))); + format_hex(data, sizeof(data), result); + CHECK_STR_EQ("00010203", result); } TEST_SUITE_END