Revise disk format for results

author Joel Rosdahl <joel@rosdahl.net>
Thu, 30 May 2019 18:37:12 +0000 (20:37 +0200)

committer Joel Rosdahl <joel@rosdahl.net>
Tue, 4 Jun 2019 20:18:07 +0000 (22:18 +0200)
diff --git a/src/ccache.c b/src/ccache.c

index 253268363f56ec623fa8d63ed68c04f669f1f2ac..b1fe0ff49e3e164880225e7b7d00a2461eab7da3 100644 (file)
--- a/src/ccache.c
+++ b/src/ccache.c
@@ -1387,7 +1387,7 @@ to_cache(struct args *args, struct hash *depend_mode_hash)
         }
         struct filelist *filelist = create_empty_filelist();
         if (st.st_size > 0) {
-               add_file_to_filelist(filelist, tmp_stderr, ".stderr");
+               add_file_to_filelist(filelist, tmp_stderr, "stderr");
         }
         add_file_to_filelist(filelist, output_obj, ".o");
         if (generating_dependencies) {
@@ -1812,6 +1812,9 @@ calculate_object_hash(struct args *args, struct hash *hash, int direct_mode)
  {
         bool found_ccbin = false;
  
+       hash_delimiter(hash, "result version");
+       hash_int(hash, RESULT_VERSION);
+
         if (direct_mode) {
                 hash_delimiter(hash, "manifest version");
                 hash_int(hash, MANIFEST_VERSION);
@@ -2151,7 +2154,7 @@ from_cache(enum fromcache_call_mode mode, bool put_object_in_manifest)
                         add_file_to_filelist(filelist, output_dwo, ".dwo");
                 }
         }
-       add_file_to_filelist(filelist, tmp_stderr, ".stderr");
+       add_file_to_filelist(filelist, tmp_stderr, "stderr");
         if (produce_dep_file) {
                 add_file_to_filelist(filelist, output_dep, ".d");
         }
diff --git a/src/manifest.c b/src/manifest.c

index 7b272b9c704bafabf0b9ec84940bc398ee11d93a..9d614f94b3f5b258fd02bde980641f60cbcd2283 100644 (file)
--- a/src/manifest.c
+++ b/src/manifest.c
@@ -22,9 +22,9 @@
  
  #include <zlib.h>
  
-// Sketchy specification of the manifest disk format:
+// Sketchy specification of the manifest data format:
  //
-// <magic>         magic number                        (4 bytes)
+// <magic>         magic number                        (4 bytes: cCmF)
  // <version>       file format version                 (1 byte unsigned int)
  // <hash_size>     size of the hash fields (in bytes)  (1 byte unsigned int)
  // <reserved>      reserved for future use             (2 bytes)
@@ -64,7 +64,7 @@
  // <hash[n-1]>
  // <size[n-1]>
  
-static const uint32_t MAGIC = 0x63436d46U;
+static const uint32_t MAGIC = 0x63436d46U; // cCmF
  static const uint32_t MAX_MANIFEST_ENTRIES = 100;
  static const uint32_t MAX_MANIFEST_FILE_INFO_ENTRIES = 10000;
  
diff --git a/src/result.c b/src/result.c

index 31dff3f0da0c18c5a3c298e89f4c1f72b6ab28ee..bc735208f3c9577c9407a826c436882c43a7f277 100644 (file)
--- a/src/result.c
+++ b/src/result.c
@@ -1,4 +1,4 @@
-// Copyright (C) 2009-2018 Joel Rosdahl
+// Copyright (C) 2019 Joel Rosdahl
  //
  // This program is free software; you can redistribute it and/or modify it
  // under the terms of the GNU General Public License as published by the Free
@@ -19,10 +19,63 @@
  
  #include <zlib.h>
  
-static const uint32_t MAGIC = 0x63436343U;
+// Result data format:
+//
+// <result>      ::= <header> <body> ; <body> is potentially compressed
+// <header>      ::= <magic> <version> <compr_type> <compr_level>
+// <body>        ::= <entry>* <eof_marker>
+// <eof_marker>  ::= 0 (uint8_t)
+// <magic>       ::= uint32_t ; "cCrS"
+// <version>     ::= uint8_t
+// <compr_type>  ::= <compr_none> | <compr_gzip>
+// <compr_none>  ::= 0
+// <compr_gzip>  ::= 1
+// <compr_level> ::= uint8_t
+// <entry>       ::= <file_entry> | <ref_entry>
+// <file_entry>  ::= <file_marker> <suffix_len> <suffix> <data_len> <data>
+// <file_marker> ::= 1 (uint8_t)
+// <suffix_len>  ::= uint8_t
+// <suffix>      ::= suffix_len bytes
+// <data_len>    ::= uint64_t
+// <data>        ::= data_len bytes
+// <ref_entry>   ::= <ref_marker> <key_len> <key>
+// <ref_marker>  ::= 2 (uint8_t)
+// <key_len>     ::= uint8_t
+// <key>         ::= key_len bytes
+//
+// Sketch of concrete layout:
+//
+// <magic>         4 bytes
+// <version>       1 byte
+// <compr_type>    1 byte
+// <compr_level>   1 byte
+// --- [potentially compressed from here ] -----------------------------------
+// <file_marker>   1 byte
+// <suffix_len>    1 byte
+// <suffix>        suffix_len bytes
+// <data_len>      8 bytes
+// <data>          data_len bytes
+// ...
+// <ref_marker>    1 byte
+// <key_len>       1 byte
+// <key>           key_len bytes
+// ...
+// <eof_marker>    1 byte
+
+static const char MAGIC[4] = "cCrS";
+
+enum {
+       EOF_MARKER = 0,
+       FILE_MARKER = 1,
+       REF_MARKER = 2
+};
+
+enum {
+       COMPR_TYPE_NONE = 0,
+       COMPR_TYPE_GZIP = 1
+};
  
  struct file {
-       uint32_t suffix_len;
         char *suffix;
         uint32_t path_len;
         char *path;
@@ -54,7 +107,6 @@ add_file_to_filelist(struct filelist *l, const char *path, const char *suffix)
         struct file *f = &l->files[l->n_files];
         l->n_files++;
  
-       f->suffix_len = strlen(suffix);
         f->suffix = x_strdup(suffix);
         f->path_len = strlen(path);
         f->path = x_strdup(path);
@@ -100,24 +152,11 @@ free_filelist(struct filelist *l)
                 (var) = u_; \
         } while (false)
  
-#define READ_STR(var) \
+#define READ_BYTES(length, buf) \
         do { \
-               char buf_[1024]; \
-               size_t i_; \
-               for (i_ = 0; i_ < sizeof(buf_); i_++) { \
-                       int ch_ = gzgetc(f); \
-                       if (ch_ == EOF) { \
-                               goto error; \
-                       } \
-                       buf_[i_] = ch_; \
-                       if (ch_ == '\0') { \
-                               break; \
-                       } \
-               } \
-               if (i_ == sizeof(buf_)) { \
+               if (gzread(f, buf, length) != length) { \
                         goto error; \
                 } \
-               (var) = x_strdup(buf_); \
         } while (false)
  
  #define READ_FILE(size, path) \
@@ -135,71 +174,122 @@ free_filelist(struct filelist *l)
                 fclose(f_); \
         } while (false)
  
-
-static struct filelist *
-read_cache(gzFile f, struct filelist *l, bool copy)
+static bool
+read_cache(const char *path, struct filelist *l, FILE *dump_stream)
  {
-       uint32_t magic;
-       READ_INT(4, magic);
-       if (magic != MAGIC) {
-               cc_log("Cache file has bad magic number %u", magic);
-               goto error;
+       int fd = open(path, O_RDONLY | O_BINARY);
+       if (fd == -1) {
+               // Cache miss.
+               cc_log("No such cache file");
+               return false;
+       }
+
+       char header[7];
+       if (read(fd, header, sizeof(header)) != (ssize_t)sizeof(header)) {
+               close(fd);
+               cc_log("Failed to read result file header");
+               return false;
+       }
+
+       if (memcmp(header, MAGIC, sizeof(MAGIC)) != 0) {
+               cc_log("Cache file has bad magic value 0x%x%x%x%x",
+                      header[0], header[1], header[2], header[3]);
+               // TODO: Return error message like read_manifest does.
+               return false;
         }
  
-       uint8_t version;
-       READ_BYTE(version);
-       (void)version;
+       // TODO: Verify version like read_manifest does.
+       const uint8_t version = header[4];
+       const uint8_t compr_type = header[5];
+       switch (compr_type) {
+       case COMPR_TYPE_NONE:
+       case COMPR_TYPE_GZIP:
+               break;
+
+       default:
+               cc_log("Unknown compression type: %u", compr_type);
+               return false;
+       }
  
-       uint8_t hash_size;
-       READ_INT(1, hash_size);
-       (void)hash_size;
+       if (dump_stream) {
+               const uint8_t compr_level = header[6];
+               fprintf(dump_stream, "Magic: %c%c%c%c\n",
+                       MAGIC[0], MAGIC[1], MAGIC[2], MAGIC[3]);
+               fprintf(dump_stream, "Version: %u\n", version);
+               fprintf(dump_stream, "Compression type: %s\n",
+                       compr_type == COMPR_TYPE_NONE ? "none" : "gzip");
+               fprintf(dump_stream, "Compression level: %u\n", compr_level);
+       }
  
-       uint16_t reserved;
-       READ_INT(2, reserved);
-       (void)reserved;
+       gzFile f = gzdopen(fd, "rb");
+       if (!f) {
+               close(fd);
+               cc_log("Failed to gzdopen result file");
+               return false;
+       }
  
-       uint32_t n_files;
-       READ_INT(4, n_files);
+       uint8_t marker;
+       for (uint32_t i = 0; ; i++) {
+               READ_BYTE(marker);
+               switch (marker) {
+               case EOF_MARKER:
+                       gzclose(f);
+                       return true;
+
+               case FILE_MARKER:
+                       break;
+
+               case REF_MARKER:
+                       // TODO: Implement.
+                       continue;
+
+               default:
+                       cc_log("Unknown entry type: %u", marker);
+                       goto error;
+               }
  
-       for (uint32_t i = 0; i < n_files; i++) {
-               uint32_t sufflen;
-               READ_INT(4, sufflen);
-               char *suffix;
-               READ_STR(suffix);
+               uint8_t suffix_len;
+               READ_BYTE(suffix_len);
  
-               uint32_t filelen;
-               READ_INT(4, filelen);
+               char suffix[256 + 1];
+               READ_BYTES(suffix_len, suffix);
+               suffix[suffix_len] = '\0';
  
-               cc_log("Reading file #%d: %s (%u)", i, suffix, filelen);
+               uint64_t filelen;
+               READ_INT(8, filelen);
+
+               cc_log("Reading entry #%u: %s (%lu)",
+                      i,
+                      str_eq(suffix, "stderr") ? "<stderr>" : suffix,
+                      (unsigned long)filelen);
  
                 bool found = false;
-               if (copy) {
+               if (dump_stream) {
+                       fprintf(dump_stream,
+                               "Entry: %s (size: %" PRIu64 " bytes)\n",
+                               str_eq(suffix, "stderr") ? "<stderr>" : suffix,
+                               filelen);
+               } else {
                         for (uint32_t j = 0; j < l->n_files; j++) {
-                               if (sufflen == l->files[j].suffix_len &&
-                               str_eq(suffix, l->files[j].suffix)) {
+                               if (str_eq(suffix, l->files[j].suffix)) {
                                         found = true;
  
-                                       cc_log("Copying %s from cache", l->files[i].path);
+                                       cc_log("Copying file to %s", l->files[i].path);
  
                                         READ_FILE(filelen, l->files[j].path);
                                 }
                         }
-               } else {
-                       add_file_to_filelist(l, "", suffix);
-                       l->sizes[l->n_files-1] = filelen;
                 }
                 if (!found) {
                         // Skip the data, if no match
                         gzseek(f, filelen, SEEK_CUR);
                 }
-
-               free(suffix);
         }
-       return l;
  
  error:
+       gzclose(f);
         cc_log("Corrupt cache file");
-       return NULL;
+       return false;
  }
  
  #define WRITE_BYTE(var) \
@@ -222,9 +312,9 @@ error:
                 } \
         } while (false)
  
-#define WRITE_STR(var) \
+#define WRITE_BYTES(length, buf) \
         do { \
-               if (gzputs(f, var) == EOF || gzputc(f, '\0') == EOF) { \
+               if (gzwrite(f, buf, length) != (long)length) { \
                         goto error; \
                 } \
         } while (false)
@@ -247,31 +337,25 @@ error:
  static int
  write_cache(gzFile f, const struct filelist *l)
  {
-       WRITE_INT(4, MAGIC);
-
-       WRITE_BYTE(RESULT_VERSION);
-       WRITE_INT(1, 16);
-       WRITE_INT(2, 0);
-
-       WRITE_INT(4, l->n_files);
         for (uint32_t i = 0; i < l->n_files; i++) {
                 struct stat st;
                 if (x_stat(l->files[i].path, &st) != 0) {
                         return -1;
                 }
  
-               cc_log("Writing file #%d: %s (%ld)", i, l->files[i].suffix,
-                      (long)st.st_size);
-
-               WRITE_INT(4, l->files[i].suffix_len);
-               WRITE_STR(l->files[i].suffix);
+               cc_log("Writing file #%u: %s (%lu)", i, l->files[i].suffix,
+                      (unsigned long)st.st_size);
  
-               cc_log("Copying %s to cache", l->files[i].path);
-
-               WRITE_INT(4, st.st_size);
+               WRITE_BYTE(FILE_MARKER);
+               size_t suffix_len = strlen(l->files[i].suffix);
+               WRITE_BYTE(suffix_len);
+               WRITE_BYTES(suffix_len, l->files[i].suffix);
+               WRITE_INT(8, st.st_size);
                 WRITE_FILE(st.st_size, l->files[i].path);
         }
  
+       WRITE_BYTE(EOF_MARKER);
+
         return 1;
  
  error:
@@ -279,34 +363,10 @@ error:
         return 0;
  }
  
-bool cache_get(const char *cache_path, struct filelist *l)
+bool cache_get(const char *path, struct filelist *l)
  {
-       int ret = 0;
-       gzFile f = NULL;
-
-       int fd = open(cache_path, O_RDONLY | O_BINARY);
-       if (fd == -1) {
-               // Cache miss.
-               cc_log("No such cache file");
-               goto out;
-       }
-       f = gzdopen(fd, "rb");
-       if (!f) {
-               close(fd);
-               cc_log("Failed to gzdopen cache file");
-               goto out;
-       }
-       l = read_cache(f, l, true);
-       if (!l) {
-               cc_log("Error reading cache file");
-               goto out;
-       }
-       ret = 1;
-out:
-       if (f) {
-               gzclose(f);
-       }
-       return ret;
+       cc_log("Getting result %s from cache", path);
+       return read_cache(path, l, NULL);
  }
  
  bool cache_put(const char *cache_path, struct filelist *l, int compression_level)
@@ -314,21 +374,32 @@ bool cache_put(const char *cache_path, struct filelist *l, int compression_level
         int ret = 0;
         gzFile f2 = NULL;
         char *tmp_file = NULL;
-       char *mode;
  
         tmp_file = format("%s.tmp", cache_path);
         int fd = create_tmp_fd(&tmp_file);
+
+       char header[7];
+       memcpy(header, MAGIC, sizeof(MAGIC));
+       header[4] = RESULT_VERSION;
+       header[5] = compression_level == 0 ? COMPR_TYPE_NONE : COMPR_TYPE_GZIP;
+       header[6] = compression_level;
+       if (write(fd, header, sizeof(header)) != (ssize_t)sizeof(header)) {
+               cc_log("Failed to write to %s", tmp_file);
+               close(fd);
+       }
+
+       char *mode;
         if (compression_level > 0) {
                 mode = format("wb%d", compression_level);
         } else {
                 mode = x_strdup("wbT");
         }
         f2 = gzdopen(fd, mode);
+       free(mode);
         if (!f2) {
                 cc_log("Failed to gzdopen %s", tmp_file);
                 goto out;
         }
-       free(mode);
  
         if (write_cache(f2, l)) {
                 gzclose(f2);
@@ -356,46 +427,5 @@ out:
  bool
  cache_dump(const char *cache_path, FILE *stream)
  {
-       struct filelist *l = create_empty_filelist();
-       gzFile f = NULL;
-       bool ret = false;
-
-       int fd = open(cache_path, O_RDONLY | O_BINARY);
-       if (fd == -1) {
-               fprintf(stderr, "No such cache file: %s\n", cache_path);
-               goto out;
-       }
-       f = gzdopen(fd, "rb");
-       if (!f) {
-               fprintf(stderr, "Failed to gzdopen cache file\n");
-               close(fd);
-               goto out;
-       }
-       l = read_cache(f, l, false);
-       if (!l) {
-               fprintf(stderr, "Error reading cache file\n");
-               goto out;
-       }
-
-       fprintf(stream, "Magic: %c%c%c%c\n",
-               (MAGIC >> 24) & 0xFF,
-               (MAGIC >> 16) & 0xFF,
-               (MAGIC >> 8) & 0xFF,
-               MAGIC & 0xFF);
-       fprintf(stream, "File paths (%u):\n", (unsigned)l->n_files);
-       for (unsigned i = 0; i < l->n_files; ++i) {
-               fprintf(stream, "  %u: %s (%s)\n", i, l->files[i].suffix,
-                               format_human_readable_size(l->sizes[i]));
-       }
-
-       ret = true;
-
-out:
-       if (l) {
-               free_filelist(l);
-       }
-       if (f) {
-               gzclose(f);
-       }
-       return ret;
+       return read_cache(cache_path, NULL, stream);
  }
author	Joel Rosdahl <joel@rosdahl.net>
	Thu, 30 May 2019 18:37:12 +0000 (20:37 +0200)
committer	Joel Rosdahl <joel@rosdahl.net>
	Tue, 4 Jun 2019 20:18:07 +0000 (22:18 +0200)
src/ccache.c		patch | blob | blame | history
src/manifest.c		patch | blob | blame | history
src/result.c		patch | blob | blame | history