]> git.ipfire.org Git - thirdparty/git.git/blobdiff - sha1_file.c
Unmap individual windows rather than entire files.
[thirdparty/git.git] / sha1_file.c
index 1c4df5b73e9dff900c69c0994eb21105614511b6..8e14a5a882c8ff7893b1d548390f7359f60d8fa4 100644 (file)
@@ -397,7 +397,6 @@ static char *find_sha1_file(const unsigned char *sha1, struct stat *st)
        return NULL;
 }
 
-#define PACK_MAX_SZ (1<<26)
 static int pack_used_ctr;
 static unsigned long pack_mapped;
 struct packed_git *packed_git;
@@ -451,86 +450,120 @@ static int check_packed_git_idx(const char *path, unsigned long *idx_size_,
        return 0;
 }
 
-static int unuse_one_packed_git(void)
+static int unuse_one_window(void)
 {
-       struct packed_git *p, *lru = NULL;
+       struct packed_git *p, *lru_p = NULL;
+       struct pack_window *w, *w_l, *lru_w = NULL, *lru_l = NULL;
 
        for (p = packed_git; p; p = p->next) {
-               if (p->pack_use_cnt || !p->pack_base)
-                       continue;
-               if (!lru || p->pack_last_used < lru->pack_last_used)
-                       lru = p;
+               for (w_l = NULL, w = p->windows; w; w = w->next) {
+                       if (!w->inuse_cnt) {
+                               if (!lru_w || w->last_used < lru_w->last_used) {
+                                       lru_p = p;
+                                       lru_w = w;
+                                       lru_l = w_l;
+                               }
+                       }
+                       w_l = w;
+               }
        }
-       if (!lru)
-               return 0;
-       munmap(lru->pack_base, lru->pack_size);
-       lru->pack_base = NULL;
-       return 1;
+       if (lru_p) {
+               munmap(lru_w->base, lru_w->len);
+               pack_mapped -= lru_w->len;
+               if (lru_l)
+                       lru_l->next = lru_w->next;
+               else {
+                       lru_p->windows = lru_w->next;
+                       if (!lru_p->windows) {
+                               close(lru_p->pack_fd);
+                               lru_p->pack_fd = -1;
+                       }
+               }
+               free(lru_w);
+               return 1;
+       }
+       return 0;
 }
 
-void unuse_packed_git(struct packed_git *p)
+void unuse_pack(struct pack_window **w_cursor)
 {
-       p->pack_use_cnt--;
+       struct pack_window *w = *w_cursor;
+       if (w) {
+               w->inuse_cnt--;
+               *w_cursor = NULL;
+       }
 }
 
-int use_packed_git(struct packed_git *p)
+static void open_packed_git(struct packed_git *p)
 {
+       struct stat st;
+       struct pack_header hdr;
+       unsigned char sha1[20];
+       unsigned char *idx_sha1;
+
+       p->pack_fd = open(p->pack_name, O_RDONLY);
+       if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
+               die("packfile %s cannot be opened", p->pack_name);
+
+       /* If we created the struct before we had the pack we lack size. */
        if (!p->pack_size) {
-               struct stat st;
-               /* We created the struct before we had the pack */
-               stat(p->pack_name, &st);
                if (!S_ISREG(st.st_mode))
                        die("packfile %s not a regular file", p->pack_name);
                p->pack_size = st.st_size;
-       }
-       if (!p->pack_base) {
-               int fd;
-               struct stat st;
-               void *map;
-               struct pack_header *hdr;
-
+       } else if (p->pack_size != st.st_size)
+               die("packfile %s size changed", p->pack_name);
+
+       /* Verify we recognize this pack file format. */
+       read_or_die(p->pack_fd, &hdr, sizeof(hdr));
+       if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
+               die("file %s is not a GIT packfile", p->pack_name);
+       if (!pack_version_ok(hdr.hdr_version))
+               die("packfile %s is version %u and not supported"
+                       " (try upgrading GIT to a newer version)",
+                       p->pack_name, ntohl(hdr.hdr_version));
+
+       /* Verify the pack matches its index. */
+       if (num_packed_objects(p) != ntohl(hdr.hdr_entries))
+               die("packfile %s claims to have %u objects"
+                       " while index size indicates %u objects",
+                       p->pack_name, ntohl(hdr.hdr_entries),
+                       num_packed_objects(p));
+       if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1)
+               die("end of packfile %s is unavailable", p->pack_name);
+       read_or_die(p->pack_fd, sha1, sizeof(sha1));
+       idx_sha1 = ((unsigned char *)p->index_base) + p->index_size - 40;
+       if (hashcmp(sha1, idx_sha1))
+               die("packfile %s does not match index", p->pack_name);
+}
+
+unsigned char* use_pack(struct packed_git *p,
+               struct pack_window **w_cursor,
+               unsigned long offset,
+               unsigned int *left)
+{
+       struct pack_window *win = p->windows;
+
+       if (p->pack_fd == -1)
+               open_packed_git(p);
+       if (!win) {
                pack_mapped += p->pack_size;
-               while (PACK_MAX_SZ < pack_mapped && unuse_one_packed_git())
+               while (packed_git_limit < pack_mapped && unuse_one_window())
                        ; /* nothing */
-               fd = open(p->pack_name, O_RDONLY);
-               if (fd < 0)
-                       die("packfile %s cannot be opened", p->pack_name);
-               if (fstat(fd, &st)) {
-                       close(fd);
-                       die("packfile %s cannot be opened", p->pack_name);
-               }
-               if (st.st_size != p->pack_size)
-                       die("packfile %s size mismatch.", p->pack_name);
-               map = mmap(NULL, p->pack_size, PROT_READ, MAP_PRIVATE, fd, 0);
-               close(fd);
-               if (map == MAP_FAILED)
+               win = xcalloc(1, sizeof(*win));
+               win->len = p->pack_size;
+               win->base = mmap(NULL, p->pack_size, PROT_READ, MAP_PRIVATE, p->pack_fd, 0);
+               if (win->base == MAP_FAILED)
                        die("packfile %s cannot be mapped.", p->pack_name);
-               p->pack_base = map;
-
-               /* Check if we understand this pack file.  If we don't we're
-                * likely too old to handle it.
-                */
-               hdr = map;
-               if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
-                       die("packfile %s isn't actually a pack.", p->pack_name);
-               if (!pack_version_ok(hdr->hdr_version))
-                       die("packfile %s is version %i and not supported"
-                               " (try upgrading GIT to a newer version)",
-                               p->pack_name, ntohl(hdr->hdr_version));
-
-               /* Check if the pack file matches with the index file.
-                * this is cheap.
-                */
-               if (hashcmp((unsigned char *)(p->index_base) +
-                           p->index_size - 40,
-                           (unsigned char *)p->pack_base +
-                           p->pack_size - 20)) {
-                       die("packfile %s does not match index.", p->pack_name);
-               }
+               p->windows = win;
        }
-       p->pack_last_used = pack_used_ctr++;
-       p->pack_use_cnt++;
-       return 0;
+       if (win != *w_cursor) {
+               win->last_used = pack_used_ctr++;
+               win->inuse_cnt++;
+               *w_cursor = win;
+       }
+       if (left)
+               *left = win->len - offset;
+       return win->base + offset;
 }
 
 struct packed_git *add_packed_git(char *path, int path_len, int local)
@@ -559,9 +592,8 @@ struct packed_git *add_packed_git(char *path, int path_len, int local)
        p->pack_size = st.st_size;
        p->index_base = idx_map;
        p->next = NULL;
-       p->pack_base = NULL;
-       p->pack_last_used = 0;
-       p->pack_use_cnt = 0;
+       p->windows = NULL;
+       p->pack_fd = -1;
        p->pack_local = local;
        if ((path_len > 44) && !get_sha1_hex(path + path_len - 44, sha1))
                hashcpy(p->sha1, sha1);
@@ -592,9 +624,8 @@ struct packed_git *parse_pack_index_file(const unsigned char *sha1, char *idx_pa
        p->pack_size = 0;
        p->index_base = idx_map;
        p->next = NULL;
-       p->pack_base = NULL;
-       p->pack_last_used = 0;
-       p->pack_use_cnt = 0;
+       p->windows = NULL;
+       p->pack_fd = -1;
        hashcpy(p->sha1, sha1);
        return p;
 }
@@ -878,18 +909,21 @@ void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned l
 }
 
 static unsigned long get_delta_base(struct packed_git *p,
+                                   struct pack_window **w_curs,
                                    unsigned long offset,
                                    enum object_type kind,
                                    unsigned long delta_obj_offset,
                                    unsigned long *base_obj_offset)
 {
-       unsigned char *base_info = (unsigned char *) p->pack_base + offset;
+       unsigned char *base_info = use_pack(p, w_curs, offset, NULL);
        unsigned long base_offset;
 
-       /* there must be at least 20 bytes left regardless of delta type */
-       if (p->pack_size <= offset + 20)
-               die("truncated pack file");
-
+       /* use_pack() assured us we have [base_info, base_info + 20)
+        * as a range that we can look at without walking off the
+        * end of the mapped window.  It's actually the hash size
+        * that is assured.  An OFS_DELTA longer than the hash size
+        * is stupid, as then a REF_DELTA would be smaller to store.
+        */
        if (kind == OBJ_OFS_DELTA) {
                unsigned used = 0;
                unsigned char c = base_info[used++];
@@ -923,6 +957,7 @@ static int packed_object_info(struct packed_git *p, unsigned long offset,
                              char *type, unsigned long *sizep);
 
 static int packed_delta_info(struct packed_git *p,
+                            struct pack_window **w_curs,
                             unsigned long offset,
                             enum object_type kind,
                             unsigned long obj_offset,
@@ -931,7 +966,8 @@ static int packed_delta_info(struct packed_git *p,
 {
        unsigned long base_offset;
 
-       offset = get_delta_base(p, offset, kind, obj_offset, &base_offset);
+       offset = get_delta_base(p, w_curs, offset, kind,
+               obj_offset, &base_offset);
 
        /* We choose to only get the type of the base object and
         * ignore potentially corrupt pack file that expects the delta
@@ -943,20 +979,23 @@ static int packed_delta_info(struct packed_git *p,
 
        if (sizep) {
                const unsigned char *data;
-               unsigned char delta_head[20];
+               unsigned char delta_head[20], *in;
                unsigned long result_size;
                z_stream stream;
                int st;
 
                memset(&stream, 0, sizeof(stream));
-
-               stream.next_in = (unsigned char *) p->pack_base + offset;
-               stream.avail_in = p->pack_size - offset;
                stream.next_out = delta_head;
                stream.avail_out = sizeof(delta_head);
 
                inflateInit(&stream);
-               st = inflate(&stream, Z_FINISH);
+               do {
+                       in = use_pack(p, w_curs, offset, &stream.avail_in);
+                       stream.next_in = in;
+                       st = inflate(&stream, Z_FINISH);
+                       offset += stream.next_in - in;
+               } while ((st == Z_OK || st == Z_BUF_ERROR)
+                       && stream.total_out < sizeof(delta_head));
                inflateEnd(&stream);
                if ((st != Z_STREAM_END) &&
                    stream.total_out != sizeof(delta_head))
@@ -977,17 +1016,24 @@ static int packed_delta_info(struct packed_git *p,
        return 0;
 }
 
-static unsigned long unpack_object_header(struct packed_git *p, unsigned long offset,
-       enum object_type *type, unsigned long *sizep)
+static unsigned long unpack_object_header(struct packed_git *p,
+               struct pack_window **w_curs,
+               unsigned long offset,
+               enum object_type *type,
+               unsigned long *sizep)
 {
+       unsigned char *base;
+       unsigned int left;
        unsigned long used;
 
-       if (p->pack_size <= offset)
-               die("object offset outside of pack file");
-
-       used = unpack_object_header_gently((unsigned char *)p->pack_base +
-                                          offset,
-                                          p->pack_size - offset, type, sizep);
+       /* use_pack() assures us we have [base, base + 20) available
+        * as a range that we can look at.  (It's actually the hash
+        * size that is assured.)  With our object header encoding
+        * the maximum deflated object size is 2^137, which is just
+        * insane, so we know we won't exceed what we have been given.
+        */
+       base = use_pack(p, w_curs, offset, &left);
+       used = unpack_object_header_gently(base, left, type, sizep);
        if (!used)
                die("object offset outside of pack file");
 
@@ -1002,13 +1048,14 @@ void packed_object_info_detail(struct packed_git *p,
                               unsigned int *delta_chain_length,
                               unsigned char *base_sha1)
 {
+       struct pack_window *w_curs = NULL;
        unsigned long obj_offset, val;
        unsigned char *next_sha1;
        enum object_type kind;
 
        *delta_chain_length = 0;
        obj_offset = offset;
-       offset = unpack_object_header(p, offset, &kind, size);
+       offset = unpack_object_header(p, &w_curs, offset, &kind, size);
 
        for (;;) {
                switch (kind) {
@@ -1021,25 +1068,24 @@ void packed_object_info_detail(struct packed_git *p,
                case OBJ_TAG:
                        strcpy(type, type_names[kind]);
                        *store_size = 0; /* notyet */
+                       unuse_pack(&w_curs);
                        return;
                case OBJ_OFS_DELTA:
-                       get_delta_base(p, offset, kind, obj_offset, &offset);
+                       get_delta_base(p, &w_curs, offset, kind,
+                               obj_offset, &offset);
                        if (*delta_chain_length == 0) {
                                /* TODO: find base_sha1 as pointed by offset */
                        }
                        break;
                case OBJ_REF_DELTA:
-                       if (p->pack_size <= offset + 20)
-                               die("pack file %s records an incomplete delta base",
-                                   p->pack_name);
-                       next_sha1 = (unsigned char *) p->pack_base + offset;
+                       next_sha1 = use_pack(p, &w_curs, offset, NULL);
                        if (*delta_chain_length == 0)
                                hashcpy(base_sha1, next_sha1);
                        offset = find_pack_entry_one(next_sha1, p);
                        break;
                }
                obj_offset = offset;
-               offset = unpack_object_header(p, offset, &kind, &val);
+               offset = unpack_object_header(p, &w_curs, offset, &kind, &val);
                (*delta_chain_length)++;
        }
 }
@@ -1047,20 +1093,26 @@ void packed_object_info_detail(struct packed_git *p,
 static int packed_object_info(struct packed_git *p, unsigned long offset,
                              char *type, unsigned long *sizep)
 {
+       struct pack_window *w_curs = NULL;
        unsigned long size, obj_offset = offset;
        enum object_type kind;
+       int r;
 
-       offset = unpack_object_header(p, offset, &kind, &size);
+       offset = unpack_object_header(p, &w_curs, offset, &kind, &size);
 
        switch (kind) {
        case OBJ_OFS_DELTA:
        case OBJ_REF_DELTA:
-               return packed_delta_info(p, offset, kind, obj_offset, type, sizep);
+               r = packed_delta_info(p, &w_curs, offset, kind,
+                       obj_offset, type, sizep);
+               unuse_pack(&w_curs);
+               return r;
        case OBJ_COMMIT:
        case OBJ_TREE:
        case OBJ_BLOB:
        case OBJ_TAG:
                strcpy(type, type_names[kind]);
+               unuse_pack(&w_curs);
                break;
        default:
                die("pack %s contains unknown object type %d",
@@ -1072,23 +1124,27 @@ static int packed_object_info(struct packed_git *p, unsigned long offset,
 }
 
 static void *unpack_compressed_entry(struct packed_git *p,
+                                   struct pack_window **w_curs,
                                    unsigned long offset,
                                    unsigned long size)
 {
        int st;
        z_stream stream;
-       unsigned char *buffer;
+       unsigned char *buffer, *in;
 
        buffer = xmalloc(size + 1);
        buffer[size] = 0;
        memset(&stream, 0, sizeof(stream));
-       stream.next_in = (unsigned char*)p->pack_base + offset;
-       stream.avail_in = p->pack_size - offset;
        stream.next_out = buffer;
        stream.avail_out = size;
 
        inflateInit(&stream);
-       st = inflate(&stream, Z_FINISH);
+       do {
+               in = use_pack(p, w_curs, offset, &stream.avail_in);
+               stream.next_in = in;
+               st = inflate(&stream, Z_FINISH);
+               offset += stream.next_in - in;
+       } while (st == Z_OK || st == Z_BUF_ERROR);
        inflateEnd(&stream);
        if ((st != Z_STREAM_END) || stream.total_out != size) {
                free(buffer);
@@ -1099,6 +1155,7 @@ static void *unpack_compressed_entry(struct packed_git *p,
 }
 
 static void *unpack_delta_entry(struct packed_git *p,
+                               struct pack_window **w_curs,
                                unsigned long offset,
                                unsigned long delta_size,
                                enum object_type kind,
@@ -1109,13 +1166,14 @@ static void *unpack_delta_entry(struct packed_git *p,
        void *delta_data, *result, *base;
        unsigned long result_size, base_size, base_offset;
 
-       offset = get_delta_base(p, offset, kind, obj_offset, &base_offset);
-       base = unpack_entry_gently(p, base_offset, type, &base_size);
+       offset = get_delta_base(p, w_curs, offset, kind,
+               obj_offset, &base_offset);
+       base = unpack_entry(p, base_offset, type, &base_size);
        if (!base)
                die("failed to read delta base object at %lu from %s",
                    base_offset, p->pack_name);
 
-       delta_data = unpack_compressed_entry(p, offset, delta_size);
+       delta_data = unpack_compressed_entry(p, w_curs, offset, delta_size);
        result = patch_delta(base, base_size,
                             delta_data, delta_size,
                             &result_size);
@@ -1127,43 +1185,34 @@ static void *unpack_delta_entry(struct packed_git *p,
        return result;
 }
 
-static void *unpack_entry(struct pack_entry *entry,
-                         char *type, unsigned long *sizep)
-{
-       struct packed_git *p = entry->p;
-       void *retval;
-
-       if (use_packed_git(p))
-               die("cannot map packed file");
-       retval = unpack_entry_gently(p, entry->offset, type, sizep);
-       unuse_packed_git(p);
-       if (!retval)
-               die("corrupted pack file %s", p->pack_name);
-       return retval;
-}
-
-/* The caller is responsible for use_packed_git()/unuse_packed_git() pair */
-void *unpack_entry_gently(struct packed_git *p, unsigned long offset,
+void *unpack_entry(struct packed_git *p, unsigned long offset,
                          char *type, unsigned long *sizep)
 {
+       struct pack_window *w_curs = NULL;
        unsigned long size, obj_offset = offset;
        enum object_type kind;
+       void *retval;
 
-       offset = unpack_object_header(p, offset, &kind, &size);
+       offset = unpack_object_header(p, &w_curs, offset, &kind, &size);
        switch (kind) {
        case OBJ_OFS_DELTA:
        case OBJ_REF_DELTA:
-               return unpack_delta_entry(p, offset, size, kind, obj_offset, type, sizep);
+               retval = unpack_delta_entry(p, &w_curs, offset, size,
+                       kind, obj_offset, type, sizep);
+               break;
        case OBJ_COMMIT:
        case OBJ_TREE:
        case OBJ_BLOB:
        case OBJ_TAG:
                strcpy(type, type_names[kind]);
                *sizep = size;
-               return unpack_compressed_entry(p, offset, size);
+               retval = unpack_compressed_entry(p, &w_curs, offset, size);
+               break;
        default:
-               return NULL;
+               die("unknown object type %i in %s", kind, p->pack_name);
        }
+       unuse_pack(&w_curs);
+       return retval;
 }
 
 int num_packed_objects(const struct packed_git *p)
@@ -1289,7 +1338,6 @@ static int sha1_loose_object_info(const unsigned char *sha1, char *type, unsigne
 
 int sha1_object_info(const unsigned char *sha1, char *type, unsigned long *sizep)
 {
-       int status;
        struct pack_entry e;
 
        if (!find_pack_entry(sha1, &e, NULL)) {
@@ -1297,11 +1345,7 @@ int sha1_object_info(const unsigned char *sha1, char *type, unsigned long *sizep
                if (!find_pack_entry(sha1, &e, NULL))
                        return sha1_loose_object_info(sha1, type, sizep);
        }
-       if (use_packed_git(e.p))
-               die("cannot map packed file");
-       status = packed_object_info(e.p, e.offset, type, sizep);
-       unuse_packed_git(e.p);
-       return status;
+       return packed_object_info(e.p, e.offset, type, sizep);
 }
 
 static void *read_packed_sha1(const unsigned char *sha1, char *type, unsigned long *size)
@@ -1312,7 +1356,7 @@ static void *read_packed_sha1(const unsigned char *sha1, char *type, unsigned lo
                error("cannot read sha1_file for %s", sha1_to_hex(sha1));
                return NULL;
        }
-       return unpack_entry(&e, type, size);
+       return unpack_entry(e.p, e.offset, type, size);
 }
 
 void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size)