git.ipfire.org Git - thirdparty/git.git/commitdiff
object-file.c: refactor write_loose_object() to several steps
author Han Xin <hanxin.hx@alibaba-inc.com>
Sat, 11 Jun 2022 02:44:17 +0000 (10:44 +0800)
committer Junio C Hamano <gitster@pobox.com>
Mon, 13 Jun 2022 17:22:35 +0000 (10:22 -0700)
When writing a large blob using "write_loose_object()", we have to pass
a buffer with the whole content of the blob, and this behavior will
consume lots of memory and may cause OOM. We will introduce a streaming
version of the function ("stream_loose_object()") in a later commit to
resolve this issue.

Before introducing that streaming function, do some refactoring on
"write_loose_object()" to reuse code for both versions.

Rewrite "write_loose_object()" as follows:

 1. Figure out a path for the (temp) object file. This step is only
    used in "write_loose_object()".

 2. Move common steps for starting to write loose objects into a new
    function "start_loose_object_common()".

 3. Compress data.

 4. Move common steps for ending zlib stream into a new function
    "end_loose_object_common()".

 5. Close fd and finalize the object file.

Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Han Xin <chiyutianyi@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
object-file.c

index 79eb8339b60710b106748792d3faab59b374b62a..b5bce0327413a99cb334700f3b0bb10878636d23 100644 (file)
@@ -1951,6 +1951,74 @@ static int create_tmpfile(struct strbuf *tmp, const char *filename)
        return fd;
 }
 
+/**
+ * Common steps for loose object writers to start writing loose
+ * objects:
+ *
+ * - Create tmpfile for the loose object.
+ * - Set up zlib stream for compression and init the hash context.
+ * - Feed the header to both the zlib stream and the hash context.
+ *
+ * Returns the tmpfile "fd" (not consumed by end_loose_object_common()),
+ * or -1 / the error() or error_errno() result if it cannot be created.
+ */
+static int start_loose_object_common(struct strbuf *tmp_file,
+                                    const char *filename, unsigned flags,
+                                    git_zstream *stream,
+                                    unsigned char *buf, size_t buflen,
+                                    git_hash_ctx *c,
+                                    char *hdr, int hdrlen)
+{
+       int fd;
+
+       fd = create_tmpfile(tmp_file, filename);
+       if (fd < 0) {
+               if (flags & HASH_SILENT)
+                       return -1;
+               else if (errno == EACCES)
+                       return error(_("insufficient permission for adding "
+                                      "an object to repository database %s"),
+                                    get_object_directory());
+               else
+                       return error_errno(
+                               _("unable to create temporary file"));
+       }
+
+       /* Set up zlib stream for compression, with output going to buf */
+       git_deflate_init(stream, zlib_compression_level);
+       stream->next_out = buf;
+       stream->avail_out = buflen;
+       the_hash_algo->init_fn(c);
+
+       /* Feed the header to the zlib stream, then hash the same bytes */
+       stream->next_in = (unsigned char *)hdr;
+       stream->avail_in = hdrlen;
+       while (git_deflate(stream, 0) == Z_OK)
+               ; /* nothing */
+       the_hash_algo->update_fn(c, hdr, hdrlen);
+
+       return fd;
+}
+
+/**
+ * Common steps for loose object writers to end writing loose objects:
+ *
+ * - End the zlib stream; on failure return that status, "oid" unwritten.
+ * - Store the hash calculated in "c" into "oid" and return Z_OK.
+ */
+static int end_loose_object_common(git_hash_ctx *c, git_zstream *stream,
+                                  struct object_id *oid)
+{
+       int ret;
+
+       ret = git_deflate_end_gently(stream);
+       if (ret != Z_OK)
+               return ret;
+       the_hash_algo->final_oid_fn(oid, c);
+
+       return Z_OK;
+}
+
 static int write_loose_object(const struct object_id *oid, char *hdr,
                              int hdrlen, const void *buf, unsigned long len,
                              time_t mtime, unsigned flags)
@@ -1968,28 +2036,11 @@ static int write_loose_object(const struct object_id *oid, char *hdr,
 
        loose_object_path(the_repository, &filename, oid);
 
-       fd = create_tmpfile(&tmp_file, filename.buf);
-       if (fd < 0) {
-               if (flags & HASH_SILENT)
-                       return -1;
-               else if (errno == EACCES)
-                       return error(_("insufficient permission for adding an object to repository database %s"), get_object_directory());
-               else
-                       return error_errno(_("unable to create temporary file"));
-       }
-
-       /* Set it up */
-       git_deflate_init(&stream, zlib_compression_level);
-       stream.next_out = compressed;
-       stream.avail_out = sizeof(compressed);
-       the_hash_algo->init_fn(&c);
-
-       /* First header.. */
-       stream.next_in = (unsigned char *)hdr;
-       stream.avail_in = hdrlen;
-       while (git_deflate(&stream, 0) == Z_OK)
-               ; /* nothing */
-       the_hash_algo->update_fn(&c, hdr, hdrlen);
+       fd = start_loose_object_common(&tmp_file, filename.buf, flags,
+                                      &stream, compressed, sizeof(compressed),
+                                      &c, hdr, hdrlen);
+       if (fd < 0)
+               return -1;
 
        /* Then the data itself.. */
        stream.next_in = (void *)buf;
@@ -2007,11 +2058,10 @@ static int write_loose_object(const struct object_id *oid, char *hdr,
        if (ret != Z_STREAM_END)
                die(_("unable to deflate new object %s (%d)"), oid_to_hex(oid),
                    ret);
-       ret = git_deflate_end_gently(&stream);
+       ret = end_loose_object_common(&c, &stream, &parano_oid);
        if (ret != Z_OK)
                die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid),
                    ret);
-       the_hash_algo->final_oid_fn(&parano_oid, &c);
        if (!oideq(oid, &parano_oid))
                die(_("confused by unstable object source data for %s"),
                    oid_to_hex(oid));