git.ipfire.org Git - thirdparty/git.git/commitdiff
Merge branch 'jk/maint-avoid-streaming-filtered-contents'
author Junio C Hamano <gitster@pobox.com>
Mon, 27 Feb 2012 07:05:38 +0000 (23:05 -0800)
committer Junio C Hamano <gitster@pobox.com>
Mon, 27 Feb 2012 07:05:38 +0000 (23:05 -0800)
* jk/maint-avoid-streaming-filtered-contents:
  do not stream large files to pack when filters are in use
  teach dry-run convert_to_git not to require a src buffer
  teach convert_to_git a "dry run" mode

1  2 
convert.c
sha1_file.c

diff --combined convert.c
index 33373b3ac0ebeaeb39167b006a9e11494b5e837a,aa7f72d8a0515bd1b6b4ec018a5604fd5c1549ab..4534e2c2b003998a590637fc43c0549ff23f71b2
+++ b/convert.c
@@@ -2,7 -2,6 +2,7 @@@
  #include "attr.h"
  #include "run-command.h"
  #include "quote.h"
 +#include "sigchain.h"
  
  /*
   * convert.c - convert a file when checking it out and checking it in.
@@@ -196,9 -195,17 +196,17 @@@ static int crlf_to_git(const char *path
        char *dst;
  
        if (crlf_action == CRLF_BINARY ||
-           (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE) || !len)
+           (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE) ||
+           (src && !len))
                return 0;
  
+       /*
+        * If we are doing a dry-run and have no source buffer, there is
+        * nothing to analyze; we must assume we would convert.
+        */
+       if (!buf && !src)
+               return 1;
        gather_stats(src, len, &stats);
  
        if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) {
        if (!stats.cr)
                return 0;
  
+       /*
+        * At this point all of our source analysis is done, and we are sure we
+        * would convert. If we are in dry-run mode, we can give an answer.
+        */
+       if (!buf)
+               return 1;
        /* only grow if not in place */
        if (strbuf_avail(buf) + buf->len < len)
                strbuf_grow(buf, len - buf->len);
@@@ -361,16 -375,12 +376,16 @@@ static int filter_buffer(int in, int ou
        if (start_command(&child_process))
                return error("cannot fork to run external filter %s", params->cmd);
  
 +      sigchain_push(SIGPIPE, SIG_IGN);
 +
        write_err = (write_in_full(child_process.in, params->src, params->size) < 0);
        if (close(child_process.in))
                write_err = 1;
        if (write_err)
                error("cannot feed the input to external filter %s", params->cmd);
  
 +      sigchain_pop(SIGPIPE);
 +
        status = finish_command(&child_process);
        if (status)
                error("external filter %s failed %d", params->cmd, status);
@@@ -396,6 -406,9 +411,9 @@@ static int apply_filter(const char *pat
        if (!cmd)
                return 0;
  
+       if (!dst)
+               return 1;
        memset(&async, 0, sizeof(async));
        async.proc = filter_buffer;
        async.data = &params;
@@@ -527,9 -540,12 +545,12 @@@ static int ident_to_git(const char *pat
  {
        char *dst, *dollar;
  
-       if (!ident || !count_ident(src, len))
+       if (!ident || (src && !count_ident(src, len)))
                return 0;
  
+       if (!buf)
+               return 1;
        /* only grow if not in place */
        if (strbuf_avail(buf) + buf->len < len)
                strbuf_grow(buf, len - buf->len);
@@@ -759,13 -775,13 +780,13 @@@ int convert_to_git(const char *path, co
                filter = ca.drv->clean;
  
        ret |= apply_filter(path, src, len, dst, filter);
-       if (ret) {
+       if (ret && dst) {
                src = dst->buf;
                len = dst->len;
        }
        ca.crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
        ret |= crlf_to_git(path, src, len, dst, ca.crlf_action, checksafe);
-       if (ret) {
+       if (ret && dst) {
                src = dst->buf;
                len = dst->len;
        }
diff --combined sha1_file.c
index f9f8d5e91c278000e5869f49ba6a79ddfaf13145,c9ae7ec2ae1495df209268358a73ce18f97eee96..4f06a0e450359744528d3b125fb09eacebf1eb4a
@@@ -18,7 -18,6 +18,7 @@@
  #include "refs.h"
  #include "pack-revindex.h"
  #include "sha1-lookup.h"
 +#include "bulk-checkin.h"
  
  #ifndef O_NOATIME
  #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@@ -54,8 -53,6 +54,8 @@@ static struct cached_object empty_tree 
        0
  };
  
 +static struct packed_git *last_found_pack;
 +
  static struct cached_object *find_cached_object(const unsigned char *sha1)
  {
        int i;
@@@ -722,8 -719,6 +722,8 @@@ void free_pack_by_name(const char *pack
                        close_pack_index(p);
                        free(p->bad_object_sha1);
                        *pp = p->next;
 +                      if (last_found_pack == p)
 +                              last_found_pack = NULL;
                        free(p);
                        return;
                }
@@@ -1206,11 -1201,6 +1206,11 @@@ void *map_sha1_file(const unsigned cha
  
                if (!fstat(fd, &st)) {
                        *size = xsize_t(st.st_size);
 +                      if (!*size) {
 +                              /* mmap() is forbidden on empty files */
 +                              error("object file %s is empty", sha1_file_name(sha1));
 +                              return NULL;
 +                      }
                        map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
                }
                close(fd);
@@@ -2019,58 -2009,54 +2019,58 @@@ int is_pack_valid(struct packed_git *p
        return !open_packed_git(p);
  }
  
 +static int fill_pack_entry(const unsigned char *sha1,
 +                         struct pack_entry *e,
 +                         struct packed_git *p)
 +{
 +      off_t offset;
 +
 +      if (p->num_bad_objects) {
 +              unsigned i;
 +              for (i = 0; i < p->num_bad_objects; i++)
 +                      if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
 +                              return 0;
 +      }
 +
 +      offset = find_pack_entry_one(sha1, p);
 +      if (!offset)
 +              return 0;
 +
 +      /*
 +       * We are about to tell the caller where they can locate the
 +       * requested object.  We better make sure the packfile is
 +       * still here and can be accessed before supplying that
 +       * answer, as it may have been deleted since the index was
 +       * loaded!
 +       */
 +      if (!is_pack_valid(p)) {
 +              warning("packfile %s cannot be accessed", p->pack_name);
 +              return 0;
 +      }
 +      e->offset = offset;
 +      e->p = p;
 +      hashcpy(e->sha1, sha1);
 +      return 1;
 +}
 +
  static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
  {
 -      static struct packed_git *last_found = (void *)1;
        struct packed_git *p;
 -      off_t offset;
  
        prepare_packed_git();
        if (!packed_git)
                return 0;
 -      p = (last_found == (void *)1) ? packed_git : last_found;
  
 -      do {
 -              if (p->num_bad_objects) {
 -                      unsigned i;
 -                      for (i = 0; i < p->num_bad_objects; i++)
 -                              if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
 -                                      goto next;
 -              }
 +      if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
 +              return 1;
  
 -              offset = find_pack_entry_one(sha1, p);
 -              if (offset) {
 -                      /*
 -                       * We are about to tell the caller where they can
 -                       * locate the requested object.  We better make
 -                       * sure the packfile is still here and can be
 -                       * accessed before supplying that answer, as
 -                       * it may have been deleted since the index
 -                       * was loaded!
 -                       */
 -                      if (!is_pack_valid(p)) {
 -                              warning("packfile %s cannot be accessed", p->pack_name);
 -                              goto next;
 -                      }
 -                      e->offset = offset;
 -                      e->p = p;
 -                      hashcpy(e->sha1, sha1);
 -                      last_found = p;
 -                      return 1;
 -              }
 +      for (p = packed_git; p; p = p->next) {
 +              if (p == last_found_pack || !fill_pack_entry(sha1, e, p))
 +                      continue;
  
 -              next:
 -              if (p == last_found)
 -                      p = packed_git;
 -              else
 -                      p = p->next;
 -              if (p == last_found)
 -                      p = p->next;
 -      } while (p);
 +              last_found_pack = p;
 +              return 1;
 +      }
        return 0;
  }
  
@@@ -2465,15 -2451,15 +2465,15 @@@ static int write_loose_object(const uns
        git_SHA_CTX c;
        unsigned char parano_sha1[20];
        char *filename;
 -      static char tmpfile[PATH_MAX];
 +      static char tmp_file[PATH_MAX];
  
        filename = sha1_file_name(sha1);
 -      fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename);
 +      fd = create_tmpfile(tmp_file, sizeof(tmp_file), filename);
        if (fd < 0) {
                if (errno == EACCES)
                        return error("insufficient permission for adding an object to repository database %s\n", get_object_directory());
                else
 -                      return error("unable to create temporary sha1 filename %s: %s\n", tmpfile, strerror(errno));
 +                      return error("unable to create temporary sha1 filename %s: %s\n", tmp_file, strerror(errno));
        }
  
        /* Set it up */
                struct utimbuf utb;
                utb.actime = mtime;
                utb.modtime = mtime;
 -              if (utime(tmpfile, &utb) < 0)
 +              if (utime(tmp_file, &utb) < 0)
                        warning("failed utime() on %s: %s",
 -                              tmpfile, strerror(errno));
 +                              tmp_file, strerror(errno));
        }
  
 -      return move_temp_to_file(tmpfile, filename);
 +      return move_temp_to_file(tmp_file, filename);
  }
  
  int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *returnsha1)
@@@ -2694,22 -2680,85 +2694,25 @@@ static int index_core(unsigned char *sh
  }
  
  /*
 - * This creates one packfile per large blob, because the caller
 - * immediately wants the result sha1, and fast-import can report the
 - * object name via marks mechanism only by closing the created
 - * packfile.
 + * This creates one packfile per large blob unless bulk-checkin
 + * machinery is "plugged".
   *
   * This also bypasses the usual "convert-to-git" dance, and that is on
   * purpose. We could write a streaming version of the converting
   * functions and insert that before feeding the data to fast-import
-  * (or equivalent in-core API described above), but the primary
-  * motivation for trying to stream from the working tree file and to
-  * avoid mmaping it in core is to deal with large binary blobs, and
-  * by definition they do _not_ want to get any conversion.
+  * (or equivalent in-core API described above). However, that is
+  * somewhat complicated, as we do not know the size of the filter
+  * result, which we need to know beforehand when writing a git object.
+  * Since the primary motivation for trying to stream from the working
+  * tree file and to avoid mmaping it in core is to deal with large
+  * binary blobs, they generally do not want to get any conversion, and
+  * callers should avoid this code path when filters are requested.
   */
  static int index_stream(unsigned char *sha1, int fd, size_t size,
                        enum object_type type, const char *path,
                        unsigned flags)
  {
 -      struct child_process fast_import;
 -      char export_marks[512];
 -      const char *argv[] = { "fast-import", "--quiet", export_marks, NULL };
 -      char tmpfile[512];
 -      char fast_import_cmd[512];
 -      char buf[512];
 -      int len, tmpfd;
 -
 -      strcpy(tmpfile, git_path("hashstream_XXXXXX"));
 -      tmpfd = git_mkstemp_mode(tmpfile, 0600);
 -      if (tmpfd < 0)
 -              die_errno("cannot create tempfile: %s", tmpfile);
 -      if (close(tmpfd))
 -              die_errno("cannot close tempfile: %s", tmpfile);
 -      sprintf(export_marks, "--export-marks=%s", tmpfile);
 -
 -      memset(&fast_import, 0, sizeof(fast_import));
 -      fast_import.in = -1;
 -      fast_import.argv = argv;
 -      fast_import.git_cmd = 1;
 -      if (start_command(&fast_import))
 -              die_errno("index-stream: git fast-import failed");
 -
 -      len = sprintf(fast_import_cmd, "blob\nmark :1\ndata %lu\n",
 -                    (unsigned long) size);
 -      write_or_whine(fast_import.in, fast_import_cmd, len,
 -                     "index-stream: feeding fast-import");
 -      while (size) {
 -              char buf[10240];
 -              size_t sz = size < sizeof(buf) ? size : sizeof(buf);
 -              ssize_t actual;
 -
 -              actual = read_in_full(fd, buf, sz);
 -              if (actual < 0)
 -                      die_errno("index-stream: reading input");
 -              if (write_in_full(fast_import.in, buf, actual) != actual)
 -                      die_errno("index-stream: feeding fast-import");
 -              size -= actual;
 -      }
 -      if (close(fast_import.in))
 -              die_errno("index-stream: closing fast-import");
 -      if (finish_command(&fast_import))
 -              die_errno("index-stream: finishing fast-import");
 -
 -      tmpfd = open(tmpfile, O_RDONLY);
 -      if (tmpfd < 0)
 -              die_errno("index-stream: cannot open fast-import mark");
 -      len = read(tmpfd, buf, sizeof(buf));
 -      if (len < 0)
 -              die_errno("index-stream: reading fast-import mark");
 -      if (close(tmpfd) < 0)
 -              die_errno("index-stream: closing fast-import mark");
 -      if (unlink(tmpfile))
 -              die_errno("index-stream: unlinking fast-import mark");
 -      if (len != 44 ||
 -          memcmp(":1 ", buf, 3) ||
 -          get_sha1_hex(buf + 3, sha1))
 -              die_errno("index-stream: unexpected fast-import mark: <%s>", buf);
 -      return 0;
 +      return index_bulk_checkin(sha1, fd, size, type, path, flags);
  }
  
  int index_fd(unsigned char *sha1, int fd, struct stat *st,
  
        if (!S_ISREG(st->st_mode))
                ret = index_pipe(sha1, fd, type, path, flags);
-       else if (size <= big_file_threshold || type != OBJ_BLOB)
+       else if (size <= big_file_threshold || type != OBJ_BLOB ||
+                (path && would_convert_to_git(path, NULL, 0, 0)))
                ret = index_core(sha1, fd, size, type, path, flags);
        else
                ret = index_stream(sha1, fd, size, type, path, flags);