git.ipfire.org Git - thirdparty/git.git/commitdiff
Merge branch 'nd/large-blobs'
author Junio C Hamano <gitster@pobox.com>
Thu, 11 Sep 2014 17:33:32 +0000 (10:33 -0700)
committer Junio C Hamano <gitster@pobox.com>
Thu, 11 Sep 2014 17:33:33 +0000 (10:33 -0700)
Teach a few codepaths to punt (instead of dying) when large blobs
that would not fit in core are involved in the operation.

* nd/large-blobs:
  diff: shortcut for diff'ing two binary SHA-1 objects
  diff --stat: mark any file larger than core.bigfilethreshold binary
  diff.c: allow to pass more flags to diff_populate_filespec
  sha1_file.c: do not die failing to malloc in unpack_compressed_entry
  wrapper.c: introduce gentle xmallocz that does not die()

Documentation/config.txt
Documentation/gitattributes.txt
diff.c
diffcore-rename.c
diffcore.h
git-compat-util.h
sha1_file.c
t/t1050-large.sh
wrapper.c

index c55c22ab7be94e798164a48622aca21dd4daa44f..3b5b24aeb7f16c03e06bd6f38fd89e9080011ce7 100644 (file)
@@ -499,7 +499,8 @@ core.bigFileThreshold::
        Files larger than this size are stored deflated, without
        attempting delta compression.  Storing large files without
        delta compression avoids excessive memory usage, at the
-       slight expense of increased disk usage.
+       slight expense of increased disk usage. Additionally files
+       larger than this size are always treated as binary.
 +
 Default is 512 MiB on all platforms.  This should be reasonable
 for most projects as source code and other text files can still
index 643c1ba9290ff10813eab57b95f7c26d8c43c7e2..9b45bda7485c7e4742b13cdf19272968592b640a 100644 (file)
@@ -440,8 +440,8 @@ Unspecified::
 
        A path to which the `diff` attribute is unspecified
        first gets its contents inspected, and if it looks like
-       text, it is treated as text.  Otherwise it would
-       generate `Binary files differ`.
+       text and is smaller than core.bigFileThreshold, it is treated
+       as text. Otherwise it would generate `Binary files differ`.
 
 String::
 
diff --git a/diff.c b/diff.c
index e7d4d4200f593ca43a1eba88fbc43bc5348a3afc..d7a5c81bb8545584ce5fe652dc42f2ee8bc1e2fd 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -376,7 +376,7 @@ static unsigned long diff_filespec_size(struct diff_filespec *one)
 {
        if (!DIFF_FILE_VALID(one))
                return 0;
-       diff_populate_filespec(one, 1);
+       diff_populate_filespec(one, CHECK_SIZE_ONLY);
        return one->size;
 }
 
@@ -1910,11 +1910,11 @@ static void show_dirstat(struct diff_options *options)
                        diff_free_filespec_data(p->one);
                        diff_free_filespec_data(p->two);
                } else if (DIFF_FILE_VALID(p->one)) {
-                       diff_populate_filespec(p->one, 1);
+                       diff_populate_filespec(p->one, CHECK_SIZE_ONLY);
                        copied = added = 0;
                        diff_free_filespec_data(p->one);
                } else if (DIFF_FILE_VALID(p->two)) {
-                       diff_populate_filespec(p->two, 1);
+                       diff_populate_filespec(p->two, CHECK_SIZE_ONLY);
                        copied = 0;
                        added = p->two->size;
                        diff_free_filespec_data(p->two);
@@ -2188,8 +2188,8 @@ int diff_filespec_is_binary(struct diff_filespec *one)
                        one->is_binary = one->driver->binary;
                else {
                        if (!one->data && DIFF_FILE_VALID(one))
-                               diff_populate_filespec(one, 0);
-                       if (one->data)
+                               diff_populate_filespec(one, CHECK_BINARY);
+                       if (one->is_binary == -1 && one->data)
                                one->is_binary = buffer_is_binary(one->data,
                                                one->size);
                        if (one->is_binary == -1)
@@ -2324,6 +2324,19 @@ static void builtin_diff(const char *name_a,
        } else if (!DIFF_OPT_TST(o, TEXT) &&
            ( (!textconv_one && diff_filespec_is_binary(one)) ||
              (!textconv_two && diff_filespec_is_binary(two)) )) {
+               if (!one->data && !two->data &&
+                   S_ISREG(one->mode) && S_ISREG(two->mode) &&
+                   !DIFF_OPT_TST(o, BINARY)) {
+                       if (!hashcmp(one->sha1, two->sha1)) {
+                               if (must_show_header)
+                                       fprintf(o->file, "%s", header.buf);
+                               goto free_ab_and_return;
+                       }
+                       fprintf(o->file, "%s", header.buf);
+                       fprintf(o->file, "%sBinary files %s and %s differ\n",
+                               line_prefix, lbl[0], lbl[1]);
+                       goto free_ab_and_return;
+               }
                if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
                        die("unable to read files to diff");
                /* Quite common confusing case */
@@ -2668,8 +2681,9 @@ static int diff_populate_gitlink(struct diff_filespec *s, int size_only)
  * grab the data for the blob (or file) for our own in-core comparison.
  * diff_filespec has data and size fields for this purpose.
  */
-int diff_populate_filespec(struct diff_filespec *s, int size_only)
+int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
 {
+       int size_only = flags & CHECK_SIZE_ONLY;
        int err = 0;
        /*
         * demote FAIL to WARN to allow inspecting the situation
@@ -2724,6 +2738,11 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
                }
                if (size_only)
                        return 0;
+               if ((flags & CHECK_BINARY) &&
+                   s->size > big_file_threshold && s->is_binary == -1) {
+                       s->is_binary = 1;
+                       return 0;
+               }
                fd = open(s->path, O_RDONLY);
                if (fd < 0)
                        goto err_empty;
@@ -2745,16 +2764,21 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
        }
        else {
                enum object_type type;
-               if (size_only) {
+               if (size_only || (flags & CHECK_BINARY)) {
                        type = sha1_object_info(s->sha1, &s->size);
                        if (type < 0)
                                die("unable to read %s", sha1_to_hex(s->sha1));
-               } else {
-                       s->data = read_sha1_file(s->sha1, &type, &s->size);
-                       if (!s->data)
-                               die("unable to read %s", sha1_to_hex(s->sha1));
-                       s->should_free = 1;
+                       if (size_only)
+                               return 0;
+                       if (s->size > big_file_threshold && s->is_binary == -1) {
+                               s->is_binary = 1;
+                               return 0;
+                       }
                }
+               s->data = read_sha1_file(s->sha1, &type, &s->size);
+               if (!s->data)
+                       die("unable to read %s", sha1_to_hex(s->sha1));
+               s->should_free = 1;
        }
        return 0;
 }
@@ -4688,8 +4712,8 @@ static int diff_filespec_check_stat_unmatch(struct diff_filepair *p)
            !DIFF_FILE_VALID(p->two) ||
            (p->one->sha1_valid && p->two->sha1_valid) ||
            (p->one->mode != p->two->mode) ||
-           diff_populate_filespec(p->one, 1) ||
-           diff_populate_filespec(p->two, 1) ||
+           diff_populate_filespec(p->one, CHECK_SIZE_ONLY) ||
+           diff_populate_filespec(p->two, CHECK_SIZE_ONLY) ||
            (p->one->size != p->two->size) ||
            !diff_filespec_is_identical(p->one, p->two)) /* (2) */
                p->skip_stat_unmatch_result = 1;
index 2e44a3745939bb75841730ba0cff78ea872df8d9..4e132f1fdb68ed3c930ca8224a5403c9b406cc3b 100644 (file)
@@ -147,9 +147,11 @@ static int estimate_similarity(struct diff_filespec *src,
         * is a possible size - we really should have a flag to
         * say whether the size is valid or not!)
         */
-       if (!src->cnt_data && diff_populate_filespec(src, 1))
+       if (!src->cnt_data &&
+           diff_populate_filespec(src, CHECK_SIZE_ONLY))
                return 0;
-       if (!dst->cnt_data && diff_populate_filespec(dst, 1))
+       if (!dst->cnt_data &&
+           diff_populate_filespec(dst, CHECK_SIZE_ONLY))
                return 0;
 
        max_size = ((src->size > dst->size) ? src->size : dst->size);
index c876dac71a585abb1e138444eb28258e3bf6d7e8..33ea2de348803b29a08a6713ae4cab1345f874d9 100644 (file)
@@ -55,7 +55,9 @@ extern void free_filespec(struct diff_filespec *);
 extern void fill_filespec(struct diff_filespec *, const unsigned char *,
                          int, unsigned short);
 
-extern int diff_populate_filespec(struct diff_filespec *, int);
+#define CHECK_SIZE_ONLY 1
+#define CHECK_BINARY    2
+extern int diff_populate_filespec(struct diff_filespec *, unsigned int);
 extern void diff_free_filespec_data(struct diff_filespec *);
 extern void diff_free_filespec_blob(struct diff_filespec *);
 extern int diff_filespec_is_binary(struct diff_filespec *);
index d675c89603eaf6135354c120f06289bd90a3f3d3..4e7e3f8726a26a1f7dfbb889fe3531d01d4419b9 100644 (file)
@@ -609,6 +609,7 @@ extern try_to_free_t set_try_to_free_routine(try_to_free_t);
 extern char *xstrdup(const char *str);
 extern void *xmalloc(size_t size);
 extern void *xmallocz(size_t size);
+extern void *xmallocz_gently(size_t size);
 extern void *xmemdupz(const void *data, size_t len);
 extern char *xstrndup(const char *str, size_t len);
 extern void *xrealloc(void *ptr, size_t size);
index 95afd209107277da3154226dd08bd8040a9097b5..c08c0cbea805b38104504b9b51266949affb6991 100644 (file)
@@ -1923,7 +1923,9 @@ static void *unpack_compressed_entry(struct packed_git *p,
        git_zstream stream;
        unsigned char *buffer, *in;
 
-       buffer = xmallocz(size);
+       buffer = xmallocz_gently(size);
+       if (!buffer)
+               return NULL;
        memset(&stream, 0, sizeof(stream));
        stream.next_out = buffer;
        stream.avail_out = size + 1;
index aea493646e4400e733749768e48f2a3d1a470299..05a1e1d270d2f6254658607541f1b114299533e5 100755 (executable)
@@ -112,6 +112,20 @@ test_expect_success 'diff --raw' '
        git diff --raw HEAD^
 '
 
+test_expect_success 'diff --stat' '
+       git diff --stat HEAD^ HEAD
+'
+
+test_expect_success 'diff' '
+       git diff HEAD^ HEAD >actual &&
+       grep "Binary files.*differ" actual
+'
+
+test_expect_success 'diff --cached' '
+       git diff --cached HEAD^ >actual &&
+       grep "Binary files.*differ" actual
+'
+
 test_expect_success 'hash-object' '
        git hash-object large1
 '
@@ -163,4 +177,10 @@ test_expect_success 'zip achiving, deflate' '
        git archive --format=zip HEAD >/dev/null
 '
 
+test_expect_success 'fsck' '
+       test_must_fail git fsck 2>err &&
+       n=$(grep "error: attempting to allocate .* over limit" err | wc -l) &&
+       test "$n" -gt 1
+'
+
 test_done
index bd24cdabfb818d4eddde4f08819f5a0166283a6a..25074d71b6ce72066efc02abda74ddfb10f71d6c 100644 (file)
--- a/wrapper.c
+++ b/wrapper.c
@@ -9,16 +9,23 @@ static void do_nothing(size_t size)
 
 static void (*try_to_free_routine)(size_t size) = do_nothing;
 
-static void memory_limit_check(size_t size)
+static int memory_limit_check(size_t size, int gentle)
 {
        static int limit = -1;
        if (limit == -1) {
                const char *env = getenv("GIT_ALLOC_LIMIT");
                limit = env ? atoi(env) * 1024 : 0;
        }
-       if (limit && size > limit)
-               die("attempting to allocate %"PRIuMAX" over limit %d",
-                   (intmax_t)size, limit);
+       if (limit && size > limit) {
+               if (gentle) {
+                       error("attempting to allocate %"PRIuMAX" over limit %d",
+                             (intmax_t)size, limit);
+                       return -1;
+               } else
+                       die("attempting to allocate %"PRIuMAX" over limit %d",
+                           (intmax_t)size, limit);
+       }
+       return 0;
 }
 
 try_to_free_t set_try_to_free_routine(try_to_free_t routine)
@@ -42,11 +49,12 @@ char *xstrdup(const char *str)
        return ret;
 }
 
-void *xmalloc(size_t size)
+static void *do_xmalloc(size_t size, int gentle)
 {
        void *ret;
 
-       memory_limit_check(size);
+       if (memory_limit_check(size, gentle))
+               return NULL;
        ret = malloc(size);
        if (!ret && !size)
                ret = malloc(1);
@@ -55,9 +63,16 @@ void *xmalloc(size_t size)
                ret = malloc(size);
                if (!ret && !size)
                        ret = malloc(1);
-               if (!ret)
-                       die("Out of memory, malloc failed (tried to allocate %lu bytes)",
-                           (unsigned long)size);
+               if (!ret) {
+                       if (!gentle)
+                               die("Out of memory, malloc failed (tried to allocate %lu bytes)",
+                                   (unsigned long)size);
+                       else {
+                               error("Out of memory, malloc failed (tried to allocate %lu bytes)",
+                                     (unsigned long)size);
+                               return NULL;
+                       }
+               }
        }
 #ifdef XMALLOC_POISON
        memset(ret, 0xA5, size);
@@ -65,16 +80,37 @@ void *xmalloc(size_t size)
        return ret;
 }
 
-void *xmallocz(size_t size)
+void *xmalloc(size_t size)
+{
+       return do_xmalloc(size, 0);
+}
+
+static void *do_xmallocz(size_t size, int gentle)
 {
        void *ret;
-       if (unsigned_add_overflows(size, 1))
-               die("Data too large to fit into virtual memory space.");
-       ret = xmalloc(size + 1);
-       ((char*)ret)[size] = 0;
+       if (unsigned_add_overflows(size, 1)) {
+               if (gentle) {
+                       error("Data too large to fit into virtual memory space.");
+                       return NULL;
+               } else
+                       die("Data too large to fit into virtual memory space.");
+       }
+       ret = do_xmalloc(size + 1, gentle);
+       if (ret)
+               ((char*)ret)[size] = 0;
        return ret;
 }
 
+void *xmallocz(size_t size)
+{
+       return do_xmallocz(size, 0);
+}
+
+void *xmallocz_gently(size_t size)
+{
+       return do_xmallocz(size, 1);
+}
+
 /*
  * xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of
  * "data" to the allocated memory, zero terminates the allocated memory,
@@ -96,7 +132,7 @@ void *xrealloc(void *ptr, size_t size)
 {
        void *ret;
 
-       memory_limit_check(size);
+       memory_limit_check(size, 0);
        ret = realloc(ptr, size);
        if (!ret && !size)
                ret = realloc(ptr, 1);
@@ -115,7 +151,7 @@ void *xcalloc(size_t nmemb, size_t size)
 {
        void *ret;
 
-       memory_limit_check(size * nmemb);
+       memory_limit_check(size * nmemb, 0);
        ret = calloc(nmemb, size);
        if (!ret && (!nmemb || !size))
                ret = calloc(1, 1);