git.ipfire.org Git - thirdparty/git.git/commitdiff
http-fetch: support fetching packfiles by URL
author Jonathan Tan <jonathantanmy@google.com>
Wed, 10 Jun 2020 20:57:18 +0000 (13:57 -0700)
committer Junio C Hamano <gitster@pobox.com>
Thu, 11 Jun 2020 01:06:34 +0000 (18:06 -0700)
Teach http-fetch the ability to download packfiles directly, given a
URL, and to verify them.

The http_pack_request suite has been augmented with a function that
takes a URL directly. With this function, the hash is only used to
determine the name of the temporary file.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-http-fetch.txt
http-fetch.c
http.c
http.h
t/t5550-http-fetch-dumb.sh

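diff --git a/Documentation/git-http-fetch.txt b/Documentation/git-http-fetch.txt
index 666b042679f405fd1759b42a8d86aafb083e817c..4deb4893f517c3a0bc1e01228f0a28818c05daea 100644 (file)
--- a/Documentation/git-http-fetch.txt
+++ b/Documentation/git-http-fetch.txt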
@@ -9,7 +9,7 @@ git-http-fetch - Download from a remote Git repository via HTTP
 SYNOPSIS
 --------
 [verse]
-'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin] <commit> <url>
+'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin | --packfile=<hash> | <commit>] <url>
 
 DESCRIPTION
 -----------
@@ -40,6 +40,13 @@ commit-id::
 
                <commit-id>['\t'<filename-as-in--w>]
 
+--packfile=<hash>::
+       Instead of a commit id on the command line (which is not expected in
+       this case), 'git http-fetch' fetches the packfile directly at the given
+       URL and uses index-pack to generate corresponding .idx and .keep files.
+       The hash is used to determine the name of the temporary file and is
+       arbitrary. The output of index-pack is printed to stdout.
+
 --recover::
        Verify that everything reachable from target is fetched.  Used after
        an earlier fetch is interrupted.
diff --git a/http-fetch.c b/http-fetch.c
index e538174bde42e5a17e0bf6cb03b2dec9e8dfd843..1df376e745ce69390bf8dda422882906a5f747ec 100644 (file)
--- a/http-fetch.c
+++ b/http-fetch.c
@@ -5,7 +5,7 @@
 #include "walker.h"
 
 static const char http_fetch_usage[] = "git http-fetch "
-"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url";
+"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin | --packfile=hash | commit-id] url";
 
 static int fetch_using_walker(const char *raw_url, int get_verbosely,
                              int get_recover, int commits, char **commit_id,
@@ -43,6 +43,37 @@ static int fetch_using_walker(const char *raw_url, int get_verbosely,
        return rc;
 }
 
+/* Fetch the pack file at "url" and index it; any failure is fatal. */
+static void fetch_single_packfile(struct object_id *packfile_hash,
+                                 const char *url)
+{
+       struct slot_results slot_res;
+       struct http_pack_request *req;
+       int rc;
+
+       http_init(NULL, url, 0);
+
+       req = new_direct_http_pack_request(packfile_hash->hash, xstrdup(url));
+       if (!req)
+               die("couldn't create http pack request");
+       req->slot->results = &slot_res;
+       /* Have index-pack write a .keep file and print its "keep" line. */
+       req->generate_keep = 1;
+
+       if (!start_active_slot(req->slot))
+               die("Unable to start request");
+       run_active_slot(req->slot);
+       if (slot_res.curl_result != CURLE_OK)
+               die("Unable to get pack file %s\n%s", req->url, curl_errorstr);
+
+       rc = finish_http_pack_request(req);
+       if (rc)
+               die("finish_http_pack_request gave result %d", rc);
+
+       release_http_pack_request(req);
+       http_cleanup();
+}
+
 int cmd_main(int argc, const char **argv)
 {
        int commits_on_stdin = 0;
@@ -52,8 +83,12 @@ int cmd_main(int argc, const char **argv)
        int arg = 1;
        int get_verbosely = 0;
        int get_recover = 0;
+       int packfile = 0;
+       struct object_id packfile_hash;
 
        while (arg < argc && argv[arg][0] == '-') {
+               const char *p;
+
                if (argv[arg][1] == 't') {
                } else if (argv[arg][1] == 'c') {
                } else if (argv[arg][1] == 'a') {
@@ -68,25 +103,33 @@ int cmd_main(int argc, const char **argv)
                        get_recover = 1;
                } else if (!strcmp(argv[arg], "--stdin")) {
                        commits_on_stdin = 1;
+               } else if (skip_prefix(argv[arg], "--packfile=", &p)) {
+                       const char *end;
+
+                       packfile = 1;
+                       if (parse_oid_hex(p, &packfile_hash, &end) || *end)
+                               die(_("argument to --packfile must be a valid hash (got '%s')"), p);
                }
                arg++;
        }
-       if (argc != arg + 2 - commits_on_stdin)
+       if (argc != arg + 2 - (commits_on_stdin || packfile))
                usage(http_fetch_usage);
-       if (commits_on_stdin) {
-               commits = walker_targets_stdin(&commit_id, &write_ref);
-       } else {
-               commit_id = (char **) &argv[arg++];
-               commits = 1;
-       }
 
        setup_git_directory();
 
        git_config(git_default_config, NULL);
 
-       if (!argv[arg])
-               BUG("must have one arg remaining");
+       if (packfile) {
+               fetch_single_packfile(&packfile_hash, argv[arg]);
+               return 0;
+       }
 
+       if (commits_on_stdin) {
+               commits = walker_targets_stdin(&commit_id, &write_ref);
+       } else {
+               commit_id = (char **) &argv[arg++];
+               commits = 1;
+       }
        return fetch_using_walker(argv[arg], get_verbosely, get_recover,
                                  commits, commit_id, write_ref,
                                  commits_on_stdin);
diff --git a/http.c b/http.c
index 4f6e1fb0188a7401a63e2bf01755dd3fd566f991..3aa0fa9fe6fad2c4bf805824be030e38f9ff1d9a 100644 (file)
--- a/http.c
+++ b/http.c
@@ -2281,7 +2281,13 @@ int finish_http_pack_request(struct http_pack_request *preq)
        argv_array_push(&ip.args, "--stdin");
        ip.git_cmd = 1;
        ip.in = tmpfile_fd;
-       ip.no_stdout = 1;
+       if (preq->generate_keep) {
+               argv_array_pushf(&ip.args, "--keep=git %"PRIuMAX,
+                                (uintmax_t)getpid());
+               ip.out = 0;
+       } else {
+               ip.no_stdout = 1;
+       }
 
        if (run_command(&ip)) {
                ret = -1;
@@ -2307,19 +2313,27 @@ void http_install_packfile(struct packed_git *p,
 }
 
 struct http_pack_request *new_http_pack_request(
-       const unsigned char *packed_git_hash, const char *base_url)
+       const unsigned char *packed_git_hash, const char *base_url) {
+
+       struct strbuf buf = STRBUF_INIT;
+
+       end_url_with_slash(&buf, base_url);
+       strbuf_addf(&buf, "objects/pack/pack-%s.pack",
+               hash_to_hex(packed_git_hash));
+       return new_direct_http_pack_request(packed_git_hash,
+                                           strbuf_detach(&buf, NULL));
+}
+
+struct http_pack_request *new_direct_http_pack_request(
+       const unsigned char *packed_git_hash, char *url)
 {
        off_t prev_posn = 0;
-       struct strbuf buf = STRBUF_INIT;
        struct http_pack_request *preq;
 
        preq = xcalloc(1, sizeof(*preq));
        strbuf_init(&preq->tmpfile, 0);
 
-       end_url_with_slash(&buf, base_url);
-       strbuf_addf(&buf, "objects/pack/pack-%s.pack",
-               hash_to_hex(packed_git_hash));
-       preq->url = strbuf_detach(&buf, NULL);
+       preq->url = url;
 
        strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(packed_git_hash));
        preq->packfile = fopen(preq->tmpfile.buf, "a");
diff --git a/http.h b/http.h
index bbc6b070f148840d42d90e5692941aafd187916b..dc49c6016547aaa490e563dd4233a0be9a174da3 100644 (file)
--- a/http.h
+++ b/http.h
@@ -216,6 +216,15 @@ int http_get_info_packs(const char *base_url,
 
 struct http_pack_request {
        char *url;
+
+       /*
+        * If this is true, finish_http_pack_request() will pass "--keep" to
+        * index-pack, resulting in the creation of a keep file, and will not
+        * suppress its stdout (that is, the "keep\t<hash>\n" line will be
+        * printed to stdout).
+        */
+       unsigned generate_keep : 1;
+
        FILE *packfile;
        struct strbuf tmpfile;
        struct active_request_slot *slot;
@@ -223,6 +232,8 @@ struct http_pack_request {
 
 struct http_pack_request *new_http_pack_request(
        const unsigned char *packed_git_hash, const char *base_url);
+struct http_pack_request *new_direct_http_pack_request(
+       const unsigned char *packed_git_hash, char *url);
 int finish_http_pack_request(struct http_pack_request *preq);
 void release_http_pack_request(struct http_pack_request *preq);
 
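diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
index 50485300eb19ea757dac4ee36ec395329d33e1f0..ca2e8af022f0b4749fd4fcd30de93864c780f04a 100755 (executable)
--- a/t/t5550-http-fetch-dumb.sh
+++ b/t/t5550-http-fetch-dumb.sh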
@@ -199,6 +199,28 @@ test_expect_success 'fetch packed objects' '
        git clone $HTTPD_URL/dumb/repo_pack.git
 '
 
+test_expect_success 'http-fetch --packfile' '
+       # The hash only names the temporary download file, so any value
+       # works; borrow one from rev-parse so that it has the correct length.
+       ARBITRARY=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) &&
+
+       git init packfileclient &&
+       p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git && ls objects/pack/pack-*.pack) &&
+       git -C packfileclient http-fetch --packfile=$ARBITRARY "$HTTPD_URL"/dumb/repo_pack.git/$p >out &&
+       # stdout should carry a "keep<TAB><hash>" line; keep only the hash
+       grep "^keep.[0-9a-f]\{16,\}$" out &&
+       cut -c6- out >packhash &&
+
+       # Ensure that the pack and its .idx and .keep files were generated
+       test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).pack" &&
+       test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).idx" &&
+       test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).keep" &&
+
+       # Ensure that the client has the object HEAD of repo_pack names, at least
+       HASH=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) &&
+       git -C packfileclient cat-file -e "$HASH"
+'
+
 test_expect_success 'fetch notices corrupt pack' '
        cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git &&
        (cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git &&
@@ -214,6 +236,14 @@ test_expect_success 'fetch notices corrupt pack' '
        )
 '
 
+test_expect_success 'http-fetch --packfile with corrupt pack' '
+       # Pass a hash: a bare "--packfile" only trips the usage check, not the download.
+       ARBITRARY=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) &&
+       rm -rf packfileclient && git init packfileclient &&
+       p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git && ls objects/pack/pack-*.pack) &&
+       test_must_fail git -C packfileclient http-fetch --packfile=$ARBITRARY "$HTTPD_URL"/dumb/repo_bad1.git/$p
+'
+
 test_expect_success 'fetch notices corrupt idx' '
        cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git &&
        (cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git &&