]> git.ipfire.org Git - thirdparty/git.git/blob - http-fetch.c
Merge branch 'sp/reflog' into next
[thirdparty/git.git] / http-fetch.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
6
7 #ifndef NO_EXPAT
8 #include <expat.h>
9
10 /* Definitions for DAV requests */
11 #define DAV_PROPFIND "PROPFIND"
12 #define DAV_PROPFIND_RESP ".multistatus.response"
13 #define DAV_PROPFIND_NAME ".multistatus.response.href"
14 #define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
15 #define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
16
17 /* Definitions for processing XML DAV responses */
18 #ifndef XML_STATUS_OK
19 enum XML_Status {
20 XML_STATUS_OK = 1,
21 XML_STATUS_ERROR = 0
22 };
23 #define XML_STATUS_OK 1
24 #define XML_STATUS_ERROR 0
25 #endif
26
27 /* Flags that control remote_ls processing */
28 #define PROCESS_FILES (1u << 0)
29 #define PROCESS_DIRS (1u << 1)
30 #define RECURSIVE (1u << 2)
31
32 /* Flags that remote_ls passes to callback functions */
33 #define IS_DIR (1u << 0)
34 #endif
35
/* Chunk size used when replaying a partially downloaded object file */
#define PREV_BUF_SIZE 4096

/*
 * Room for "Range: bytes=" (13 bytes), the decimal digits of a long,
 * the trailing "-" and the NUL.  A 64-bit long can need 19 digits, i.e.
 * 34 bytes in total, which the former value of 30 could not hold.
 */
#define RANGE_HEADER_SIZE 40

/* -1: alternates not loaded (or loading failed), 0: in progress, 1: loaded */
static int got_alternates = -1;
/* Counts objects whose zlib stream did not end cleanly (see fetch_object()) */
static int corrupt_object_found = 0;

/* Header list carrying "Pragma:"; populated in main() */
static struct curl_slist *no_pragma_header;
43
/* One repository base URL to fetch from; entries form a singly linked list. */
struct alt_base {
	char *base;			/* base URL of the repository */
	int path_len;			/* strlen() of the path part that follows the host */
	int got_indices;		/* set once fetch_indices() has listed the packs */
	struct packed_git *packs;	/* pack indices known for this repository */
	struct alt_base *next;
};

/* Head of the list: created in main(), extended by process_alternates_response() */
static struct alt_base *alt;
54
/* Lifecycle of a queued loose-object request */
enum object_request_state {
	WAITING,	/* queued by prefetch(), transfer not started */
	ABORTED,	/* the transfer could not be started */
	ACTIVE,		/* transfer in progress */
	COMPLETE,	/* transfer finished, or the object was already present */
};
61
62 struct object_request
63 {
64 unsigned char sha1[20];
65 struct alt_base *repo;
66 char *url;
67 char filename[PATH_MAX];
68 char tmpfile[PATH_MAX];
69 int local;
70 enum object_request_state state;
71 CURLcode curl_result;
72 char errorstr[CURL_ERROR_SIZE];
73 long http_code;
74 unsigned char real_sha1[20];
75 SHA_CTX c;
76 z_stream stream;
77 int zret;
78 int rename;
79 struct active_request_slot *slot;
80 struct object_request *next;
81 };
82
/* Callback context shared by fetch_alternates() and process_alternates_response() */
struct alternates_request {
	char *base;			/* repository whose alternates are fetched */
	char *url;			/* request URL; rewritten in place for the retry */
	struct buffer *buffer;		/* receives the response body */
	struct active_request_slot *slot;
	int http_specific;		/* 1 while asking for http-alternates, 0 for alternates */
};
90
#ifndef NO_EXPAT
/* Parser state shared by the expat callbacks below */
struct xml_ctx {
	char *name;	/* dotted path of the currently open elements */
	int len;	/* bytes allocated for name */
	char *cdata;	/* most recent character data, or NULL */
	void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
	void *userData;
};

/* State of one PROPFIND listing performed by remote_ls() */
struct remote_ls_ctx {
	struct alt_base *repo;
	char *path;		/* path that was listed */
	void (*userFunc)(struct remote_ls_ctx *ls);
	void *userData;
	int flags;		/* PROCESS_FILES | PROCESS_DIRS | RECURSIVE */
	char *dentry_name;	/* entry currently being reported */
	int dentry_flags;	/* IS_DIR when the entry is a collection */
	int rc;			/* result returned by remote_ls() */
	struct remote_ls_ctx *parent;
};
#endif
114
/* Queue of object requests, in the order prefetch() added them */
static struct object_request *object_queue_head;
116
117 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
118 void *data)
119 {
120 unsigned char expn[4096];
121 size_t size = eltsize * nmemb;
122 int posn = 0;
123 struct object_request *obj_req = (struct object_request *)data;
124 do {
125 ssize_t retval = write(obj_req->local,
126 ptr + posn, size - posn);
127 if (retval < 0)
128 return posn;
129 posn += retval;
130 } while (posn < size);
131
132 obj_req->stream.avail_in = size;
133 obj_req->stream.next_in = ptr;
134 do {
135 obj_req->stream.next_out = expn;
136 obj_req->stream.avail_out = sizeof(expn);
137 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
138 SHA1_Update(&obj_req->c, expn,
139 sizeof(expn) - obj_req->stream.avail_out);
140 } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
141 data_received++;
142 return size;
143 }
144
145 static void fetch_alternates(char *base);
146
147 static void process_object_response(void *callback_data);
148
149 static void start_object_request(struct object_request *obj_req)
150 {
151 char *hex = sha1_to_hex(obj_req->sha1);
152 char prevfile[PATH_MAX];
153 char *url;
154 char *posn;
155 int prevlocal;
156 unsigned char prev_buf[PREV_BUF_SIZE];
157 ssize_t prev_read = 0;
158 long prev_posn = 0;
159 char range[RANGE_HEADER_SIZE];
160 struct curl_slist *range_header = NULL;
161 struct active_request_slot *slot;
162
163 snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
164 unlink(prevfile);
165 rename(obj_req->tmpfile, prevfile);
166 unlink(obj_req->tmpfile);
167
168 if (obj_req->local != -1)
169 error("fd leakage in start: %d", obj_req->local);
170 obj_req->local = open(obj_req->tmpfile,
171 O_WRONLY | O_CREAT | O_EXCL, 0666);
172 /* This could have failed due to the "lazy directory creation";
173 * try to mkdir the last path component.
174 */
175 if (obj_req->local < 0 && errno == ENOENT) {
176 char *dir = strrchr(obj_req->tmpfile, '/');
177 if (dir) {
178 *dir = 0;
179 mkdir(obj_req->tmpfile, 0777);
180 *dir = '/';
181 }
182 obj_req->local = open(obj_req->tmpfile,
183 O_WRONLY | O_CREAT | O_EXCL, 0666);
184 }
185
186 if (obj_req->local < 0) {
187 obj_req->state = ABORTED;
188 error("Couldn't create temporary file %s for %s: %s",
189 obj_req->tmpfile, obj_req->filename, strerror(errno));
190 return;
191 }
192
193 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
194
195 inflateInit(&obj_req->stream);
196
197 SHA1_Init(&obj_req->c);
198
199 url = xmalloc(strlen(obj_req->repo->base) + 50);
200 obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
201 strcpy(url, obj_req->repo->base);
202 posn = url + strlen(obj_req->repo->base);
203 strcpy(posn, "objects/");
204 posn += 8;
205 memcpy(posn, hex, 2);
206 posn += 2;
207 *(posn++) = '/';
208 strcpy(posn, hex + 2);
209 strcpy(obj_req->url, url);
210
211 /* If a previous temp file is present, process what was already
212 fetched. */
213 prevlocal = open(prevfile, O_RDONLY);
214 if (prevlocal != -1) {
215 do {
216 prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
217 if (prev_read>0) {
218 if (fwrite_sha1_file(prev_buf,
219 1,
220 prev_read,
221 obj_req) == prev_read) {
222 prev_posn += prev_read;
223 } else {
224 prev_read = -1;
225 }
226 }
227 } while (prev_read > 0);
228 close(prevlocal);
229 }
230 unlink(prevfile);
231
232 /* Reset inflate/SHA1 if there was an error reading the previous temp
233 file; also rewind to the beginning of the local file. */
234 if (prev_read == -1) {
235 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
236 inflateInit(&obj_req->stream);
237 SHA1_Init(&obj_req->c);
238 if (prev_posn>0) {
239 prev_posn = 0;
240 lseek(obj_req->local, SEEK_SET, 0);
241 ftruncate(obj_req->local, 0);
242 }
243 }
244
245 slot = get_active_slot();
246 slot->callback_func = process_object_response;
247 slot->callback_data = obj_req;
248 obj_req->slot = slot;
249
250 curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
251 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
252 curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
253 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
254 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
255
256 /* If we have successfully processed data from a previous fetch
257 attempt, only fetch the data we don't already have. */
258 if (prev_posn>0) {
259 if (get_verbosely)
260 fprintf(stderr,
261 "Resuming fetch of object %s at byte %ld\n",
262 hex, prev_posn);
263 sprintf(range, "Range: bytes=%ld-", prev_posn);
264 range_header = curl_slist_append(range_header, range);
265 curl_easy_setopt(slot->curl,
266 CURLOPT_HTTPHEADER, range_header);
267 }
268
269 /* Try to get the request started, abort the request on error */
270 obj_req->state = ACTIVE;
271 if (!start_active_slot(slot)) {
272 obj_req->state = ABORTED;
273 obj_req->slot = NULL;
274 close(obj_req->local); obj_req->local = -1;
275 free(obj_req->url);
276 return;
277 }
278 }
279
280 static void finish_object_request(struct object_request *obj_req)
281 {
282 struct stat st;
283
284 fchmod(obj_req->local, 0444);
285 close(obj_req->local); obj_req->local = -1;
286
287 if (obj_req->http_code == 416) {
288 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
289 } else if (obj_req->curl_result != CURLE_OK) {
290 if (stat(obj_req->tmpfile, &st) == 0)
291 if (st.st_size == 0)
292 unlink(obj_req->tmpfile);
293 return;
294 }
295
296 inflateEnd(&obj_req->stream);
297 SHA1_Final(obj_req->real_sha1, &obj_req->c);
298 if (obj_req->zret != Z_STREAM_END) {
299 unlink(obj_req->tmpfile);
300 return;
301 }
302 if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
303 unlink(obj_req->tmpfile);
304 return;
305 }
306 obj_req->rename =
307 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
308
309 if (obj_req->rename == 0)
310 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
311 }
312
313 static void process_object_response(void *callback_data)
314 {
315 struct object_request *obj_req =
316 (struct object_request *)callback_data;
317
318 obj_req->curl_result = obj_req->slot->curl_result;
319 obj_req->http_code = obj_req->slot->http_code;
320 obj_req->slot = NULL;
321 obj_req->state = COMPLETE;
322
323 /* Use alternates if necessary */
324 if (obj_req->http_code == 404 ||
325 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
326 fetch_alternates(alt->base);
327 if (obj_req->repo->next != NULL) {
328 obj_req->repo =
329 obj_req->repo->next;
330 close(obj_req->local);
331 obj_req->local = -1;
332 start_object_request(obj_req);
333 return;
334 }
335 }
336
337 finish_object_request(obj_req);
338 }
339
340 static void release_object_request(struct object_request *obj_req)
341 {
342 struct object_request *entry = object_queue_head;
343
344 if (obj_req->local != -1)
345 error("fd leakage in release: %d", obj_req->local);
346 if (obj_req == object_queue_head) {
347 object_queue_head = obj_req->next;
348 } else {
349 while (entry->next != NULL && entry->next != obj_req)
350 entry = entry->next;
351 if (entry->next == obj_req)
352 entry->next = entry->next->next;
353 }
354
355 free(obj_req->url);
356 free(obj_req);
357 }
358
#ifdef USE_CURL_MULTI
/*
 * Start waiting object requests while fewer than max_requests transfers
 * are active, then dispose of the curl handles of idle slots.
 */
void fill_active_slots(void)
{
	struct object_request *req;
	/* Sampled before any request is started, as the slot list may grow */
	struct active_request_slot *slot = active_queue_head;
	int num_transfers;

	for (req = object_queue_head;
	     active_requests < max_requests && req != NULL;
	     req = req->next) {
		if (req->state != WAITING)
			continue;
		if (has_sha1_file(req->sha1))
			req->state = COMPLETE;
		else
			start_object_request(req);
		curl_multi_perform(curlm, &num_transfers);
	}

	for (; slot != NULL; slot = slot->next) {
		if (slot->in_use || slot->curl == NULL)
			continue;
		curl_easy_cleanup(slot->curl);
		slot->curl = NULL;
	}
}
#endif
386
387 void prefetch(unsigned char *sha1)
388 {
389 struct object_request *newreq;
390 struct object_request *tail;
391 char *filename = sha1_file_name(sha1);
392
393 newreq = xmalloc(sizeof(*newreq));
394 memcpy(newreq->sha1, sha1, 20);
395 newreq->repo = alt;
396 newreq->url = NULL;
397 newreq->local = -1;
398 newreq->state = WAITING;
399 snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
400 snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
401 "%s.temp", filename);
402 newreq->next = NULL;
403
404 if (object_queue_head == NULL) {
405 object_queue_head = newreq;
406 } else {
407 tail = object_queue_head;
408 while (tail->next != NULL) {
409 tail = tail->next;
410 }
411 tail->next = newreq;
412 }
413
414 #ifdef USE_CURL_MULTI
415 fill_active_slots();
416 step_active_slots();
417 #endif
418 }
419
420 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
421 {
422 char *hex = sha1_to_hex(sha1);
423 char *filename;
424 char *url;
425 char tmpfile[PATH_MAX];
426 long prev_posn = 0;
427 char range[RANGE_HEADER_SIZE];
428 struct curl_slist *range_header = NULL;
429
430 FILE *indexfile;
431 struct active_request_slot *slot;
432 struct slot_results results;
433
434 if (has_pack_index(sha1))
435 return 0;
436
437 if (get_verbosely)
438 fprintf(stderr, "Getting index for pack %s\n", hex);
439
440 url = xmalloc(strlen(repo->base) + 64);
441 sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
442
443 filename = sha1_pack_index_name(sha1);
444 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
445 indexfile = fopen(tmpfile, "a");
446 if (!indexfile)
447 return error("Unable to open local file %s for pack index",
448 filename);
449
450 slot = get_active_slot();
451 slot->results = &results;
452 curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
453 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
454 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
455 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
456 slot->local = indexfile;
457
458 /* If there is data present from a previous transfer attempt,
459 resume where it left off */
460 prev_posn = ftell(indexfile);
461 if (prev_posn>0) {
462 if (get_verbosely)
463 fprintf(stderr,
464 "Resuming fetch of index for pack %s at byte %ld\n",
465 hex, prev_posn);
466 sprintf(range, "Range: bytes=%ld-", prev_posn);
467 range_header = curl_slist_append(range_header, range);
468 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
469 }
470
471 if (start_active_slot(slot)) {
472 run_active_slot(slot);
473 if (results.curl_result != CURLE_OK) {
474 fclose(indexfile);
475 return error("Unable to get pack index %s\n%s", url,
476 curl_errorstr);
477 }
478 } else {
479 fclose(indexfile);
480 return error("Unable to start request");
481 }
482
483 fclose(indexfile);
484
485 return move_temp_to_file(tmpfile, filename);
486 }
487
488 static int setup_index(struct alt_base *repo, unsigned char *sha1)
489 {
490 struct packed_git *new_pack;
491 if (has_pack_file(sha1))
492 return 0; // don't list this as something we can get
493
494 if (fetch_index(repo, sha1))
495 return -1;
496
497 new_pack = parse_pack_index(sha1);
498 new_pack->next = repo->packs;
499 repo->packs = new_pack;
500 return 0;
501 }
502
503 static void process_alternates_response(void *callback_data)
504 {
505 struct alternates_request *alt_req =
506 (struct alternates_request *)callback_data;
507 struct active_request_slot *slot = alt_req->slot;
508 struct alt_base *tail = alt;
509 char *base = alt_req->base;
510 static const char null_byte = '\0';
511 char *data;
512 int i = 0;
513
514 if (alt_req->http_specific) {
515 if (slot->curl_result != CURLE_OK ||
516 !alt_req->buffer->posn) {
517
518 /* Try reusing the slot to get non-http alternates */
519 alt_req->http_specific = 0;
520 sprintf(alt_req->url, "%s/objects/info/alternates",
521 base);
522 curl_easy_setopt(slot->curl, CURLOPT_URL,
523 alt_req->url);
524 active_requests++;
525 slot->in_use = 1;
526 if (slot->finished != NULL)
527 (*slot->finished) = 0;
528 if (!start_active_slot(slot)) {
529 got_alternates = -1;
530 slot->in_use = 0;
531 if (slot->finished != NULL)
532 (*slot->finished) = 1;
533 }
534 return;
535 }
536 } else if (slot->curl_result != CURLE_OK) {
537 if (slot->http_code != 404 &&
538 slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
539 got_alternates = -1;
540 return;
541 }
542 }
543
544 fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
545 alt_req->buffer->posn--;
546 data = alt_req->buffer->buffer;
547
548 while (i < alt_req->buffer->posn) {
549 int posn = i;
550 while (posn < alt_req->buffer->posn && data[posn] != '\n')
551 posn++;
552 if (data[posn] == '\n') {
553 int okay = 0;
554 int serverlen = 0;
555 struct alt_base *newalt;
556 char *target = NULL;
557 char *path;
558 if (data[i] == '/') {
559 serverlen = strchr(base + 8, '/') - base;
560 okay = 1;
561 } else if (!memcmp(data + i, "../", 3)) {
562 i += 3;
563 serverlen = strlen(base);
564 while (i + 2 < posn &&
565 !memcmp(data + i, "../", 3)) {
566 do {
567 serverlen--;
568 } while (serverlen &&
569 base[serverlen - 1] != '/');
570 i += 3;
571 }
572 // If the server got removed, give up.
573 okay = strchr(base, ':') - base + 3 <
574 serverlen;
575 } else if (alt_req->http_specific) {
576 char *colon = strchr(data + i, ':');
577 char *slash = strchr(data + i, '/');
578 if (colon && slash && colon < data + posn &&
579 slash < data + posn && colon < slash) {
580 okay = 1;
581 }
582 }
583 // skip 'objects' at end
584 if (okay) {
585 target = xmalloc(serverlen + posn - i - 6);
586 strncpy(target, base, serverlen);
587 strncpy(target + serverlen, data + i,
588 posn - i - 7);
589 target[serverlen + posn - i - 7] = '\0';
590 if (get_verbosely)
591 fprintf(stderr,
592 "Also look at %s\n", target);
593 newalt = xmalloc(sizeof(*newalt));
594 newalt->next = NULL;
595 newalt->base = target;
596 newalt->got_indices = 0;
597 newalt->packs = NULL;
598 path = strstr(target, "//");
599 if (path) {
600 path = strchr(path+2, '/');
601 if (path)
602 newalt->path_len = strlen(path);
603 }
604
605 while (tail->next != NULL)
606 tail = tail->next;
607 tail->next = newalt;
608 }
609 }
610 i = posn + 1;
611 }
612
613 got_alternates = 1;
614 }
615
616 static void fetch_alternates(char *base)
617 {
618 struct buffer buffer;
619 char *url;
620 char *data;
621 struct active_request_slot *slot;
622 struct alternates_request alt_req;
623
624 /* If another request has already started fetching alternates,
625 wait for them to arrive and return to processing this request's
626 curl message */
627 #ifdef USE_CURL_MULTI
628 while (got_alternates == 0) {
629 step_active_slots();
630 }
631 #endif
632
633 /* Nothing to do if they've already been fetched */
634 if (got_alternates == 1)
635 return;
636
637 /* Start the fetch */
638 got_alternates = 0;
639
640 data = xmalloc(4096);
641 buffer.size = 4096;
642 buffer.posn = 0;
643 buffer.buffer = data;
644
645 if (get_verbosely)
646 fprintf(stderr, "Getting alternates list for %s\n", base);
647
648 url = xmalloc(strlen(base) + 31);
649 sprintf(url, "%s/objects/info/http-alternates", base);
650
651 /* Use a callback to process the result, since another request
652 may fail and need to have alternates loaded before continuing */
653 slot = get_active_slot();
654 slot->callback_func = process_alternates_response;
655 slot->callback_data = &alt_req;
656
657 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
658 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
659 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
660
661 alt_req.base = base;
662 alt_req.url = url;
663 alt_req.buffer = &buffer;
664 alt_req.http_specific = 1;
665 alt_req.slot = slot;
666
667 if (start_active_slot(slot))
668 run_active_slot(slot);
669 else
670 got_alternates = -1;
671
672 free(data);
673 free(url);
674 }
675
676 #ifndef NO_EXPAT
677 static void
678 xml_start_tag(void *userData, const char *name, const char **atts)
679 {
680 struct xml_ctx *ctx = (struct xml_ctx *)userData;
681 const char *c = strchr(name, ':');
682 int new_len;
683
684 if (c == NULL)
685 c = name;
686 else
687 c++;
688
689 new_len = strlen(ctx->name) + strlen(c) + 2;
690
691 if (new_len > ctx->len) {
692 ctx->name = xrealloc(ctx->name, new_len);
693 ctx->len = new_len;
694 }
695 strcat(ctx->name, ".");
696 strcat(ctx->name, c);
697
698 if (ctx->cdata) {
699 free(ctx->cdata);
700 ctx->cdata = NULL;
701 }
702
703 ctx->userFunc(ctx, 0);
704 }
705
706 static void
707 xml_end_tag(void *userData, const char *name)
708 {
709 struct xml_ctx *ctx = (struct xml_ctx *)userData;
710 const char *c = strchr(name, ':');
711 char *ep;
712
713 ctx->userFunc(ctx, 1);
714
715 if (c == NULL)
716 c = name;
717 else
718 c++;
719
720 ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
721 *ep = 0;
722 }
723
724 static void
725 xml_cdata(void *userData, const XML_Char *s, int len)
726 {
727 struct xml_ctx *ctx = (struct xml_ctx *)userData;
728 if (ctx->cdata)
729 free(ctx->cdata);
730 ctx->cdata = xcalloc(len+1, 1);
731 strncpy(ctx->cdata, s, len);
732 }
733
734 static int remote_ls(struct alt_base *repo, const char *path, int flags,
735 void (*userFunc)(struct remote_ls_ctx *ls),
736 void *userData);
737
738 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
739 {
740 struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
741
742 if (tag_closed) {
743 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
744 if (ls->dentry_flags & IS_DIR) {
745 if (ls->flags & PROCESS_DIRS) {
746 ls->userFunc(ls);
747 }
748 if (strcmp(ls->dentry_name, ls->path) &&
749 ls->flags & RECURSIVE) {
750 ls->rc = remote_ls(ls->repo,
751 ls->dentry_name,
752 ls->flags,
753 ls->userFunc,
754 ls->userData);
755 }
756 } else if (ls->flags & PROCESS_FILES) {
757 ls->userFunc(ls);
758 }
759 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
760 ls->dentry_name = xmalloc(strlen(ctx->cdata) -
761 ls->repo->path_len + 1);
762 strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
763 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
764 ls->dentry_flags |= IS_DIR;
765 }
766 } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
767 if (ls->dentry_name) {
768 free(ls->dentry_name);
769 }
770 ls->dentry_name = NULL;
771 ls->dentry_flags = 0;
772 }
773 }
774
775 static int remote_ls(struct alt_base *repo, const char *path, int flags,
776 void (*userFunc)(struct remote_ls_ctx *ls),
777 void *userData)
778 {
779 char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
780 struct active_request_slot *slot;
781 struct slot_results results;
782 struct buffer in_buffer;
783 struct buffer out_buffer;
784 char *in_data;
785 char *out_data;
786 XML_Parser parser = XML_ParserCreate(NULL);
787 enum XML_Status result;
788 struct curl_slist *dav_headers = NULL;
789 struct xml_ctx ctx;
790 struct remote_ls_ctx ls;
791
792 ls.flags = flags;
793 ls.repo = repo;
794 ls.path = strdup(path);
795 ls.dentry_name = NULL;
796 ls.dentry_flags = 0;
797 ls.userData = userData;
798 ls.userFunc = userFunc;
799 ls.rc = 0;
800
801 sprintf(url, "%s%s", repo->base, path);
802
803 out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
804 out_data = xmalloc(out_buffer.size + 1);
805 snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
806 out_buffer.posn = 0;
807 out_buffer.buffer = out_data;
808
809 in_buffer.size = 4096;
810 in_data = xmalloc(in_buffer.size);
811 in_buffer.posn = 0;
812 in_buffer.buffer = in_data;
813
814 dav_headers = curl_slist_append(dav_headers, "Depth: 1");
815 dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
816
817 slot = get_active_slot();
818 slot->results = &results;
819 curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
820 curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
821 curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
822 curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
823 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
824 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
825 curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
826 curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
827 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
828
829 if (start_active_slot(slot)) {
830 run_active_slot(slot);
831 if (results.curl_result == CURLE_OK) {
832 ctx.name = xcalloc(10, 1);
833 ctx.len = 0;
834 ctx.cdata = NULL;
835 ctx.userFunc = handle_remote_ls_ctx;
836 ctx.userData = &ls;
837 XML_SetUserData(parser, &ctx);
838 XML_SetElementHandler(parser, xml_start_tag,
839 xml_end_tag);
840 XML_SetCharacterDataHandler(parser, xml_cdata);
841 result = XML_Parse(parser, in_buffer.buffer,
842 in_buffer.posn, 1);
843 free(ctx.name);
844
845 if (result != XML_STATUS_OK) {
846 ls.rc = error("XML error: %s",
847 XML_ErrorString(
848 XML_GetErrorCode(parser)));
849 }
850 } else {
851 ls.rc = -1;
852 }
853 } else {
854 ls.rc = error("Unable to start PROPFIND request");
855 }
856
857 free(ls.path);
858 free(url);
859 free(out_data);
860 free(in_buffer.buffer);
861 curl_slist_free_all(dav_headers);
862
863 return ls.rc;
864 }
865
866 static void process_ls_pack(struct remote_ls_ctx *ls)
867 {
868 unsigned char sha1[20];
869
870 if (strlen(ls->dentry_name) == 63 &&
871 !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
872 !strncmp(ls->dentry_name+58, ".pack", 5)) {
873 get_sha1_hex(ls->dentry_name + 18, sha1);
874 setup_index(ls->repo, sha1);
875 }
876 }
877 #endif
878
879 static int fetch_indices(struct alt_base *repo)
880 {
881 unsigned char sha1[20];
882 char *url;
883 struct buffer buffer;
884 char *data;
885 int i = 0;
886
887 struct active_request_slot *slot;
888 struct slot_results results;
889
890 if (repo->got_indices)
891 return 0;
892
893 data = xmalloc(4096);
894 buffer.size = 4096;
895 buffer.posn = 0;
896 buffer.buffer = data;
897
898 if (get_verbosely)
899 fprintf(stderr, "Getting pack list for %s\n", repo->base);
900
901 #ifndef NO_EXPAT
902 if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
903 process_ls_pack, NULL) == 0)
904 return 0;
905 #endif
906
907 url = xmalloc(strlen(repo->base) + 21);
908 sprintf(url, "%s/objects/info/packs", repo->base);
909
910 slot = get_active_slot();
911 slot->results = &results;
912 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
913 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
914 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
915 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
916 if (start_active_slot(slot)) {
917 run_active_slot(slot);
918 if (results.curl_result != CURLE_OK) {
919 if (results.http_code == 404 ||
920 results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
921 repo->got_indices = 1;
922 free(buffer.buffer);
923 return 0;
924 } else {
925 repo->got_indices = 0;
926 free(buffer.buffer);
927 return error("%s", curl_errorstr);
928 }
929 }
930 } else {
931 repo->got_indices = 0;
932 free(buffer.buffer);
933 return error("Unable to start request");
934 }
935
936 data = buffer.buffer;
937 while (i < buffer.posn) {
938 switch (data[i]) {
939 case 'P':
940 i++;
941 if (i + 52 <= buffer.posn &&
942 !strncmp(data + i, " pack-", 6) &&
943 !strncmp(data + i + 46, ".pack\n", 6)) {
944 get_sha1_hex(data + i + 6, sha1);
945 setup_index(repo, sha1);
946 i += 51;
947 break;
948 }
949 default:
950 while (i < buffer.posn && data[i] != '\n')
951 i++;
952 }
953 i++;
954 }
955
956 free(buffer.buffer);
957 repo->got_indices = 1;
958 return 0;
959 }
960
961 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
962 {
963 char *url;
964 struct packed_git *target;
965 struct packed_git **lst;
966 FILE *packfile;
967 char *filename;
968 char tmpfile[PATH_MAX];
969 int ret;
970 long prev_posn = 0;
971 char range[RANGE_HEADER_SIZE];
972 struct curl_slist *range_header = NULL;
973
974 struct active_request_slot *slot;
975 struct slot_results results;
976
977 if (fetch_indices(repo))
978 return -1;
979 target = find_sha1_pack(sha1, repo->packs);
980 if (!target)
981 return -1;
982
983 if (get_verbosely) {
984 fprintf(stderr, "Getting pack %s\n",
985 sha1_to_hex(target->sha1));
986 fprintf(stderr, " which contains %s\n",
987 sha1_to_hex(sha1));
988 }
989
990 url = xmalloc(strlen(repo->base) + 65);
991 sprintf(url, "%s/objects/pack/pack-%s.pack",
992 repo->base, sha1_to_hex(target->sha1));
993
994 filename = sha1_pack_name(target->sha1);
995 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
996 packfile = fopen(tmpfile, "a");
997 if (!packfile)
998 return error("Unable to open local file %s for pack",
999 filename);
1000
1001 slot = get_active_slot();
1002 slot->results = &results;
1003 curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1004 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1005 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1006 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1007 slot->local = packfile;
1008
1009 /* If there is data present from a previous transfer attempt,
1010 resume where it left off */
1011 prev_posn = ftell(packfile);
1012 if (prev_posn>0) {
1013 if (get_verbosely)
1014 fprintf(stderr,
1015 "Resuming fetch of pack %s at byte %ld\n",
1016 sha1_to_hex(target->sha1), prev_posn);
1017 sprintf(range, "Range: bytes=%ld-", prev_posn);
1018 range_header = curl_slist_append(range_header, range);
1019 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1020 }
1021
1022 if (start_active_slot(slot)) {
1023 run_active_slot(slot);
1024 if (results.curl_result != CURLE_OK) {
1025 fclose(packfile);
1026 return error("Unable to get pack file %s\n%s", url,
1027 curl_errorstr);
1028 }
1029 } else {
1030 fclose(packfile);
1031 return error("Unable to start request");
1032 }
1033
1034 fclose(packfile);
1035
1036 ret = move_temp_to_file(tmpfile, filename);
1037 if (ret)
1038 return ret;
1039
1040 lst = &repo->packs;
1041 while (*lst != target)
1042 lst = &((*lst)->next);
1043 *lst = (*lst)->next;
1044
1045 if (verify_pack(target, 0))
1046 return -1;
1047 install_packed_git(target);
1048
1049 return 0;
1050 }
1051
1052 static void abort_object_request(struct object_request *obj_req)
1053 {
1054 if (obj_req->local >= 0) {
1055 close(obj_req->local);
1056 obj_req->local = -1;
1057 }
1058 unlink(obj_req->tmpfile);
1059 if (obj_req->slot) {
1060 release_active_slot(obj_req->slot);
1061 obj_req->slot = NULL;
1062 }
1063 release_object_request(obj_req);
1064 }
1065
1066 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1067 {
1068 char *hex = sha1_to_hex(sha1);
1069 int ret = 0;
1070 struct object_request *obj_req = object_queue_head;
1071
1072 while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
1073 obj_req = obj_req->next;
1074 if (obj_req == NULL)
1075 return error("Couldn't find request for %s in the queue", hex);
1076
1077 if (has_sha1_file(obj_req->sha1)) {
1078 abort_object_request(obj_req);
1079 return 0;
1080 }
1081
1082 #ifdef USE_CURL_MULTI
1083 while (obj_req->state == WAITING) {
1084 step_active_slots();
1085 }
1086 #else
1087 start_object_request(obj_req);
1088 #endif
1089
1090 while (obj_req->state == ACTIVE) {
1091 run_active_slot(obj_req->slot);
1092 }
1093 if (obj_req->local != -1) {
1094 close(obj_req->local); obj_req->local = -1;
1095 }
1096
1097 if (obj_req->state == ABORTED) {
1098 ret = error("Request for %s aborted", hex);
1099 } else if (obj_req->curl_result != CURLE_OK &&
1100 obj_req->http_code != 416) {
1101 if (obj_req->http_code == 404 ||
1102 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1103 ret = -1; /* Be silent, it is probably in a pack. */
1104 else
1105 ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1106 obj_req->errorstr, obj_req->curl_result,
1107 obj_req->http_code, hex);
1108 } else if (obj_req->zret != Z_STREAM_END) {
1109 corrupt_object_found++;
1110 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1111 } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
1112 ret = error("File %s has bad hash", hex);
1113 } else if (obj_req->rename < 0) {
1114 ret = error("unable to write sha1 filename %s",
1115 obj_req->filename);
1116 }
1117
1118 release_object_request(obj_req);
1119 return ret;
1120 }
1121
1122 int fetch(unsigned char *sha1)
1123 {
1124 struct alt_base *altbase = alt;
1125
1126 if (!fetch_object(altbase, sha1))
1127 return 0;
1128 while (altbase) {
1129 if (!fetch_pack(altbase, sha1))
1130 return 0;
1131 fetch_alternates(alt->base);
1132 altbase = altbase->next;
1133 }
1134 return error("Unable to find %s under %s", sha1_to_hex(sha1),
1135 alt->base);
1136 }
1137
/*
 * Returns 0 for the characters that may appear unescaped in a ref URL
 * (ASCII letters, digits, '/', '-' and '.') and 1 for everything else.
 */
static inline int needs_quote(int ch)
{
	if (ch >= 'A' && ch <= 'Z')
		return 0;
	if (ch >= 'a' && ch <= 'z')
		return 0;
	if (ch >= '0' && ch <= '9')
		return 0;
	if (ch == '/' || ch == '-' || ch == '.')
		return 0;
	return 1;
}
1148
/* Maps a value in 0..15 to its upper-case hexadecimal digit. */
static inline int hex(int v)
{
	return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1154
/*
 * Build "<base>refs/<ref>" in freshly allocated memory, replacing every
 * character of ref for which needs_quote() is true by "%XX".  The caller
 * owns the returned string.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
	const char *src;
	char *out, *quoted;
	int baselen = strlen(base);
	int total = baselen + 6; /* "refs/" + NUL */

	/* First pass: one byte per plain character, three per quoted one */
	for (src = ref; *src != 0; src++)
		total += needs_quote(*src) ? 3 : 1;

	quoted = xmalloc(total);
	memcpy(quoted, base, baselen);
	memcpy(quoted + baselen, "refs/", 5);
	out = quoted + baselen + 5;

	/* Second pass: copy, escaping as we go */
	for (src = ref; *src != 0; src++) {
		int ch = *src;

		if (!needs_quote(ch)) {
			*out++ = ch;
			continue;
		}
		*out++ = '%';
		*out++ = hex((ch >> 4) & 0xF);
		*out++ = hex(ch & 0xF);
	}
	*out = 0;

	return quoted;
}
1182
1183 int fetch_ref(char *ref, unsigned char *sha1)
1184 {
1185 char *url;
1186 char hex[42];
1187 struct buffer buffer;
1188 char *base = alt->base;
1189 struct active_request_slot *slot;
1190 struct slot_results results;
1191 buffer.size = 41;
1192 buffer.posn = 0;
1193 buffer.buffer = hex;
1194 hex[41] = '\0';
1195
1196 url = quote_ref_url(base, ref);
1197 slot = get_active_slot();
1198 slot->results = &results;
1199 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1200 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1201 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1202 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1203 if (start_active_slot(slot)) {
1204 run_active_slot(slot);
1205 if (results.curl_result != CURLE_OK)
1206 return error("Couldn't get %s for %s\n%s",
1207 url, ref, curl_errorstr);
1208 } else {
1209 return error("Unable to start request");
1210 }
1211
1212 hex[40] = '\0';
1213 get_sha1_hex(hex, sha1);
1214 return 0;
1215 }
1216
1217 int main(int argc, char **argv)
1218 {
1219 char *commit_id;
1220 char *url;
1221 char *path;
1222 int arg = 1;
1223 int rc = 0;
1224
1225 setup_git_directory();
1226 git_config(git_default_config);
1227
1228 while (arg < argc && argv[arg][0] == '-') {
1229 if (argv[arg][1] == 't') {
1230 get_tree = 1;
1231 } else if (argv[arg][1] == 'c') {
1232 get_history = 1;
1233 } else if (argv[arg][1] == 'a') {
1234 get_all = 1;
1235 get_tree = 1;
1236 get_history = 1;
1237 } else if (argv[arg][1] == 'v') {
1238 get_verbosely = 1;
1239 } else if (argv[arg][1] == 'w') {
1240 write_ref = argv[arg + 1];
1241 arg++;
1242 } else if (!strcmp(argv[arg], "--recover")) {
1243 get_recover = 1;
1244 }
1245 arg++;
1246 }
1247 if (argc < arg + 2) {
1248 usage("git-http-fetch [-c] [-t] [-a] [-d] [-v] [--recover] [-w ref] commit-id url");
1249 return 1;
1250 }
1251 commit_id = argv[arg];
1252 url = argv[arg + 1];
1253 write_ref_log_details = url;
1254
1255 http_init();
1256
1257 no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1258
1259 alt = xmalloc(sizeof(*alt));
1260 alt->base = url;
1261 alt->got_indices = 0;
1262 alt->packs = NULL;
1263 alt->next = NULL;
1264 path = strstr(url, "//");
1265 if (path) {
1266 path = strchr(path+2, '/');
1267 if (path)
1268 alt->path_len = strlen(path);
1269 }
1270
1271 if (pull(commit_id))
1272 rc = 1;
1273
1274 http_cleanup();
1275
1276 curl_slist_free_all(no_pragma_header);
1277
1278 if (corrupt_object_found) {
1279 fprintf(stderr,
1280 "Some loose object were found to be corrupt, but they might be just\n"
1281 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1282 "status code. Suggest running git fsck-objects.\n");
1283 }
1284 return rc;
1285 }