]> git.ipfire.org Git - thirdparty/git.git/blob - http-fetch.c
New tests and en-passant modifications to mktag.
[thirdparty/git.git] / http-fetch.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
6
7 #ifndef NO_EXPAT
8 #include <expat.h>
9
10 /* Definitions for DAV requests */
11 #define DAV_PROPFIND "PROPFIND"
12 #define DAV_PROPFIND_RESP ".multistatus.response"
13 #define DAV_PROPFIND_NAME ".multistatus.response.href"
14 #define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
15 #define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
16
17 /* Definitions for processing XML DAV responses */
18 #ifndef XML_STATUS_OK
19 enum XML_Status {
20 XML_STATUS_OK = 1,
21 XML_STATUS_ERROR = 0
22 };
23 #define XML_STATUS_OK 1
24 #define XML_STATUS_ERROR 0
25 #endif
26
27 /* Flags that control remote_ls processing */
28 #define PROCESS_FILES (1u << 0)
29 #define PROCESS_DIRS (1u << 1)
30 #define RECURSIVE (1u << 2)
31
32 /* Flags that remote_ls passes to callback functions */
33 #define IS_DIR (1u << 0)
34 #endif
35
/* Chunk size used when replaying a partially downloaded object file. */
#define PREV_BUF_SIZE 4096
/* Capacity of the buffer that holds a "Range: bytes=N-" header line. */
#define RANGE_HEADER_SIZE 30

/* Non-zero when --stdin was given on the command line. */
static int commits_on_stdin;

/* -1: not fetched (or fetch failed), 0: fetch in progress, 1: loaded. */
static int got_alternates = -1;
/* Counts loose objects whose zlib stream did not end cleanly. */
static int corrupt_object_found;

/* Header list holding "Pragma:", attached to object/pack/index requests. */
static struct curl_slist *no_pragma_header;
45
/* One repository (the primary URL or an alternate) objects are fetched from. */
struct alt_base {
	const char *base;		/* URL prefix of the repository */
	int path_len;			/* length of the path part of base */
	int got_indices;		/* non-zero once the pack list was loaded */
	struct packed_git *packs;	/* packs known to exist in this repo */
	struct alt_base *next;		/* next alternate, or NULL */
};

/* Head of the repository list; the first entry is the URL given by the user. */
static struct alt_base *alt;
56
/* Life cycle of a queued loose-object request. */
enum object_request_state {
	WAITING,	/* queued, transfer not started yet */
	ABORTED,	/* could not be started */
	ACTIVE,		/* handed to a curl slot */
	COMPLETE	/* transfer finished, or object already present */
};
63
64 struct object_request
65 {
66 unsigned char sha1[20];
67 struct alt_base *repo;
68 char *url;
69 char filename[PATH_MAX];
70 char tmpfile[PATH_MAX];
71 int local;
72 enum object_request_state state;
73 CURLcode curl_result;
74 char errorstr[CURL_ERROR_SIZE];
75 long http_code;
76 unsigned char real_sha1[20];
77 SHA_CTX c;
78 z_stream stream;
79 int zret;
80 int rename;
81 struct active_request_slot *slot;
82 struct object_request *next;
83 };
84
/* Context handed to the callback that processes an alternates download. */
struct alternates_request {
	const char *base;			/* repository the list belongs to */
	char *url;				/* URL buffer, reused for the retry */
	struct buffer *buffer;			/* receives the response body */
	struct active_request_slot *slot;	/* slot carrying the request */
	int http_specific;			/* 1 while trying http-alternates */
};
92
#ifndef NO_EXPAT
/* Parser state shared by the expat callbacks. */
struct xml_ctx {
	char *name;	/* dotted path of the currently open elements */
	int len;	/* allocated size of name as tracked by xml_start_tag */
	char *cdata;	/* most recent character data, or NULL */
	void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
	void *userData;
};

/* State of one remote directory listing performed by remote_ls(). */
struct remote_ls_ctx {
	struct alt_base *repo;
	char *path;		/* path that was listed */
	void (*userFunc)(struct remote_ls_ctx *ls);
	void *userData;
	int flags;		/* PROCESS_FILES / PROCESS_DIRS / RECURSIVE */
	char *dentry_name;	/* name of the entry being reported */
	int dentry_flags;	/* IS_DIR when the entry is a collection */
	int rc;			/* result returned by remote_ls() */
	struct remote_ls_ctx *parent;
};
#endif
116
/* Head of the singly linked queue of pending object requests. */
static struct object_request *object_queue_head;
118
119 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
120 void *data)
121 {
122 unsigned char expn[4096];
123 size_t size = eltsize * nmemb;
124 int posn = 0;
125 struct object_request *obj_req = (struct object_request *)data;
126 do {
127 ssize_t retval = write(obj_req->local,
128 (char *) ptr + posn, size - posn);
129 if (retval < 0)
130 return posn;
131 posn += retval;
132 } while (posn < size);
133
134 obj_req->stream.avail_in = size;
135 obj_req->stream.next_in = ptr;
136 do {
137 obj_req->stream.next_out = expn;
138 obj_req->stream.avail_out = sizeof(expn);
139 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
140 SHA1_Update(&obj_req->c, expn,
141 sizeof(expn) - obj_req->stream.avail_out);
142 } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
143 data_received++;
144 return size;
145 }
146
147 static void fetch_alternates(const char *base);
148
149 static void process_object_response(void *callback_data);
150
151 static void start_object_request(struct object_request *obj_req)
152 {
153 char *hex = sha1_to_hex(obj_req->sha1);
154 char prevfile[PATH_MAX];
155 char *url;
156 char *posn;
157 int prevlocal;
158 unsigned char prev_buf[PREV_BUF_SIZE];
159 ssize_t prev_read = 0;
160 long prev_posn = 0;
161 char range[RANGE_HEADER_SIZE];
162 struct curl_slist *range_header = NULL;
163 struct active_request_slot *slot;
164
165 snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
166 unlink(prevfile);
167 rename(obj_req->tmpfile, prevfile);
168 unlink(obj_req->tmpfile);
169
170 if (obj_req->local != -1)
171 error("fd leakage in start: %d", obj_req->local);
172 obj_req->local = open(obj_req->tmpfile,
173 O_WRONLY | O_CREAT | O_EXCL, 0666);
174 /* This could have failed due to the "lazy directory creation";
175 * try to mkdir the last path component.
176 */
177 if (obj_req->local < 0 && errno == ENOENT) {
178 char *dir = strrchr(obj_req->tmpfile, '/');
179 if (dir) {
180 *dir = 0;
181 mkdir(obj_req->tmpfile, 0777);
182 *dir = '/';
183 }
184 obj_req->local = open(obj_req->tmpfile,
185 O_WRONLY | O_CREAT | O_EXCL, 0666);
186 }
187
188 if (obj_req->local < 0) {
189 obj_req->state = ABORTED;
190 error("Couldn't create temporary file %s for %s: %s",
191 obj_req->tmpfile, obj_req->filename, strerror(errno));
192 return;
193 }
194
195 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
196
197 inflateInit(&obj_req->stream);
198
199 SHA1_Init(&obj_req->c);
200
201 url = xmalloc(strlen(obj_req->repo->base) + 50);
202 obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
203 strcpy(url, obj_req->repo->base);
204 posn = url + strlen(obj_req->repo->base);
205 strcpy(posn, "objects/");
206 posn += 8;
207 memcpy(posn, hex, 2);
208 posn += 2;
209 *(posn++) = '/';
210 strcpy(posn, hex + 2);
211 strcpy(obj_req->url, url);
212
213 /* If a previous temp file is present, process what was already
214 fetched. */
215 prevlocal = open(prevfile, O_RDONLY);
216 if (prevlocal != -1) {
217 do {
218 prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
219 if (prev_read>0) {
220 if (fwrite_sha1_file(prev_buf,
221 1,
222 prev_read,
223 obj_req) == prev_read) {
224 prev_posn += prev_read;
225 } else {
226 prev_read = -1;
227 }
228 }
229 } while (prev_read > 0);
230 close(prevlocal);
231 }
232 unlink(prevfile);
233
234 /* Reset inflate/SHA1 if there was an error reading the previous temp
235 file; also rewind to the beginning of the local file. */
236 if (prev_read == -1) {
237 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
238 inflateInit(&obj_req->stream);
239 SHA1_Init(&obj_req->c);
240 if (prev_posn>0) {
241 prev_posn = 0;
242 lseek(obj_req->local, SEEK_SET, 0);
243 ftruncate(obj_req->local, 0);
244 }
245 }
246
247 slot = get_active_slot();
248 slot->callback_func = process_object_response;
249 slot->callback_data = obj_req;
250 obj_req->slot = slot;
251
252 curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
253 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
254 curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
255 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
256 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
257
258 /* If we have successfully processed data from a previous fetch
259 attempt, only fetch the data we don't already have. */
260 if (prev_posn>0) {
261 if (get_verbosely)
262 fprintf(stderr,
263 "Resuming fetch of object %s at byte %ld\n",
264 hex, prev_posn);
265 sprintf(range, "Range: bytes=%ld-", prev_posn);
266 range_header = curl_slist_append(range_header, range);
267 curl_easy_setopt(slot->curl,
268 CURLOPT_HTTPHEADER, range_header);
269 }
270
271 /* Try to get the request started, abort the request on error */
272 obj_req->state = ACTIVE;
273 if (!start_active_slot(slot)) {
274 obj_req->state = ABORTED;
275 obj_req->slot = NULL;
276 close(obj_req->local); obj_req->local = -1;
277 free(obj_req->url);
278 return;
279 }
280 }
281
282 static void finish_object_request(struct object_request *obj_req)
283 {
284 struct stat st;
285
286 fchmod(obj_req->local, 0444);
287 close(obj_req->local); obj_req->local = -1;
288
289 if (obj_req->http_code == 416) {
290 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
291 } else if (obj_req->curl_result != CURLE_OK) {
292 if (stat(obj_req->tmpfile, &st) == 0)
293 if (st.st_size == 0)
294 unlink(obj_req->tmpfile);
295 return;
296 }
297
298 inflateEnd(&obj_req->stream);
299 SHA1_Final(obj_req->real_sha1, &obj_req->c);
300 if (obj_req->zret != Z_STREAM_END) {
301 unlink(obj_req->tmpfile);
302 return;
303 }
304 if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
305 unlink(obj_req->tmpfile);
306 return;
307 }
308 obj_req->rename =
309 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
310
311 if (obj_req->rename == 0)
312 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
313 }
314
315 static void process_object_response(void *callback_data)
316 {
317 struct object_request *obj_req =
318 (struct object_request *)callback_data;
319
320 obj_req->curl_result = obj_req->slot->curl_result;
321 obj_req->http_code = obj_req->slot->http_code;
322 obj_req->slot = NULL;
323 obj_req->state = COMPLETE;
324
325 /* Use alternates if necessary */
326 if (obj_req->http_code == 404 ||
327 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
328 fetch_alternates(alt->base);
329 if (obj_req->repo->next != NULL) {
330 obj_req->repo =
331 obj_req->repo->next;
332 close(obj_req->local);
333 obj_req->local = -1;
334 start_object_request(obj_req);
335 return;
336 }
337 }
338
339 finish_object_request(obj_req);
340 }
341
342 static void release_object_request(struct object_request *obj_req)
343 {
344 struct object_request *entry = object_queue_head;
345
346 if (obj_req->local != -1)
347 error("fd leakage in release: %d", obj_req->local);
348 if (obj_req == object_queue_head) {
349 object_queue_head = obj_req->next;
350 } else {
351 while (entry->next != NULL && entry->next != obj_req)
352 entry = entry->next;
353 if (entry->next == obj_req)
354 entry->next = entry->next->next;
355 }
356
357 free(obj_req->url);
358 free(obj_req);
359 }
360
#ifdef USE_CURL_MULTI
/*
 * Start waiting object requests while request capacity remains, then clean
 * up the curl handles of slots that are not in use.
 */
void fill_active_slots(void)
{
	struct object_request *req;
	/* Captured before any request is started, as the slot list may change. */
	struct active_request_slot *slot = active_queue_head;
	int num_transfers;

	for (req = object_queue_head;
	     active_requests < max_requests && req != NULL;
	     req = req->next) {
		if (req->state != WAITING)
			continue;
		if (has_sha1_file(req->sha1))
			req->state = COMPLETE;
		else
			start_object_request(req);
		curl_multi_perform(curlm, &num_transfers);
	}

	for (; slot != NULL; slot = slot->next) {
		if (slot->in_use || slot->curl == NULL)
			continue;
		curl_easy_cleanup(slot->curl);
		slot->curl = NULL;
	}
}
#endif
388
389 void prefetch(unsigned char *sha1)
390 {
391 struct object_request *newreq;
392 struct object_request *tail;
393 char *filename = sha1_file_name(sha1);
394
395 newreq = xmalloc(sizeof(*newreq));
396 memcpy(newreq->sha1, sha1, 20);
397 newreq->repo = alt;
398 newreq->url = NULL;
399 newreq->local = -1;
400 newreq->state = WAITING;
401 snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
402 snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
403 "%s.temp", filename);
404 newreq->slot = NULL;
405 newreq->next = NULL;
406
407 if (object_queue_head == NULL) {
408 object_queue_head = newreq;
409 } else {
410 tail = object_queue_head;
411 while (tail->next != NULL) {
412 tail = tail->next;
413 }
414 tail->next = newreq;
415 }
416
417 #ifdef USE_CURL_MULTI
418 fill_active_slots();
419 step_active_slots();
420 #endif
421 }
422
423 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
424 {
425 char *hex = sha1_to_hex(sha1);
426 char *filename;
427 char *url;
428 char tmpfile[PATH_MAX];
429 long prev_posn = 0;
430 char range[RANGE_HEADER_SIZE];
431 struct curl_slist *range_header = NULL;
432
433 FILE *indexfile;
434 struct active_request_slot *slot;
435 struct slot_results results;
436
437 if (has_pack_index(sha1))
438 return 0;
439
440 if (get_verbosely)
441 fprintf(stderr, "Getting index for pack %s\n", hex);
442
443 url = xmalloc(strlen(repo->base) + 64);
444 sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
445
446 filename = sha1_pack_index_name(sha1);
447 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
448 indexfile = fopen(tmpfile, "a");
449 if (!indexfile)
450 return error("Unable to open local file %s for pack index",
451 filename);
452
453 slot = get_active_slot();
454 slot->results = &results;
455 curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
456 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
457 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
458 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
459 slot->local = indexfile;
460
461 /* If there is data present from a previous transfer attempt,
462 resume where it left off */
463 prev_posn = ftell(indexfile);
464 if (prev_posn>0) {
465 if (get_verbosely)
466 fprintf(stderr,
467 "Resuming fetch of index for pack %s at byte %ld\n",
468 hex, prev_posn);
469 sprintf(range, "Range: bytes=%ld-", prev_posn);
470 range_header = curl_slist_append(range_header, range);
471 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
472 }
473
474 if (start_active_slot(slot)) {
475 run_active_slot(slot);
476 if (results.curl_result != CURLE_OK) {
477 fclose(indexfile);
478 return error("Unable to get pack index %s\n%s", url,
479 curl_errorstr);
480 }
481 } else {
482 fclose(indexfile);
483 return error("Unable to start request");
484 }
485
486 fclose(indexfile);
487
488 return move_temp_to_file(tmpfile, filename);
489 }
490
491 static int setup_index(struct alt_base *repo, unsigned char *sha1)
492 {
493 struct packed_git *new_pack;
494 if (has_pack_file(sha1))
495 return 0; /* don't list this as something we can get */
496
497 if (fetch_index(repo, sha1))
498 return -1;
499
500 new_pack = parse_pack_index(sha1);
501 new_pack->next = repo->packs;
502 repo->packs = new_pack;
503 return 0;
504 }
505
506 static void process_alternates_response(void *callback_data)
507 {
508 struct alternates_request *alt_req =
509 (struct alternates_request *)callback_data;
510 struct active_request_slot *slot = alt_req->slot;
511 struct alt_base *tail = alt;
512 const char *base = alt_req->base;
513 static const char null_byte = '\0';
514 char *data;
515 int i = 0;
516
517 if (alt_req->http_specific) {
518 if (slot->curl_result != CURLE_OK ||
519 !alt_req->buffer->posn) {
520
521 /* Try reusing the slot to get non-http alternates */
522 alt_req->http_specific = 0;
523 sprintf(alt_req->url, "%s/objects/info/alternates",
524 base);
525 curl_easy_setopt(slot->curl, CURLOPT_URL,
526 alt_req->url);
527 active_requests++;
528 slot->in_use = 1;
529 if (slot->finished != NULL)
530 (*slot->finished) = 0;
531 if (!start_active_slot(slot)) {
532 got_alternates = -1;
533 slot->in_use = 0;
534 if (slot->finished != NULL)
535 (*slot->finished) = 1;
536 }
537 return;
538 }
539 } else if (slot->curl_result != CURLE_OK) {
540 if (slot->http_code != 404 &&
541 slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
542 got_alternates = -1;
543 return;
544 }
545 }
546
547 fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
548 alt_req->buffer->posn--;
549 data = alt_req->buffer->buffer;
550
551 while (i < alt_req->buffer->posn) {
552 int posn = i;
553 while (posn < alt_req->buffer->posn && data[posn] != '\n')
554 posn++;
555 if (data[posn] == '\n') {
556 int okay = 0;
557 int serverlen = 0;
558 struct alt_base *newalt;
559 char *target = NULL;
560 char *path;
561 if (data[i] == '/') {
562 serverlen = strchr(base + 8, '/') - base;
563 okay = 1;
564 } else if (!memcmp(data + i, "../", 3)) {
565 i += 3;
566 serverlen = strlen(base);
567 while (i + 2 < posn &&
568 !memcmp(data + i, "../", 3)) {
569 do {
570 serverlen--;
571 } while (serverlen &&
572 base[serverlen - 1] != '/');
573 i += 3;
574 }
575 /* If the server got removed, give up. */
576 okay = strchr(base, ':') - base + 3 <
577 serverlen;
578 } else if (alt_req->http_specific) {
579 char *colon = strchr(data + i, ':');
580 char *slash = strchr(data + i, '/');
581 if (colon && slash && colon < data + posn &&
582 slash < data + posn && colon < slash) {
583 okay = 1;
584 }
585 }
586 /* skip 'objects' at end */
587 if (okay) {
588 target = xmalloc(serverlen + posn - i - 6);
589 strlcpy(target, base, serverlen);
590 strlcpy(target + serverlen, data + i, posn - i - 6);
591 if (get_verbosely)
592 fprintf(stderr,
593 "Also look at %s\n", target);
594 newalt = xmalloc(sizeof(*newalt));
595 newalt->next = NULL;
596 newalt->base = target;
597 newalt->got_indices = 0;
598 newalt->packs = NULL;
599 path = strstr(target, "//");
600 if (path) {
601 path = strchr(path+2, '/');
602 if (path)
603 newalt->path_len = strlen(path);
604 }
605
606 while (tail->next != NULL)
607 tail = tail->next;
608 tail->next = newalt;
609 }
610 }
611 i = posn + 1;
612 }
613
614 got_alternates = 1;
615 }
616
617 static void fetch_alternates(const char *base)
618 {
619 struct buffer buffer;
620 char *url;
621 char *data;
622 struct active_request_slot *slot;
623 struct alternates_request alt_req;
624
625 /* If another request has already started fetching alternates,
626 wait for them to arrive and return to processing this request's
627 curl message */
628 #ifdef USE_CURL_MULTI
629 while (got_alternates == 0) {
630 step_active_slots();
631 }
632 #endif
633
634 /* Nothing to do if they've already been fetched */
635 if (got_alternates == 1)
636 return;
637
638 /* Start the fetch */
639 got_alternates = 0;
640
641 data = xmalloc(4096);
642 buffer.size = 4096;
643 buffer.posn = 0;
644 buffer.buffer = data;
645
646 if (get_verbosely)
647 fprintf(stderr, "Getting alternates list for %s\n", base);
648
649 url = xmalloc(strlen(base) + 31);
650 sprintf(url, "%s/objects/info/http-alternates", base);
651
652 /* Use a callback to process the result, since another request
653 may fail and need to have alternates loaded before continuing */
654 slot = get_active_slot();
655 slot->callback_func = process_alternates_response;
656 slot->callback_data = &alt_req;
657
658 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
659 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
660 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
661
662 alt_req.base = base;
663 alt_req.url = url;
664 alt_req.buffer = &buffer;
665 alt_req.http_specific = 1;
666 alt_req.slot = slot;
667
668 if (start_active_slot(slot))
669 run_active_slot(slot);
670 else
671 got_alternates = -1;
672
673 free(data);
674 free(url);
675 }
676
677 #ifndef NO_EXPAT
678 static void
679 xml_start_tag(void *userData, const char *name, const char **atts)
680 {
681 struct xml_ctx *ctx = (struct xml_ctx *)userData;
682 const char *c = strchr(name, ':');
683 int new_len;
684
685 if (c == NULL)
686 c = name;
687 else
688 c++;
689
690 new_len = strlen(ctx->name) + strlen(c) + 2;
691
692 if (new_len > ctx->len) {
693 ctx->name = xrealloc(ctx->name, new_len);
694 ctx->len = new_len;
695 }
696 strcat(ctx->name, ".");
697 strcat(ctx->name, c);
698
699 if (ctx->cdata) {
700 free(ctx->cdata);
701 ctx->cdata = NULL;
702 }
703
704 ctx->userFunc(ctx, 0);
705 }
706
707 static void
708 xml_end_tag(void *userData, const char *name)
709 {
710 struct xml_ctx *ctx = (struct xml_ctx *)userData;
711 const char *c = strchr(name, ':');
712 char *ep;
713
714 ctx->userFunc(ctx, 1);
715
716 if (c == NULL)
717 c = name;
718 else
719 c++;
720
721 ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
722 *ep = 0;
723 }
724
725 static void
726 xml_cdata(void *userData, const XML_Char *s, int len)
727 {
728 struct xml_ctx *ctx = (struct xml_ctx *)userData;
729 if (ctx->cdata)
730 free(ctx->cdata);
731 ctx->cdata = xmalloc(len + 1);
732 strlcpy(ctx->cdata, s, len + 1);
733 }
734
735 static int remote_ls(struct alt_base *repo, const char *path, int flags,
736 void (*userFunc)(struct remote_ls_ctx *ls),
737 void *userData);
738
739 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
740 {
741 struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
742
743 if (tag_closed) {
744 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
745 if (ls->dentry_flags & IS_DIR) {
746 if (ls->flags & PROCESS_DIRS) {
747 ls->userFunc(ls);
748 }
749 if (strcmp(ls->dentry_name, ls->path) &&
750 ls->flags & RECURSIVE) {
751 ls->rc = remote_ls(ls->repo,
752 ls->dentry_name,
753 ls->flags,
754 ls->userFunc,
755 ls->userData);
756 }
757 } else if (ls->flags & PROCESS_FILES) {
758 ls->userFunc(ls);
759 }
760 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
761 ls->dentry_name = xmalloc(strlen(ctx->cdata) -
762 ls->repo->path_len + 1);
763 strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
764 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
765 ls->dentry_flags |= IS_DIR;
766 }
767 } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
768 if (ls->dentry_name) {
769 free(ls->dentry_name);
770 }
771 ls->dentry_name = NULL;
772 ls->dentry_flags = 0;
773 }
774 }
775
776 static int remote_ls(struct alt_base *repo, const char *path, int flags,
777 void (*userFunc)(struct remote_ls_ctx *ls),
778 void *userData)
779 {
780 char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
781 struct active_request_slot *slot;
782 struct slot_results results;
783 struct buffer in_buffer;
784 struct buffer out_buffer;
785 char *in_data;
786 char *out_data;
787 XML_Parser parser = XML_ParserCreate(NULL);
788 enum XML_Status result;
789 struct curl_slist *dav_headers = NULL;
790 struct xml_ctx ctx;
791 struct remote_ls_ctx ls;
792
793 ls.flags = flags;
794 ls.repo = repo;
795 ls.path = strdup(path);
796 ls.dentry_name = NULL;
797 ls.dentry_flags = 0;
798 ls.userData = userData;
799 ls.userFunc = userFunc;
800 ls.rc = 0;
801
802 sprintf(url, "%s%s", repo->base, path);
803
804 out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
805 out_data = xmalloc(out_buffer.size + 1);
806 snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
807 out_buffer.posn = 0;
808 out_buffer.buffer = out_data;
809
810 in_buffer.size = 4096;
811 in_data = xmalloc(in_buffer.size);
812 in_buffer.posn = 0;
813 in_buffer.buffer = in_data;
814
815 dav_headers = curl_slist_append(dav_headers, "Depth: 1");
816 dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
817
818 slot = get_active_slot();
819 slot->results = &results;
820 curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
821 curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
822 curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
823 curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
824 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
825 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
826 curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
827 curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
828 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
829
830 if (start_active_slot(slot)) {
831 run_active_slot(slot);
832 if (results.curl_result == CURLE_OK) {
833 ctx.name = xcalloc(10, 1);
834 ctx.len = 0;
835 ctx.cdata = NULL;
836 ctx.userFunc = handle_remote_ls_ctx;
837 ctx.userData = &ls;
838 XML_SetUserData(parser, &ctx);
839 XML_SetElementHandler(parser, xml_start_tag,
840 xml_end_tag);
841 XML_SetCharacterDataHandler(parser, xml_cdata);
842 result = XML_Parse(parser, in_buffer.buffer,
843 in_buffer.posn, 1);
844 free(ctx.name);
845
846 if (result != XML_STATUS_OK) {
847 ls.rc = error("XML error: %s",
848 XML_ErrorString(
849 XML_GetErrorCode(parser)));
850 }
851 } else {
852 ls.rc = -1;
853 }
854 } else {
855 ls.rc = error("Unable to start PROPFIND request");
856 }
857
858 free(ls.path);
859 free(url);
860 free(out_data);
861 free(in_buffer.buffer);
862 curl_slist_free_all(dav_headers);
863
864 return ls.rc;
865 }
866
867 static void process_ls_pack(struct remote_ls_ctx *ls)
868 {
869 unsigned char sha1[20];
870
871 if (strlen(ls->dentry_name) == 63 &&
872 !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
873 !strncmp(ls->dentry_name+58, ".pack", 5)) {
874 get_sha1_hex(ls->dentry_name + 18, sha1);
875 setup_index(ls->repo, sha1);
876 }
877 }
878 #endif
879
880 static int fetch_indices(struct alt_base *repo)
881 {
882 unsigned char sha1[20];
883 char *url;
884 struct buffer buffer;
885 char *data;
886 int i = 0;
887
888 struct active_request_slot *slot;
889 struct slot_results results;
890
891 if (repo->got_indices)
892 return 0;
893
894 data = xmalloc(4096);
895 buffer.size = 4096;
896 buffer.posn = 0;
897 buffer.buffer = data;
898
899 if (get_verbosely)
900 fprintf(stderr, "Getting pack list for %s\n", repo->base);
901
902 #ifndef NO_EXPAT
903 if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
904 process_ls_pack, NULL) == 0)
905 return 0;
906 #endif
907
908 url = xmalloc(strlen(repo->base) + 21);
909 sprintf(url, "%s/objects/info/packs", repo->base);
910
911 slot = get_active_slot();
912 slot->results = &results;
913 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
914 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
915 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
916 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
917 if (start_active_slot(slot)) {
918 run_active_slot(slot);
919 if (results.curl_result != CURLE_OK) {
920 if (results.http_code == 404 ||
921 results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
922 repo->got_indices = 1;
923 free(buffer.buffer);
924 return 0;
925 } else {
926 repo->got_indices = 0;
927 free(buffer.buffer);
928 return error("%s", curl_errorstr);
929 }
930 }
931 } else {
932 repo->got_indices = 0;
933 free(buffer.buffer);
934 return error("Unable to start request");
935 }
936
937 data = buffer.buffer;
938 while (i < buffer.posn) {
939 switch (data[i]) {
940 case 'P':
941 i++;
942 if (i + 52 <= buffer.posn &&
943 !strncmp(data + i, " pack-", 6) &&
944 !strncmp(data + i + 46, ".pack\n", 6)) {
945 get_sha1_hex(data + i + 6, sha1);
946 setup_index(repo, sha1);
947 i += 51;
948 break;
949 }
950 default:
951 while (i < buffer.posn && data[i] != '\n')
952 i++;
953 }
954 i++;
955 }
956
957 free(buffer.buffer);
958 repo->got_indices = 1;
959 return 0;
960 }
961
962 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
963 {
964 char *url;
965 struct packed_git *target;
966 struct packed_git **lst;
967 FILE *packfile;
968 char *filename;
969 char tmpfile[PATH_MAX];
970 int ret;
971 long prev_posn = 0;
972 char range[RANGE_HEADER_SIZE];
973 struct curl_slist *range_header = NULL;
974
975 struct active_request_slot *slot;
976 struct slot_results results;
977
978 if (fetch_indices(repo))
979 return -1;
980 target = find_sha1_pack(sha1, repo->packs);
981 if (!target)
982 return -1;
983
984 if (get_verbosely) {
985 fprintf(stderr, "Getting pack %s\n",
986 sha1_to_hex(target->sha1));
987 fprintf(stderr, " which contains %s\n",
988 sha1_to_hex(sha1));
989 }
990
991 url = xmalloc(strlen(repo->base) + 65);
992 sprintf(url, "%s/objects/pack/pack-%s.pack",
993 repo->base, sha1_to_hex(target->sha1));
994
995 filename = sha1_pack_name(target->sha1);
996 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
997 packfile = fopen(tmpfile, "a");
998 if (!packfile)
999 return error("Unable to open local file %s for pack",
1000 filename);
1001
1002 slot = get_active_slot();
1003 slot->results = &results;
1004 curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1005 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1006 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1007 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1008 slot->local = packfile;
1009
1010 /* If there is data present from a previous transfer attempt,
1011 resume where it left off */
1012 prev_posn = ftell(packfile);
1013 if (prev_posn>0) {
1014 if (get_verbosely)
1015 fprintf(stderr,
1016 "Resuming fetch of pack %s at byte %ld\n",
1017 sha1_to_hex(target->sha1), prev_posn);
1018 sprintf(range, "Range: bytes=%ld-", prev_posn);
1019 range_header = curl_slist_append(range_header, range);
1020 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1021 }
1022
1023 if (start_active_slot(slot)) {
1024 run_active_slot(slot);
1025 if (results.curl_result != CURLE_OK) {
1026 fclose(packfile);
1027 return error("Unable to get pack file %s\n%s", url,
1028 curl_errorstr);
1029 }
1030 } else {
1031 fclose(packfile);
1032 return error("Unable to start request");
1033 }
1034
1035 fclose(packfile);
1036
1037 ret = move_temp_to_file(tmpfile, filename);
1038 if (ret)
1039 return ret;
1040
1041 lst = &repo->packs;
1042 while (*lst != target)
1043 lst = &((*lst)->next);
1044 *lst = (*lst)->next;
1045
1046 if (verify_pack(target, 0))
1047 return -1;
1048 install_packed_git(target);
1049
1050 return 0;
1051 }
1052
1053 static void abort_object_request(struct object_request *obj_req)
1054 {
1055 if (obj_req->local >= 0) {
1056 close(obj_req->local);
1057 obj_req->local = -1;
1058 }
1059 unlink(obj_req->tmpfile);
1060 if (obj_req->slot) {
1061 release_active_slot(obj_req->slot);
1062 obj_req->slot = NULL;
1063 }
1064 release_object_request(obj_req);
1065 }
1066
1067 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1068 {
1069 char *hex = sha1_to_hex(sha1);
1070 int ret = 0;
1071 struct object_request *obj_req = object_queue_head;
1072
1073 while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
1074 obj_req = obj_req->next;
1075 if (obj_req == NULL)
1076 return error("Couldn't find request for %s in the queue", hex);
1077
1078 if (has_sha1_file(obj_req->sha1)) {
1079 abort_object_request(obj_req);
1080 return 0;
1081 }
1082
1083 #ifdef USE_CURL_MULTI
1084 while (obj_req->state == WAITING) {
1085 step_active_slots();
1086 }
1087 #else
1088 start_object_request(obj_req);
1089 #endif
1090
1091 while (obj_req->state == ACTIVE) {
1092 run_active_slot(obj_req->slot);
1093 }
1094 if (obj_req->local != -1) {
1095 close(obj_req->local); obj_req->local = -1;
1096 }
1097
1098 if (obj_req->state == ABORTED) {
1099 ret = error("Request for %s aborted", hex);
1100 } else if (obj_req->curl_result != CURLE_OK &&
1101 obj_req->http_code != 416) {
1102 if (obj_req->http_code == 404 ||
1103 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1104 ret = -1; /* Be silent, it is probably in a pack. */
1105 else
1106 ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1107 obj_req->errorstr, obj_req->curl_result,
1108 obj_req->http_code, hex);
1109 } else if (obj_req->zret != Z_STREAM_END) {
1110 corrupt_object_found++;
1111 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1112 } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
1113 ret = error("File %s has bad hash", hex);
1114 } else if (obj_req->rename < 0) {
1115 ret = error("unable to write sha1 filename %s",
1116 obj_req->filename);
1117 }
1118
1119 release_object_request(obj_req);
1120 return ret;
1121 }
1122
1123 int fetch(unsigned char *sha1)
1124 {
1125 struct alt_base *altbase = alt;
1126
1127 if (!fetch_object(altbase, sha1))
1128 return 0;
1129 while (altbase) {
1130 if (!fetch_pack(altbase, sha1))
1131 return 0;
1132 fetch_alternates(alt->base);
1133 altbase = altbase->next;
1134 }
1135 return error("Unable to find %s under %s", sha1_to_hex(sha1),
1136 alt->base);
1137 }
1138
/*
 * Returns 0 for characters copied verbatim into a ref URL (ASCII letters,
 * digits, '/', '-' and '.') and 1 for everything else.
 */
static inline int needs_quote(int ch)
{
	switch (ch) {
	case '/':
	case '-':
	case '.':
		return 0;
	default:
		break;
	}
	if ('A' <= ch && ch <= 'Z')
		return 0;
	if ('a' <= ch && ch <= 'z')
		return 0;
	if ('0' <= ch && ch <= '9')
		return 0;
	return 1;
}
1150
/* Maps 0..15 to the upper-case hexadecimal digit character for that value. */
static inline int hex(int v)
{
	return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1156
/*
 * Build "<base>refs/<ref>" in a newly allocated string, replacing every
 * character of ref for which needs_quote() is true by '%' and two
 * hexadecimal digits.  The caller owns the returned string.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
	const char *src;
	char *out, *result;
	int base_len = strlen(base);
	int total = base_len + 6; /* "refs/" + NUL */

	/* First pass: size the result. */
	for (src = ref; *src != 0; src++)
		total += needs_quote(*src) ? 3 : 1;

	result = xmalloc(total);
	memcpy(result, base, base_len);
	memcpy(result + base_len, "refs/", 5);
	out = result + base_len + 5;

	/* Second pass: copy, escaping where required. */
	for (src = ref; *src != 0; src++) {
		int ch = *src;

		if (!needs_quote(ch)) {
			*out++ = ch;
			continue;
		}
		*out++ = '%';
		*out++ = hex((ch >> 4) & 0xF);
		*out++ = hex(ch & 0xF);
	}
	*out = 0;

	return result;
}
1184
1185 int fetch_ref(char *ref, unsigned char *sha1)
1186 {
1187 char *url;
1188 char hex[42];
1189 struct buffer buffer;
1190 const char *base = alt->base;
1191 struct active_request_slot *slot;
1192 struct slot_results results;
1193 buffer.size = 41;
1194 buffer.posn = 0;
1195 buffer.buffer = hex;
1196 hex[41] = '\0';
1197
1198 url = quote_ref_url(base, ref);
1199 slot = get_active_slot();
1200 slot->results = &results;
1201 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1202 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1203 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1204 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1205 if (start_active_slot(slot)) {
1206 run_active_slot(slot);
1207 if (results.curl_result != CURLE_OK)
1208 return error("Couldn't get %s for %s\n%s",
1209 url, ref, curl_errorstr);
1210 } else {
1211 return error("Unable to start request");
1212 }
1213
1214 hex[40] = '\0';
1215 get_sha1_hex(hex, sha1);
1216 return 0;
1217 }
1218
1219 int main(int argc, const char **argv)
1220 {
1221 int commits;
1222 const char **write_ref = NULL;
1223 char **commit_id;
1224 const char *url;
1225 char *path;
1226 int arg = 1;
1227 int rc = 0;
1228
1229 setup_git_directory();
1230 git_config(git_default_config);
1231
1232 while (arg < argc && argv[arg][0] == '-') {
1233 if (argv[arg][1] == 't') {
1234 get_tree = 1;
1235 } else if (argv[arg][1] == 'c') {
1236 get_history = 1;
1237 } else if (argv[arg][1] == 'a') {
1238 get_all = 1;
1239 get_tree = 1;
1240 get_history = 1;
1241 } else if (argv[arg][1] == 'v') {
1242 get_verbosely = 1;
1243 } else if (argv[arg][1] == 'w') {
1244 write_ref = &argv[arg + 1];
1245 arg++;
1246 } else if (!strcmp(argv[arg], "--recover")) {
1247 get_recover = 1;
1248 } else if (!strcmp(argv[arg], "--stdin")) {
1249 commits_on_stdin = 1;
1250 }
1251 arg++;
1252 }
1253 if (argc < arg + 2 - commits_on_stdin) {
1254 usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1255 return 1;
1256 }
1257 if (commits_on_stdin) {
1258 commits = pull_targets_stdin(&commit_id, &write_ref);
1259 } else {
1260 commit_id = (char **) &argv[arg++];
1261 commits = 1;
1262 }
1263 url = argv[arg];
1264
1265 http_init();
1266
1267 no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1268
1269 alt = xmalloc(sizeof(*alt));
1270 alt->base = url;
1271 alt->got_indices = 0;
1272 alt->packs = NULL;
1273 alt->next = NULL;
1274 path = strstr(url, "//");
1275 if (path) {
1276 path = strchr(path+2, '/');
1277 if (path)
1278 alt->path_len = strlen(path);
1279 }
1280
1281 if (pull(commits, commit_id, write_ref, url))
1282 rc = 1;
1283
1284 http_cleanup();
1285
1286 curl_slist_free_all(no_pragma_header);
1287
1288 if (commits_on_stdin)
1289 pull_targets_free(commits, commit_id, write_ref);
1290
1291 if (corrupt_object_found) {
1292 fprintf(stderr,
1293 "Some loose object were found to be corrupt, but they might be just\n"
1294 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1295 "status code. Suggest running git fsck-objects.\n");
1296 }
1297 return rc;
1298 }