]> git.ipfire.org Git - thirdparty/git.git/blob - http-fetch.c
http-fetch.c: consolidate code to detect missing fetch target
[thirdparty/git.git] / http-fetch.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
6
7 #ifndef NO_EXPAT
8 #include <expat.h>
9
/*
 * WebDAV request definitions.
 *
 * The DAV_PROPFIND_* strings are the dotted element paths that the XML
 * handlers in this file build up (namespace prefixes stripped) while
 * walking a PROPFIND "multistatus" reply.
 */
#define DAV_PROPFIND "PROPFIND"
#define DAV_PROPFIND_RESP ".multistatus.response"
#define DAV_PROPFIND_NAME DAV_PROPFIND_RESP ".href"
#define DAV_PROPFIND_COLLECTION \
	DAV_PROPFIND_RESP ".propstat.prop.resourcetype.collection"

/* Request body asking the server for every property of the resource. */
#define PROPFIND_ALL_REQUEST \
	"<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n" \
	"<D:propfind xmlns:D=\"DAV:\">\n" \
	"<D:allprop/>\n" \
	"</D:propfind>"

/*
 * XML parse status codes, supplied here only when the XML header in use
 * does not already define XML_STATUS_OK.
 */
#ifndef XML_STATUS_OK
enum XML_Status {
	XML_STATUS_OK = 1,
	XML_STATUS_ERROR = 0
};
#define XML_STATUS_OK 1
#define XML_STATUS_ERROR 0
#endif

/* Flags that control remote_ls processing */
#define PROCESS_FILES 0x01u	/* invoke the callback for non-collections */
#define PROCESS_DIRS  0x02u	/* invoke the callback for collections */
#define RECURSIVE     0x04u	/* descend into collections */

/* Flags that remote_ls passes to callback functions */
#define IS_DIR 0x01u		/* the entry is a collection */
34 #endif
35
/* Chunk size used when replaying a partially downloaded object file. */
#define PREV_BUF_SIZE 4096

/*
 * Size of the buffer that receives a "Range: bytes=<offset>-" header.
 * The fixed text is 13 bytes, a positive 64-bit long needs up to 19
 * digits, plus the trailing '-' and the NUL: 34 bytes in the worst
 * case.  The previous value of 30 was too small for that.
 */
#define RANGE_HEADER_SIZE 40

/* NOTE(review): not referenced in the visible part of the file. */
static int commits_on_stdin;

/*
 * State of the alternates lookup: -1 = not fetched yet (or the fetch
 * failed), 0 = a fetch is in progress, 1 = the list has been processed.
 */
static int got_alternates = -1;

/* Counts downloaded objects whose zlib stream did not end cleanly. */
static int corrupt_object_found;

/*
 * Header list handed to curl for plain object/pack/index downloads.
 * NOTE(review): presumably populated during start-up; not shown here.
 */
static struct curl_slist *no_pragma_header;
45
/*
 * One remote object store: the repository being fetched from, or one
 * of the alternates it borrows objects from.
 */
struct alt_base {
	const char *base;		/* base URL of the store */
	int path_len;			/* length of the path part of base */
	int got_indices;		/* nonzero once the pack list was read */
	struct packed_git *packs;	/* remote packs not downloaded yet */
	struct alt_base *next;		/* next store to try */
};

/* Head of the list of remote stores; alternates are appended to it. */
static struct alt_base *alt;
56
/* Life cycle of a queued loose-object download. */
enum object_request_state {
	WAITING = 0,	/* queued, transfer not started yet */
	ABORTED = 1,	/* the transfer could not be started */
	ACTIVE = 2,	/* transfer handed to curl and still running */
	COMPLETE = 3,	/* transfer finished, or object already present */
};
63
64 struct object_request
65 {
66 unsigned char sha1[20];
67 struct alt_base *repo;
68 char *url;
69 char filename[PATH_MAX];
70 char tmpfile[PATH_MAX];
71 int local;
72 enum object_request_state state;
73 CURLcode curl_result;
74 char errorstr[CURL_ERROR_SIZE];
75 long http_code;
76 unsigned char real_sha1[20];
77 SHA_CTX c;
78 z_stream stream;
79 int zret;
80 int rename;
81 struct active_request_slot *slot;
82 struct object_request *next;
83 };
84
/* State shared between fetch_alternates() and its completion callback. */
struct alternates_request {
	const char *base;	/* base URL whose alternates are wanted */
	char *url;		/* URL buffer, rewritten for the fallback file */
	struct buffer *buffer;	/* receives the body of the reply */
	struct active_request_slot *slot;	/* slot running the request */
	int http_specific;	/* 1: http-alternates, 0: plain alternates */
};
92
93 #ifndef NO_EXPAT
/* Parser state threaded through the XML element handlers. */
struct xml_ctx {
	char *name;	/* dotted path of the elements currently open */
	int len;	/* size recorded for the name buffer */
	char *cdata;	/* most recent character data, or NULL */
	/* called on every element open (0) and close (1) */
	void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
	void *userData;	/* opaque pointer for userFunc */
};
102
/* State of one remote directory listing (one PROPFIND request). */
struct remote_ls_ctx {
	struct alt_base *repo;	/* store being listed */
	char *path;		/* path being listed, relative to repo->base */
	/* called for each entry selected by flags */
	void (*userFunc)(struct remote_ls_ctx *ls);
	void *userData;		/* opaque pointer for userFunc */
	int flags;		/* PROCESS_FILES / PROCESS_DIRS / RECURSIVE */
	char *dentry_name;	/* name of the entry being parsed, or NULL */
	int dentry_flags;	/* IS_DIR when the entry is a collection */
	int rc;			/* result reported back by remote_ls() */
	/* NOTE(review): not assigned anywhere in the visible code */
	struct remote_ls_ctx *parent;
};
115 #endif
116
/* First entry of the singly linked queue of object downloads. */
static struct object_request *object_queue_head;
118
119 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
120 void *data)
121 {
122 unsigned char expn[4096];
123 size_t size = eltsize * nmemb;
124 int posn = 0;
125 struct object_request *obj_req = (struct object_request *)data;
126 do {
127 ssize_t retval = write(obj_req->local,
128 (char *) ptr + posn, size - posn);
129 if (retval < 0)
130 return posn;
131 posn += retval;
132 } while (posn < size);
133
134 obj_req->stream.avail_in = size;
135 obj_req->stream.next_in = ptr;
136 do {
137 obj_req->stream.next_out = expn;
138 obj_req->stream.avail_out = sizeof(expn);
139 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
140 SHA1_Update(&obj_req->c, expn,
141 sizeof(expn) - obj_req->stream.avail_out);
142 } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
143 data_received++;
144 return size;
145 }
146
147 static int missing__target(int code, int result)
148 {
149 return /* file:// URL -- do we ever use one??? */
150 (result == CURLE_FILE_COULDNT_READ_FILE) ||
151 /* http:// and https:// URL */
152 (code == 404 && result == CURLE_HTTP_RETURNED_ERROR)
153 ;
154 }
155
156 #define missing_target(a) missing__target((a)->http_code, (a)->curl_result)
157
158 static void fetch_alternates(const char *base);
159
160 static void process_object_response(void *callback_data);
161
162 static void start_object_request(struct object_request *obj_req)
163 {
164 char *hex = sha1_to_hex(obj_req->sha1);
165 char prevfile[PATH_MAX];
166 char *url;
167 char *posn;
168 int prevlocal;
169 unsigned char prev_buf[PREV_BUF_SIZE];
170 ssize_t prev_read = 0;
171 long prev_posn = 0;
172 char range[RANGE_HEADER_SIZE];
173 struct curl_slist *range_header = NULL;
174 struct active_request_slot *slot;
175
176 snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
177 unlink(prevfile);
178 rename(obj_req->tmpfile, prevfile);
179 unlink(obj_req->tmpfile);
180
181 if (obj_req->local != -1)
182 error("fd leakage in start: %d", obj_req->local);
183 obj_req->local = open(obj_req->tmpfile,
184 O_WRONLY | O_CREAT | O_EXCL, 0666);
185 /* This could have failed due to the "lazy directory creation";
186 * try to mkdir the last path component.
187 */
188 if (obj_req->local < 0 && errno == ENOENT) {
189 char *dir = strrchr(obj_req->tmpfile, '/');
190 if (dir) {
191 *dir = 0;
192 mkdir(obj_req->tmpfile, 0777);
193 *dir = '/';
194 }
195 obj_req->local = open(obj_req->tmpfile,
196 O_WRONLY | O_CREAT | O_EXCL, 0666);
197 }
198
199 if (obj_req->local < 0) {
200 obj_req->state = ABORTED;
201 error("Couldn't create temporary file %s for %s: %s",
202 obj_req->tmpfile, obj_req->filename, strerror(errno));
203 return;
204 }
205
206 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
207
208 inflateInit(&obj_req->stream);
209
210 SHA1_Init(&obj_req->c);
211
212 url = xmalloc(strlen(obj_req->repo->base) + 50);
213 obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
214 strcpy(url, obj_req->repo->base);
215 posn = url + strlen(obj_req->repo->base);
216 strcpy(posn, "objects/");
217 posn += 8;
218 memcpy(posn, hex, 2);
219 posn += 2;
220 *(posn++) = '/';
221 strcpy(posn, hex + 2);
222 strcpy(obj_req->url, url);
223
224 /* If a previous temp file is present, process what was already
225 fetched. */
226 prevlocal = open(prevfile, O_RDONLY);
227 if (prevlocal != -1) {
228 do {
229 prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
230 if (prev_read>0) {
231 if (fwrite_sha1_file(prev_buf,
232 1,
233 prev_read,
234 obj_req) == prev_read) {
235 prev_posn += prev_read;
236 } else {
237 prev_read = -1;
238 }
239 }
240 } while (prev_read > 0);
241 close(prevlocal);
242 }
243 unlink(prevfile);
244
245 /* Reset inflate/SHA1 if there was an error reading the previous temp
246 file; also rewind to the beginning of the local file. */
247 if (prev_read == -1) {
248 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
249 inflateInit(&obj_req->stream);
250 SHA1_Init(&obj_req->c);
251 if (prev_posn>0) {
252 prev_posn = 0;
253 lseek(obj_req->local, SEEK_SET, 0);
254 ftruncate(obj_req->local, 0);
255 }
256 }
257
258 slot = get_active_slot();
259 slot->callback_func = process_object_response;
260 slot->callback_data = obj_req;
261 obj_req->slot = slot;
262
263 curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
264 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
265 curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
266 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
267 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
268
269 /* If we have successfully processed data from a previous fetch
270 attempt, only fetch the data we don't already have. */
271 if (prev_posn>0) {
272 if (get_verbosely)
273 fprintf(stderr,
274 "Resuming fetch of object %s at byte %ld\n",
275 hex, prev_posn);
276 sprintf(range, "Range: bytes=%ld-", prev_posn);
277 range_header = curl_slist_append(range_header, range);
278 curl_easy_setopt(slot->curl,
279 CURLOPT_HTTPHEADER, range_header);
280 }
281
282 /* Try to get the request started, abort the request on error */
283 obj_req->state = ACTIVE;
284 if (!start_active_slot(slot)) {
285 obj_req->state = ABORTED;
286 obj_req->slot = NULL;
287 close(obj_req->local); obj_req->local = -1;
288 free(obj_req->url);
289 return;
290 }
291 }
292
293 static void finish_object_request(struct object_request *obj_req)
294 {
295 struct stat st;
296
297 fchmod(obj_req->local, 0444);
298 close(obj_req->local); obj_req->local = -1;
299
300 if (obj_req->http_code == 416) {
301 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
302 } else if (obj_req->curl_result != CURLE_OK) {
303 if (stat(obj_req->tmpfile, &st) == 0)
304 if (st.st_size == 0)
305 unlink(obj_req->tmpfile);
306 return;
307 }
308
309 inflateEnd(&obj_req->stream);
310 SHA1_Final(obj_req->real_sha1, &obj_req->c);
311 if (obj_req->zret != Z_STREAM_END) {
312 unlink(obj_req->tmpfile);
313 return;
314 }
315 if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
316 unlink(obj_req->tmpfile);
317 return;
318 }
319 obj_req->rename =
320 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
321
322 if (obj_req->rename == 0)
323 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
324 }
325
326 static void process_object_response(void *callback_data)
327 {
328 struct object_request *obj_req =
329 (struct object_request *)callback_data;
330
331 obj_req->curl_result = obj_req->slot->curl_result;
332 obj_req->http_code = obj_req->slot->http_code;
333 obj_req->slot = NULL;
334 obj_req->state = COMPLETE;
335
336 /* Use alternates if necessary */
337 if (missing_target(obj_req)) {
338 fetch_alternates(alt->base);
339 if (obj_req->repo->next != NULL) {
340 obj_req->repo =
341 obj_req->repo->next;
342 close(obj_req->local);
343 obj_req->local = -1;
344 start_object_request(obj_req);
345 return;
346 }
347 }
348
349 finish_object_request(obj_req);
350 }
351
352 static void release_object_request(struct object_request *obj_req)
353 {
354 struct object_request *entry = object_queue_head;
355
356 if (obj_req->local != -1)
357 error("fd leakage in release: %d", obj_req->local);
358 if (obj_req == object_queue_head) {
359 object_queue_head = obj_req->next;
360 } else {
361 while (entry->next != NULL && entry->next != obj_req)
362 entry = entry->next;
363 if (entry->next == obj_req)
364 entry->next = entry->next->next;
365 }
366
367 free(obj_req->url);
368 free(obj_req);
369 }
370
#ifdef USE_CURL_MULTI
/*
 * Start transfers for waiting requests while free request capacity
 * remains, then release the curl handles of slots that are not in use.
 */
void fill_active_slots(void)
{
	struct object_request *req;
	struct active_request_slot *slot;
	int num_transfers;

	for (req = object_queue_head;
	     req != NULL && active_requests < max_requests;
	     req = req->next) {
		if (req->state != WAITING)
			continue;
		/* nothing to download if the object showed up meanwhile */
		if (has_sha1_file(req->sha1))
			req->state = COMPLETE;
		else
			start_object_request(req);
		curl_multi_perform(curlm, &num_transfers);
	}

	for (slot = active_queue_head; slot != NULL; slot = slot->next) {
		if (slot->in_use || slot->curl == NULL)
			continue;
		curl_easy_cleanup(slot->curl);
		slot->curl = NULL;
	}
}
#endif
398
399 void prefetch(unsigned char *sha1)
400 {
401 struct object_request *newreq;
402 struct object_request *tail;
403 char *filename = sha1_file_name(sha1);
404
405 newreq = xmalloc(sizeof(*newreq));
406 hashcpy(newreq->sha1, sha1);
407 newreq->repo = alt;
408 newreq->url = NULL;
409 newreq->local = -1;
410 newreq->state = WAITING;
411 snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
412 snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
413 "%s.temp", filename);
414 newreq->slot = NULL;
415 newreq->next = NULL;
416
417 if (object_queue_head == NULL) {
418 object_queue_head = newreq;
419 } else {
420 tail = object_queue_head;
421 while (tail->next != NULL) {
422 tail = tail->next;
423 }
424 tail->next = newreq;
425 }
426
427 #ifdef USE_CURL_MULTI
428 fill_active_slots();
429 step_active_slots();
430 #endif
431 }
432
433 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
434 {
435 char *hex = sha1_to_hex(sha1);
436 char *filename;
437 char *url;
438 char tmpfile[PATH_MAX];
439 long prev_posn = 0;
440 char range[RANGE_HEADER_SIZE];
441 struct curl_slist *range_header = NULL;
442
443 FILE *indexfile;
444 struct active_request_slot *slot;
445 struct slot_results results;
446
447 if (has_pack_index(sha1))
448 return 0;
449
450 if (get_verbosely)
451 fprintf(stderr, "Getting index for pack %s\n", hex);
452
453 url = xmalloc(strlen(repo->base) + 64);
454 sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
455
456 filename = sha1_pack_index_name(sha1);
457 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
458 indexfile = fopen(tmpfile, "a");
459 if (!indexfile)
460 return error("Unable to open local file %s for pack index",
461 filename);
462
463 slot = get_active_slot();
464 slot->results = &results;
465 curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
466 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
467 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
468 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
469 slot->local = indexfile;
470
471 /* If there is data present from a previous transfer attempt,
472 resume where it left off */
473 prev_posn = ftell(indexfile);
474 if (prev_posn>0) {
475 if (get_verbosely)
476 fprintf(stderr,
477 "Resuming fetch of index for pack %s at byte %ld\n",
478 hex, prev_posn);
479 sprintf(range, "Range: bytes=%ld-", prev_posn);
480 range_header = curl_slist_append(range_header, range);
481 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
482 }
483
484 if (start_active_slot(slot)) {
485 run_active_slot(slot);
486 if (results.curl_result != CURLE_OK) {
487 fclose(indexfile);
488 return error("Unable to get pack index %s\n%s", url,
489 curl_errorstr);
490 }
491 } else {
492 fclose(indexfile);
493 return error("Unable to start request");
494 }
495
496 fclose(indexfile);
497
498 return move_temp_to_file(tmpfile, filename);
499 }
500
501 static int setup_index(struct alt_base *repo, unsigned char *sha1)
502 {
503 struct packed_git *new_pack;
504 if (has_pack_file(sha1))
505 return 0; /* don't list this as something we can get */
506
507 if (fetch_index(repo, sha1))
508 return -1;
509
510 new_pack = parse_pack_index(sha1);
511 new_pack->next = repo->packs;
512 repo->packs = new_pack;
513 return 0;
514 }
515
516 static void process_alternates_response(void *callback_data)
517 {
518 struct alternates_request *alt_req =
519 (struct alternates_request *)callback_data;
520 struct active_request_slot *slot = alt_req->slot;
521 struct alt_base *tail = alt;
522 const char *base = alt_req->base;
523 static const char null_byte = '\0';
524 char *data;
525 int i = 0;
526
527 if (alt_req->http_specific) {
528 if (slot->curl_result != CURLE_OK ||
529 !alt_req->buffer->posn) {
530
531 /* Try reusing the slot to get non-http alternates */
532 alt_req->http_specific = 0;
533 sprintf(alt_req->url, "%s/objects/info/alternates",
534 base);
535 curl_easy_setopt(slot->curl, CURLOPT_URL,
536 alt_req->url);
537 active_requests++;
538 slot->in_use = 1;
539 if (slot->finished != NULL)
540 (*slot->finished) = 0;
541 if (!start_active_slot(slot)) {
542 got_alternates = -1;
543 slot->in_use = 0;
544 if (slot->finished != NULL)
545 (*slot->finished) = 1;
546 }
547 return;
548 }
549 } else if (slot->curl_result != CURLE_OK) {
550 if (!missing_target(slot)) {
551 got_alternates = -1;
552 return;
553 }
554 }
555
556 fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
557 alt_req->buffer->posn--;
558 data = alt_req->buffer->buffer;
559
560 while (i < alt_req->buffer->posn) {
561 int posn = i;
562 while (posn < alt_req->buffer->posn && data[posn] != '\n')
563 posn++;
564 if (data[posn] == '\n') {
565 int okay = 0;
566 int serverlen = 0;
567 struct alt_base *newalt;
568 char *target = NULL;
569 char *path;
570 if (data[i] == '/') {
571 /* This counts
572 * http://git.host/pub/scm/linux.git/
573 * -----------here^
574 * so memcpy(dst, base, serverlen) will
575 * copy up to "...git.host".
576 */
577 const char *colon_ss = strstr(base,"://");
578 if (colon_ss) {
579 serverlen = (strchr(colon_ss + 3, '/')
580 - base);
581 okay = 1;
582 }
583 } else if (!memcmp(data + i, "../", 3)) {
584 /* Relative URL; chop the corresponding
585 * number of subpath from base (and ../
586 * from data), and concatenate the result.
587 *
588 * The code first drops ../ from data, and
589 * then drops one ../ from data and one path
590 * from base. IOW, one extra ../ is dropped
591 * from data than path is dropped from base.
592 *
593 * This is not wrong. The alternate in
594 * http://git.host/pub/scm/linux.git/
595 * to borrow from
596 * http://git.host/pub/scm/linus.git/
597 * is ../../linus.git/objects/. You need
598 * two ../../ to borrow from your direct
599 * neighbour.
600 */
601 i += 3;
602 serverlen = strlen(base);
603 while (i + 2 < posn &&
604 !memcmp(data + i, "../", 3)) {
605 do {
606 serverlen--;
607 } while (serverlen &&
608 base[serverlen - 1] != '/');
609 i += 3;
610 }
611 /* If the server got removed, give up. */
612 okay = strchr(base, ':') - base + 3 <
613 serverlen;
614 } else if (alt_req->http_specific) {
615 char *colon = strchr(data + i, ':');
616 char *slash = strchr(data + i, '/');
617 if (colon && slash && colon < data + posn &&
618 slash < data + posn && colon < slash) {
619 okay = 1;
620 }
621 }
622 /* skip "objects\n" at end */
623 if (okay) {
624 target = xmalloc(serverlen + posn - i - 6);
625 memcpy(target, base, serverlen);
626 memcpy(target + serverlen, data + i,
627 posn - i - 7);
628 target[serverlen + posn - i - 7] = 0;
629 if (get_verbosely)
630 fprintf(stderr,
631 "Also look at %s\n", target);
632 newalt = xmalloc(sizeof(*newalt));
633 newalt->next = NULL;
634 newalt->base = target;
635 newalt->got_indices = 0;
636 newalt->packs = NULL;
637 path = strstr(target, "//");
638 if (path) {
639 path = strchr(path+2, '/');
640 if (path)
641 newalt->path_len = strlen(path);
642 }
643
644 while (tail->next != NULL)
645 tail = tail->next;
646 tail->next = newalt;
647 }
648 }
649 i = posn + 1;
650 }
651
652 got_alternates = 1;
653 }
654
655 static void fetch_alternates(const char *base)
656 {
657 struct buffer buffer;
658 char *url;
659 char *data;
660 struct active_request_slot *slot;
661 struct alternates_request alt_req;
662
663 /* If another request has already started fetching alternates,
664 wait for them to arrive and return to processing this request's
665 curl message */
666 #ifdef USE_CURL_MULTI
667 while (got_alternates == 0) {
668 step_active_slots();
669 }
670 #endif
671
672 /* Nothing to do if they've already been fetched */
673 if (got_alternates == 1)
674 return;
675
676 /* Start the fetch */
677 got_alternates = 0;
678
679 data = xmalloc(4096);
680 buffer.size = 4096;
681 buffer.posn = 0;
682 buffer.buffer = data;
683
684 if (get_verbosely)
685 fprintf(stderr, "Getting alternates list for %s\n", base);
686
687 url = xmalloc(strlen(base) + 31);
688 sprintf(url, "%s/objects/info/http-alternates", base);
689
690 /* Use a callback to process the result, since another request
691 may fail and need to have alternates loaded before continuing */
692 slot = get_active_slot();
693 slot->callback_func = process_alternates_response;
694 slot->callback_data = &alt_req;
695
696 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
697 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
698 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
699
700 alt_req.base = base;
701 alt_req.url = url;
702 alt_req.buffer = &buffer;
703 alt_req.http_specific = 1;
704 alt_req.slot = slot;
705
706 if (start_active_slot(slot))
707 run_active_slot(slot);
708 else
709 got_alternates = -1;
710
711 free(data);
712 free(url);
713 }
714
715 #ifndef NO_EXPAT
716 static void
717 xml_start_tag(void *userData, const char *name, const char **atts)
718 {
719 struct xml_ctx *ctx = (struct xml_ctx *)userData;
720 const char *c = strchr(name, ':');
721 int new_len;
722
723 if (c == NULL)
724 c = name;
725 else
726 c++;
727
728 new_len = strlen(ctx->name) + strlen(c) + 2;
729
730 if (new_len > ctx->len) {
731 ctx->name = xrealloc(ctx->name, new_len);
732 ctx->len = new_len;
733 }
734 strcat(ctx->name, ".");
735 strcat(ctx->name, c);
736
737 free(ctx->cdata);
738 ctx->cdata = NULL;
739
740 ctx->userFunc(ctx, 0);
741 }
742
743 static void
744 xml_end_tag(void *userData, const char *name)
745 {
746 struct xml_ctx *ctx = (struct xml_ctx *)userData;
747 const char *c = strchr(name, ':');
748 char *ep;
749
750 ctx->userFunc(ctx, 1);
751
752 if (c == NULL)
753 c = name;
754 else
755 c++;
756
757 ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
758 *ep = 0;
759 }
760
761 static void
762 xml_cdata(void *userData, const XML_Char *s, int len)
763 {
764 struct xml_ctx *ctx = (struct xml_ctx *)userData;
765 free(ctx->cdata);
766 ctx->cdata = xmalloc(len + 1);
767 strlcpy(ctx->cdata, s, len + 1);
768 }
769
770 static int remote_ls(struct alt_base *repo, const char *path, int flags,
771 void (*userFunc)(struct remote_ls_ctx *ls),
772 void *userData);
773
774 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
775 {
776 struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
777
778 if (tag_closed) {
779 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
780 if (ls->dentry_flags & IS_DIR) {
781 if (ls->flags & PROCESS_DIRS) {
782 ls->userFunc(ls);
783 }
784 if (strcmp(ls->dentry_name, ls->path) &&
785 ls->flags & RECURSIVE) {
786 ls->rc = remote_ls(ls->repo,
787 ls->dentry_name,
788 ls->flags,
789 ls->userFunc,
790 ls->userData);
791 }
792 } else if (ls->flags & PROCESS_FILES) {
793 ls->userFunc(ls);
794 }
795 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
796 ls->dentry_name = xmalloc(strlen(ctx->cdata) -
797 ls->repo->path_len + 1);
798 strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
799 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
800 ls->dentry_flags |= IS_DIR;
801 }
802 } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
803 free(ls->dentry_name);
804 ls->dentry_name = NULL;
805 ls->dentry_flags = 0;
806 }
807 }
808
809 static int remote_ls(struct alt_base *repo, const char *path, int flags,
810 void (*userFunc)(struct remote_ls_ctx *ls),
811 void *userData)
812 {
813 char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
814 struct active_request_slot *slot;
815 struct slot_results results;
816 struct buffer in_buffer;
817 struct buffer out_buffer;
818 char *in_data;
819 char *out_data;
820 XML_Parser parser = XML_ParserCreate(NULL);
821 enum XML_Status result;
822 struct curl_slist *dav_headers = NULL;
823 struct xml_ctx ctx;
824 struct remote_ls_ctx ls;
825
826 ls.flags = flags;
827 ls.repo = repo;
828 ls.path = xstrdup(path);
829 ls.dentry_name = NULL;
830 ls.dentry_flags = 0;
831 ls.userData = userData;
832 ls.userFunc = userFunc;
833 ls.rc = 0;
834
835 sprintf(url, "%s%s", repo->base, path);
836
837 out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
838 out_data = xmalloc(out_buffer.size + 1);
839 snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
840 out_buffer.posn = 0;
841 out_buffer.buffer = out_data;
842
843 in_buffer.size = 4096;
844 in_data = xmalloc(in_buffer.size);
845 in_buffer.posn = 0;
846 in_buffer.buffer = in_data;
847
848 dav_headers = curl_slist_append(dav_headers, "Depth: 1");
849 dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
850
851 slot = get_active_slot();
852 slot->results = &results;
853 curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
854 curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
855 curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
856 curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
857 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
858 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
859 curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
860 curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
861 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
862
863 if (start_active_slot(slot)) {
864 run_active_slot(slot);
865 if (results.curl_result == CURLE_OK) {
866 ctx.name = xcalloc(10, 1);
867 ctx.len = 0;
868 ctx.cdata = NULL;
869 ctx.userFunc = handle_remote_ls_ctx;
870 ctx.userData = &ls;
871 XML_SetUserData(parser, &ctx);
872 XML_SetElementHandler(parser, xml_start_tag,
873 xml_end_tag);
874 XML_SetCharacterDataHandler(parser, xml_cdata);
875 result = XML_Parse(parser, in_buffer.buffer,
876 in_buffer.posn, 1);
877 free(ctx.name);
878
879 if (result != XML_STATUS_OK) {
880 ls.rc = error("XML error: %s",
881 XML_ErrorString(
882 XML_GetErrorCode(parser)));
883 }
884 } else {
885 ls.rc = -1;
886 }
887 } else {
888 ls.rc = error("Unable to start PROPFIND request");
889 }
890
891 free(ls.path);
892 free(url);
893 free(out_data);
894 free(in_buffer.buffer);
895 curl_slist_free_all(dav_headers);
896
897 return ls.rc;
898 }
899
900 static void process_ls_pack(struct remote_ls_ctx *ls)
901 {
902 unsigned char sha1[20];
903
904 if (strlen(ls->dentry_name) == 63 &&
905 !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
906 has_extension(ls->dentry_name, ".pack")) {
907 get_sha1_hex(ls->dentry_name + 18, sha1);
908 setup_index(ls->repo, sha1);
909 }
910 }
911 #endif
912
913 static int fetch_indices(struct alt_base *repo)
914 {
915 unsigned char sha1[20];
916 char *url;
917 struct buffer buffer;
918 char *data;
919 int i = 0;
920
921 struct active_request_slot *slot;
922 struct slot_results results;
923
924 if (repo->got_indices)
925 return 0;
926
927 data = xmalloc(4096);
928 buffer.size = 4096;
929 buffer.posn = 0;
930 buffer.buffer = data;
931
932 if (get_verbosely)
933 fprintf(stderr, "Getting pack list for %s\n", repo->base);
934
935 #ifndef NO_EXPAT
936 if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
937 process_ls_pack, NULL) == 0)
938 return 0;
939 #endif
940
941 url = xmalloc(strlen(repo->base) + 21);
942 sprintf(url, "%s/objects/info/packs", repo->base);
943
944 slot = get_active_slot();
945 slot->results = &results;
946 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
947 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
948 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
949 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
950 if (start_active_slot(slot)) {
951 run_active_slot(slot);
952 if (results.curl_result != CURLE_OK) {
953 if (missing_target(&results)) {
954 repo->got_indices = 1;
955 free(buffer.buffer);
956 return 0;
957 } else {
958 repo->got_indices = 0;
959 free(buffer.buffer);
960 return error("%s", curl_errorstr);
961 }
962 }
963 } else {
964 repo->got_indices = 0;
965 free(buffer.buffer);
966 return error("Unable to start request");
967 }
968
969 data = buffer.buffer;
970 while (i < buffer.posn) {
971 switch (data[i]) {
972 case 'P':
973 i++;
974 if (i + 52 <= buffer.posn &&
975 !strncmp(data + i, " pack-", 6) &&
976 !strncmp(data + i + 46, ".pack\n", 6)) {
977 get_sha1_hex(data + i + 6, sha1);
978 setup_index(repo, sha1);
979 i += 51;
980 break;
981 }
982 default:
983 while (i < buffer.posn && data[i] != '\n')
984 i++;
985 }
986 i++;
987 }
988
989 free(buffer.buffer);
990 repo->got_indices = 1;
991 return 0;
992 }
993
994 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
995 {
996 char *url;
997 struct packed_git *target;
998 struct packed_git **lst;
999 FILE *packfile;
1000 char *filename;
1001 char tmpfile[PATH_MAX];
1002 int ret;
1003 long prev_posn = 0;
1004 char range[RANGE_HEADER_SIZE];
1005 struct curl_slist *range_header = NULL;
1006
1007 struct active_request_slot *slot;
1008 struct slot_results results;
1009
1010 if (fetch_indices(repo))
1011 return -1;
1012 target = find_sha1_pack(sha1, repo->packs);
1013 if (!target)
1014 return -1;
1015
1016 if (get_verbosely) {
1017 fprintf(stderr, "Getting pack %s\n",
1018 sha1_to_hex(target->sha1));
1019 fprintf(stderr, " which contains %s\n",
1020 sha1_to_hex(sha1));
1021 }
1022
1023 url = xmalloc(strlen(repo->base) + 65);
1024 sprintf(url, "%s/objects/pack/pack-%s.pack",
1025 repo->base, sha1_to_hex(target->sha1));
1026
1027 filename = sha1_pack_name(target->sha1);
1028 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
1029 packfile = fopen(tmpfile, "a");
1030 if (!packfile)
1031 return error("Unable to open local file %s for pack",
1032 filename);
1033
1034 slot = get_active_slot();
1035 slot->results = &results;
1036 curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1037 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1038 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1039 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1040 slot->local = packfile;
1041
1042 /* If there is data present from a previous transfer attempt,
1043 resume where it left off */
1044 prev_posn = ftell(packfile);
1045 if (prev_posn>0) {
1046 if (get_verbosely)
1047 fprintf(stderr,
1048 "Resuming fetch of pack %s at byte %ld\n",
1049 sha1_to_hex(target->sha1), prev_posn);
1050 sprintf(range, "Range: bytes=%ld-", prev_posn);
1051 range_header = curl_slist_append(range_header, range);
1052 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1053 }
1054
1055 if (start_active_slot(slot)) {
1056 run_active_slot(slot);
1057 if (results.curl_result != CURLE_OK) {
1058 fclose(packfile);
1059 return error("Unable to get pack file %s\n%s", url,
1060 curl_errorstr);
1061 }
1062 } else {
1063 fclose(packfile);
1064 return error("Unable to start request");
1065 }
1066
1067 fclose(packfile);
1068
1069 ret = move_temp_to_file(tmpfile, filename);
1070 if (ret)
1071 return ret;
1072
1073 lst = &repo->packs;
1074 while (*lst != target)
1075 lst = &((*lst)->next);
1076 *lst = (*lst)->next;
1077
1078 if (verify_pack(target, 0))
1079 return -1;
1080 install_packed_git(target);
1081
1082 return 0;
1083 }
1084
1085 static void abort_object_request(struct object_request *obj_req)
1086 {
1087 if (obj_req->local >= 0) {
1088 close(obj_req->local);
1089 obj_req->local = -1;
1090 }
1091 unlink(obj_req->tmpfile);
1092 if (obj_req->slot) {
1093 release_active_slot(obj_req->slot);
1094 obj_req->slot = NULL;
1095 }
1096 release_object_request(obj_req);
1097 }
1098
1099 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1100 {
1101 char *hex = sha1_to_hex(sha1);
1102 int ret = 0;
1103 struct object_request *obj_req = object_queue_head;
1104
1105 while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
1106 obj_req = obj_req->next;
1107 if (obj_req == NULL)
1108 return error("Couldn't find request for %s in the queue", hex);
1109
1110 if (has_sha1_file(obj_req->sha1)) {
1111 abort_object_request(obj_req);
1112 return 0;
1113 }
1114
1115 #ifdef USE_CURL_MULTI
1116 while (obj_req->state == WAITING) {
1117 step_active_slots();
1118 }
1119 #else
1120 start_object_request(obj_req);
1121 #endif
1122
1123 while (obj_req->state == ACTIVE) {
1124 run_active_slot(obj_req->slot);
1125 }
1126 if (obj_req->local != -1) {
1127 close(obj_req->local); obj_req->local = -1;
1128 }
1129
1130 if (obj_req->state == ABORTED) {
1131 ret = error("Request for %s aborted", hex);
1132 } else if (obj_req->curl_result != CURLE_OK &&
1133 obj_req->http_code != 416) {
1134 if (missing_target(obj_req))
1135 ret = -1; /* Be silent, it is probably in a pack. */
1136 else
1137 ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1138 obj_req->errorstr, obj_req->curl_result,
1139 obj_req->http_code, hex);
1140 } else if (obj_req->zret != Z_STREAM_END) {
1141 corrupt_object_found++;
1142 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1143 } else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
1144 ret = error("File %s has bad hash", hex);
1145 } else if (obj_req->rename < 0) {
1146 ret = error("unable to write sha1 filename %s",
1147 obj_req->filename);
1148 }
1149
1150 release_object_request(obj_req);
1151 return ret;
1152 }
1153
1154 int fetch(unsigned char *sha1)
1155 {
1156 struct alt_base *altbase = alt;
1157
1158 if (!fetch_object(altbase, sha1))
1159 return 0;
1160 while (altbase) {
1161 if (!fetch_pack(altbase, sha1))
1162 return 0;
1163 fetch_alternates(alt->base);
1164 altbase = altbase->next;
1165 }
1166 return error("Unable to find %s under %s", sha1_to_hex(sha1),
1167 alt->base);
1168 }
1169
/*
 * Decide whether `ch` must be %-escaped when embedded in a ref URL.
 *
 * Returns 0 for ASCII letters, ASCII digits, '/', '-' and '.';
 * returns 1 for every other value.
 */
static inline int needs_quote(int ch)
{
	if (ch >= 'a' && ch <= 'z')
		return 0;
	if (ch >= 'A' && ch <= 'Z')
		return 0;
	if (ch >= '0' && ch <= '9')
		return 0;

	switch (ch) {
	case '/':
	case '-':
	case '.':
		return 0;
	default:
		return 1;
	}
}
1181
/*
 * Convert a nibble value to its uppercase hexadecimal digit:
 * values below 10 map onto '0'.., the rest onto 'A'...
 * The argument is not range-checked.
 */
static inline int hex(int v)
{
	return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1187
/*
 * Build the URL "<base>refs/<ref>" in a freshly xmalloc()ed buffer,
 * replacing every character of `ref` for which needs_quote() is true
 * by a '%' followed by two uppercase hex digits.  `base` is copied
 * verbatim.
 *
 * The caller owns the returned NUL-terminated string.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
	const char *src;
	char *url, *out;
	int c;
	int base_len = strlen(base);
	int total = base_len + 6;	/* "refs/" + NUL */

	/* Measuring pass: one byte per plain character, three per escape. */
	for (src = ref; *src; src++) {
		c = *src;
		total += needs_quote(c) ? 3 : 1;
	}

	url = xmalloc(total);
	memcpy(url, base, base_len);
	out = url + base_len;
	memcpy(out, "refs/", 5);
	out += 5;

	/* Encoding pass. */
	for (src = ref; *src; src++) {
		c = *src;
		if (!needs_quote(c)) {
			*out++ = c;
			continue;
		}
		*out++ = '%';
		*out++ = hex((c >> 4) & 0xF);
		*out++ = hex(c & 0xF);
	}
	*out = 0;

	return url;
}
1215
1216 int fetch_ref(char *ref, unsigned char *sha1)
1217 {
1218 char *url;
1219 char hex[42];
1220 struct buffer buffer;
1221 const char *base = alt->base;
1222 struct active_request_slot *slot;
1223 struct slot_results results;
1224 buffer.size = 41;
1225 buffer.posn = 0;
1226 buffer.buffer = hex;
1227 hex[41] = '\0';
1228
1229 url = quote_ref_url(base, ref);
1230 slot = get_active_slot();
1231 slot->results = &results;
1232 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1233 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1234 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1235 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1236 if (start_active_slot(slot)) {
1237 run_active_slot(slot);
1238 if (results.curl_result != CURLE_OK)
1239 return error("Couldn't get %s for %s\n%s",
1240 url, ref, curl_errorstr);
1241 } else {
1242 return error("Unable to start request");
1243 }
1244
1245 hex[40] = '\0';
1246 get_sha1_hex(hex, sha1);
1247 return 0;
1248 }
1249
1250 int main(int argc, const char **argv)
1251 {
1252 int commits;
1253 const char **write_ref = NULL;
1254 char **commit_id;
1255 const char *url;
1256 char *path;
1257 int arg = 1;
1258 int rc = 0;
1259
1260 setup_ident();
1261 setup_git_directory();
1262 git_config(git_default_config);
1263
1264 while (arg < argc && argv[arg][0] == '-') {
1265 if (argv[arg][1] == 't') {
1266 get_tree = 1;
1267 } else if (argv[arg][1] == 'c') {
1268 get_history = 1;
1269 } else if (argv[arg][1] == 'a') {
1270 get_all = 1;
1271 get_tree = 1;
1272 get_history = 1;
1273 } else if (argv[arg][1] == 'v') {
1274 get_verbosely = 1;
1275 } else if (argv[arg][1] == 'w') {
1276 write_ref = &argv[arg + 1];
1277 arg++;
1278 } else if (!strcmp(argv[arg], "--recover")) {
1279 get_recover = 1;
1280 } else if (!strcmp(argv[arg], "--stdin")) {
1281 commits_on_stdin = 1;
1282 }
1283 arg++;
1284 }
1285 if (argc < arg + 2 - commits_on_stdin) {
1286 usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1287 return 1;
1288 }
1289 if (commits_on_stdin) {
1290 commits = pull_targets_stdin(&commit_id, &write_ref);
1291 } else {
1292 commit_id = (char **) &argv[arg++];
1293 commits = 1;
1294 }
1295 url = argv[arg];
1296
1297 http_init();
1298
1299 no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1300
1301 alt = xmalloc(sizeof(*alt));
1302 alt->base = url;
1303 alt->got_indices = 0;
1304 alt->packs = NULL;
1305 alt->next = NULL;
1306 path = strstr(url, "//");
1307 if (path) {
1308 path = strchr(path+2, '/');
1309 if (path)
1310 alt->path_len = strlen(path);
1311 }
1312
1313 if (pull(commits, commit_id, write_ref, url))
1314 rc = 1;
1315
1316 http_cleanup();
1317
1318 curl_slist_free_all(no_pragma_header);
1319
1320 if (commits_on_stdin)
1321 pull_targets_free(commits, commit_id, write_ref);
1322
1323 if (corrupt_object_found) {
1324 fprintf(stderr,
1325 "Some loose object were found to be corrupt, but they might be just\n"
1326 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1327 "status code. Suggest running git fsck-objects.\n");
1328 }
1329 return rc;
1330 }