]> git.ipfire.org Git - thirdparty/git.git/blob - http-fetch.c
git-svnimport: Parse log message for Signed-off-by: lines
[thirdparty/git.git] / http-fetch.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
6
7 #ifndef NO_EXPAT
8 #include <expat.h>
9
10 /* Definitions for DAV requests */
11 #define DAV_PROPFIND "PROPFIND"
12 #define DAV_PROPFIND_RESP ".multistatus.response"
13 #define DAV_PROPFIND_NAME ".multistatus.response.href"
14 #define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
15 #define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
16
17 /* Definitions for processing XML DAV responses */
18 #ifndef XML_STATUS_OK
19 enum XML_Status {
20 XML_STATUS_OK = 1,
21 XML_STATUS_ERROR = 0
22 };
23 #define XML_STATUS_OK 1
24 #define XML_STATUS_ERROR 0
25 #endif
26
27 /* Flags that control remote_ls processing */
28 #define PROCESS_FILES (1u << 0)
29 #define PROCESS_DIRS (1u << 1)
30 #define RECURSIVE (1u << 2)
31
32 /* Flags that remote_ls passes to callback functions */
33 #define IS_DIR (1u << 0)
34 #endif
35
/* Chunk size used when replaying a partially downloaded object file. */
#define PREV_BUF_SIZE 4096
/* Size of the stack buffers that hold a formatted "Range:" header. */
#define RANGE_HEADER_SIZE 30

static int commits_on_stdin;

/*
 * State of the alternates lookup: -1 = not fetched (or the fetch failed),
 * 0 = a fetch is in progress, 1 = alternates have been loaded.
 */
static int got_alternates = -1;
/* Incremented by fetch_object() each time a corrupt object is seen. */
static int corrupt_object_found;

/* Header list passed as CURLOPT_HTTPHEADER for object/pack/index requests. */
static struct curl_slist *no_pragma_header;

/* One repository (the primary one or an alternate) objects are fetched from. */
struct alt_base {
	const char *base;		/* base URL of the repository */
	int path_len;			/* length of the path part of base */
	int got_indices;		/* non-zero once the pack list was fetched */
	struct packed_git *packs;	/* pack indices known for this repository */
	struct alt_base *next;		/* next alternate, or NULL */
};

/* Head of the list of repositories; alternates are appended to its tail. */
static struct alt_base *alt;
56
/* Life cycle of a queued object request. */
enum object_request_state {
	WAITING,	/* queued, transfer not started yet */
	ABORTED,	/* the transfer could not be started */
	ACTIVE,		/* transfer handed to curl and still running */
	COMPLETE	/* transfer finished, or the object is already present */
};
63
64 struct object_request
65 {
66 unsigned char sha1[20];
67 struct alt_base *repo;
68 char *url;
69 char filename[PATH_MAX];
70 char tmpfile[PATH_MAX];
71 int local;
72 enum object_request_state state;
73 CURLcode curl_result;
74 char errorstr[CURL_ERROR_SIZE];
75 long http_code;
76 unsigned char real_sha1[20];
77 SHA_CTX c;
78 z_stream stream;
79 int zret;
80 int rename;
81 struct active_request_slot *slot;
82 struct object_request *next;
83 };
84
/* Context shared between fetch_alternates() and its response callback. */
struct alternates_request {
	const char *base;		/* repository whose alternates are wanted */
	char *url;			/* request URL; rewritten for the retry */
	struct buffer *buffer;		/* receives the body of the response */
	struct active_request_slot *slot; /* slot carrying the request */
	int http_specific;		/* 1 while trying "http-alternates",
					 * 0 for the plain "alternates" file */
};
92
#ifndef NO_EXPAT
/* Parser state handed to the expat callbacks as user data. */
struct xml_ctx {
	char *name;	/* dotted path of the elements currently open */
	int len;	/* number of bytes allocated for name */
	char *cdata;	/* most recent character data, or NULL */
	/* invoked with tag_closed = 0 on element open, 1 on element close */
	void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
	void *userData;
};

/* State of one remote_ls() directory listing. */
struct remote_ls_ctx {
	struct alt_base *repo;		/* repository being listed */
	char *path;			/* path that was requested */
	/* called for each entry selected by flags */
	void (*userFunc)(struct remote_ls_ctx *ls);
	void *userData;
	int flags;			/* PROCESS_FILES / PROCESS_DIRS / RECURSIVE */
	char *dentry_name;		/* name of the entry being reported */
	int dentry_flags;		/* IS_DIR when the entry is a collection */
	int rc;				/* result returned by remote_ls() */
	struct remote_ls_ctx *parent;
};
#endif

/* Head of the singly linked queue of outstanding object requests. */
static struct object_request *object_queue_head;
118
119 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
120 void *data)
121 {
122 unsigned char expn[4096];
123 size_t size = eltsize * nmemb;
124 int posn = 0;
125 struct object_request *obj_req = (struct object_request *)data;
126 do {
127 ssize_t retval = write(obj_req->local,
128 (char *) ptr + posn, size - posn);
129 if (retval < 0)
130 return posn;
131 posn += retval;
132 } while (posn < size);
133
134 obj_req->stream.avail_in = size;
135 obj_req->stream.next_in = ptr;
136 do {
137 obj_req->stream.next_out = expn;
138 obj_req->stream.avail_out = sizeof(expn);
139 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
140 SHA1_Update(&obj_req->c, expn,
141 sizeof(expn) - obj_req->stream.avail_out);
142 } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
143 data_received++;
144 return size;
145 }
146
147 static int missing__target(int code, int result)
148 {
149 return /* file:// URL -- do we ever use one??? */
150 (result == CURLE_FILE_COULDNT_READ_FILE) ||
151 /* http:// and https:// URL */
152 (code == 404 && result == CURLE_HTTP_RETURNED_ERROR) ||
153 /* ftp:// URL */
154 (code == 550 && result == CURLE_FTP_COULDNT_RETR_FILE)
155 ;
156 }
157
158 #define missing_target(a) missing__target((a)->http_code, (a)->curl_result)
159
160 static void fetch_alternates(const char *base);
161
162 static void process_object_response(void *callback_data);
163
164 static void start_object_request(struct object_request *obj_req)
165 {
166 char *hex = sha1_to_hex(obj_req->sha1);
167 char prevfile[PATH_MAX];
168 char *url;
169 char *posn;
170 int prevlocal;
171 unsigned char prev_buf[PREV_BUF_SIZE];
172 ssize_t prev_read = 0;
173 long prev_posn = 0;
174 char range[RANGE_HEADER_SIZE];
175 struct curl_slist *range_header = NULL;
176 struct active_request_slot *slot;
177
178 snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
179 unlink(prevfile);
180 rename(obj_req->tmpfile, prevfile);
181 unlink(obj_req->tmpfile);
182
183 if (obj_req->local != -1)
184 error("fd leakage in start: %d", obj_req->local);
185 obj_req->local = open(obj_req->tmpfile,
186 O_WRONLY | O_CREAT | O_EXCL, 0666);
187 /* This could have failed due to the "lazy directory creation";
188 * try to mkdir the last path component.
189 */
190 if (obj_req->local < 0 && errno == ENOENT) {
191 char *dir = strrchr(obj_req->tmpfile, '/');
192 if (dir) {
193 *dir = 0;
194 mkdir(obj_req->tmpfile, 0777);
195 *dir = '/';
196 }
197 obj_req->local = open(obj_req->tmpfile,
198 O_WRONLY | O_CREAT | O_EXCL, 0666);
199 }
200
201 if (obj_req->local < 0) {
202 obj_req->state = ABORTED;
203 error("Couldn't create temporary file %s for %s: %s",
204 obj_req->tmpfile, obj_req->filename, strerror(errno));
205 return;
206 }
207
208 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
209
210 inflateInit(&obj_req->stream);
211
212 SHA1_Init(&obj_req->c);
213
214 url = xmalloc(strlen(obj_req->repo->base) + 50);
215 obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
216 strcpy(url, obj_req->repo->base);
217 posn = url + strlen(obj_req->repo->base);
218 strcpy(posn, "objects/");
219 posn += 8;
220 memcpy(posn, hex, 2);
221 posn += 2;
222 *(posn++) = '/';
223 strcpy(posn, hex + 2);
224 strcpy(obj_req->url, url);
225
226 /* If a previous temp file is present, process what was already
227 fetched. */
228 prevlocal = open(prevfile, O_RDONLY);
229 if (prevlocal != -1) {
230 do {
231 prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
232 if (prev_read>0) {
233 if (fwrite_sha1_file(prev_buf,
234 1,
235 prev_read,
236 obj_req) == prev_read) {
237 prev_posn += prev_read;
238 } else {
239 prev_read = -1;
240 }
241 }
242 } while (prev_read > 0);
243 close(prevlocal);
244 }
245 unlink(prevfile);
246
247 /* Reset inflate/SHA1 if there was an error reading the previous temp
248 file; also rewind to the beginning of the local file. */
249 if (prev_read == -1) {
250 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
251 inflateInit(&obj_req->stream);
252 SHA1_Init(&obj_req->c);
253 if (prev_posn>0) {
254 prev_posn = 0;
255 lseek(obj_req->local, SEEK_SET, 0);
256 ftruncate(obj_req->local, 0);
257 }
258 }
259
260 slot = get_active_slot();
261 slot->callback_func = process_object_response;
262 slot->callback_data = obj_req;
263 obj_req->slot = slot;
264
265 curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
266 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
267 curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
268 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
269 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
270
271 /* If we have successfully processed data from a previous fetch
272 attempt, only fetch the data we don't already have. */
273 if (prev_posn>0) {
274 if (get_verbosely)
275 fprintf(stderr,
276 "Resuming fetch of object %s at byte %ld\n",
277 hex, prev_posn);
278 sprintf(range, "Range: bytes=%ld-", prev_posn);
279 range_header = curl_slist_append(range_header, range);
280 curl_easy_setopt(slot->curl,
281 CURLOPT_HTTPHEADER, range_header);
282 }
283
284 /* Try to get the request started, abort the request on error */
285 obj_req->state = ACTIVE;
286 if (!start_active_slot(slot)) {
287 obj_req->state = ABORTED;
288 obj_req->slot = NULL;
289 close(obj_req->local); obj_req->local = -1;
290 free(obj_req->url);
291 return;
292 }
293 }
294
295 static void finish_object_request(struct object_request *obj_req)
296 {
297 struct stat st;
298
299 fchmod(obj_req->local, 0444);
300 close(obj_req->local); obj_req->local = -1;
301
302 if (obj_req->http_code == 416) {
303 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
304 } else if (obj_req->curl_result != CURLE_OK) {
305 if (stat(obj_req->tmpfile, &st) == 0)
306 if (st.st_size == 0)
307 unlink(obj_req->tmpfile);
308 return;
309 }
310
311 inflateEnd(&obj_req->stream);
312 SHA1_Final(obj_req->real_sha1, &obj_req->c);
313 if (obj_req->zret != Z_STREAM_END) {
314 unlink(obj_req->tmpfile);
315 return;
316 }
317 if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
318 unlink(obj_req->tmpfile);
319 return;
320 }
321 obj_req->rename =
322 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
323
324 if (obj_req->rename == 0)
325 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
326 }
327
328 static void process_object_response(void *callback_data)
329 {
330 struct object_request *obj_req =
331 (struct object_request *)callback_data;
332
333 obj_req->curl_result = obj_req->slot->curl_result;
334 obj_req->http_code = obj_req->slot->http_code;
335 obj_req->slot = NULL;
336 obj_req->state = COMPLETE;
337
338 /* Use alternates if necessary */
339 if (missing_target(obj_req)) {
340 fetch_alternates(alt->base);
341 if (obj_req->repo->next != NULL) {
342 obj_req->repo =
343 obj_req->repo->next;
344 close(obj_req->local);
345 obj_req->local = -1;
346 start_object_request(obj_req);
347 return;
348 }
349 }
350
351 finish_object_request(obj_req);
352 }
353
354 static void release_object_request(struct object_request *obj_req)
355 {
356 struct object_request *entry = object_queue_head;
357
358 if (obj_req->local != -1)
359 error("fd leakage in release: %d", obj_req->local);
360 if (obj_req == object_queue_head) {
361 object_queue_head = obj_req->next;
362 } else {
363 while (entry->next != NULL && entry->next != obj_req)
364 entry = entry->next;
365 if (entry->next == obj_req)
366 entry->next = entry->next->next;
367 }
368
369 free(obj_req->url);
370 free(obj_req);
371 }
372
#ifdef USE_CURL_MULTI
/*
 * Start queued object requests while fewer than max_requests transfers
 * are active, then destroy the curl handles of slots that are not in
 * use.
 */
void fill_active_slots(void)
{
	struct object_request *req = object_queue_head;
	/* captured up front, before any request may create new slots */
	struct active_request_slot *idle = active_queue_head;
	int num_transfers;

	for (; active_requests < max_requests && req != NULL;
	     req = req->next) {
		if (req->state != WAITING)
			continue;
		/* objects that showed up locally need no transfer */
		if (has_sha1_file(req->sha1))
			req->state = COMPLETE;
		else
			start_object_request(req);
		curl_multi_perform(curlm, &num_transfers);
	}

	for (; idle != NULL; idle = idle->next) {
		if (idle->in_use || idle->curl == NULL)
			continue;
		curl_easy_cleanup(idle->curl);
		idle->curl = NULL;
	}
}
#endif
400
401 void prefetch(unsigned char *sha1)
402 {
403 struct object_request *newreq;
404 struct object_request *tail;
405 char *filename = sha1_file_name(sha1);
406
407 newreq = xmalloc(sizeof(*newreq));
408 hashcpy(newreq->sha1, sha1);
409 newreq->repo = alt;
410 newreq->url = NULL;
411 newreq->local = -1;
412 newreq->state = WAITING;
413 snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
414 snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
415 "%s.temp", filename);
416 newreq->slot = NULL;
417 newreq->next = NULL;
418
419 if (object_queue_head == NULL) {
420 object_queue_head = newreq;
421 } else {
422 tail = object_queue_head;
423 while (tail->next != NULL) {
424 tail = tail->next;
425 }
426 tail->next = newreq;
427 }
428
429 #ifdef USE_CURL_MULTI
430 fill_active_slots();
431 step_active_slots();
432 #endif
433 }
434
435 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
436 {
437 char *hex = sha1_to_hex(sha1);
438 char *filename;
439 char *url;
440 char tmpfile[PATH_MAX];
441 long prev_posn = 0;
442 char range[RANGE_HEADER_SIZE];
443 struct curl_slist *range_header = NULL;
444
445 FILE *indexfile;
446 struct active_request_slot *slot;
447 struct slot_results results;
448
449 if (has_pack_index(sha1))
450 return 0;
451
452 if (get_verbosely)
453 fprintf(stderr, "Getting index for pack %s\n", hex);
454
455 url = xmalloc(strlen(repo->base) + 64);
456 sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
457
458 filename = sha1_pack_index_name(sha1);
459 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
460 indexfile = fopen(tmpfile, "a");
461 if (!indexfile)
462 return error("Unable to open local file %s for pack index",
463 filename);
464
465 slot = get_active_slot();
466 slot->results = &results;
467 curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
468 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
469 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
470 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
471 slot->local = indexfile;
472
473 /* If there is data present from a previous transfer attempt,
474 resume where it left off */
475 prev_posn = ftell(indexfile);
476 if (prev_posn>0) {
477 if (get_verbosely)
478 fprintf(stderr,
479 "Resuming fetch of index for pack %s at byte %ld\n",
480 hex, prev_posn);
481 sprintf(range, "Range: bytes=%ld-", prev_posn);
482 range_header = curl_slist_append(range_header, range);
483 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
484 }
485
486 if (start_active_slot(slot)) {
487 run_active_slot(slot);
488 if (results.curl_result != CURLE_OK) {
489 fclose(indexfile);
490 return error("Unable to get pack index %s\n%s", url,
491 curl_errorstr);
492 }
493 } else {
494 fclose(indexfile);
495 return error("Unable to start request");
496 }
497
498 fclose(indexfile);
499
500 return move_temp_to_file(tmpfile, filename);
501 }
502
503 static int setup_index(struct alt_base *repo, unsigned char *sha1)
504 {
505 struct packed_git *new_pack;
506 if (has_pack_file(sha1))
507 return 0; /* don't list this as something we can get */
508
509 if (fetch_index(repo, sha1))
510 return -1;
511
512 new_pack = parse_pack_index(sha1);
513 new_pack->next = repo->packs;
514 repo->packs = new_pack;
515 return 0;
516 }
517
518 static void process_alternates_response(void *callback_data)
519 {
520 struct alternates_request *alt_req =
521 (struct alternates_request *)callback_data;
522 struct active_request_slot *slot = alt_req->slot;
523 struct alt_base *tail = alt;
524 const char *base = alt_req->base;
525 static const char null_byte = '\0';
526 char *data;
527 int i = 0;
528
529 if (alt_req->http_specific) {
530 if (slot->curl_result != CURLE_OK ||
531 !alt_req->buffer->posn) {
532
533 /* Try reusing the slot to get non-http alternates */
534 alt_req->http_specific = 0;
535 sprintf(alt_req->url, "%s/objects/info/alternates",
536 base);
537 curl_easy_setopt(slot->curl, CURLOPT_URL,
538 alt_req->url);
539 active_requests++;
540 slot->in_use = 1;
541 if (slot->finished != NULL)
542 (*slot->finished) = 0;
543 if (!start_active_slot(slot)) {
544 got_alternates = -1;
545 slot->in_use = 0;
546 if (slot->finished != NULL)
547 (*slot->finished) = 1;
548 }
549 return;
550 }
551 } else if (slot->curl_result != CURLE_OK) {
552 if (!missing_target(slot)) {
553 got_alternates = -1;
554 return;
555 }
556 }
557
558 fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
559 alt_req->buffer->posn--;
560 data = alt_req->buffer->buffer;
561
562 while (i < alt_req->buffer->posn) {
563 int posn = i;
564 while (posn < alt_req->buffer->posn && data[posn] != '\n')
565 posn++;
566 if (data[posn] == '\n') {
567 int okay = 0;
568 int serverlen = 0;
569 struct alt_base *newalt;
570 char *target = NULL;
571 char *path;
572 if (data[i] == '/') {
573 /* This counts
574 * http://git.host/pub/scm/linux.git/
575 * -----------here^
576 * so memcpy(dst, base, serverlen) will
577 * copy up to "...git.host".
578 */
579 const char *colon_ss = strstr(base,"://");
580 if (colon_ss) {
581 serverlen = (strchr(colon_ss + 3, '/')
582 - base);
583 okay = 1;
584 }
585 } else if (!memcmp(data + i, "../", 3)) {
586 /* Relative URL; chop the corresponding
587 * number of subpath from base (and ../
588 * from data), and concatenate the result.
589 *
590 * The code first drops ../ from data, and
591 * then drops one ../ from data and one path
592 * from base. IOW, one extra ../ is dropped
593 * from data than path is dropped from base.
594 *
595 * This is not wrong. The alternate in
596 * http://git.host/pub/scm/linux.git/
597 * to borrow from
598 * http://git.host/pub/scm/linus.git/
599 * is ../../linus.git/objects/. You need
600 * two ../../ to borrow from your direct
601 * neighbour.
602 */
603 i += 3;
604 serverlen = strlen(base);
605 while (i + 2 < posn &&
606 !memcmp(data + i, "../", 3)) {
607 do {
608 serverlen--;
609 } while (serverlen &&
610 base[serverlen - 1] != '/');
611 i += 3;
612 }
613 /* If the server got removed, give up. */
614 okay = strchr(base, ':') - base + 3 <
615 serverlen;
616 } else if (alt_req->http_specific) {
617 char *colon = strchr(data + i, ':');
618 char *slash = strchr(data + i, '/');
619 if (colon && slash && colon < data + posn &&
620 slash < data + posn && colon < slash) {
621 okay = 1;
622 }
623 }
624 /* skip "objects\n" at end */
625 if (okay) {
626 target = xmalloc(serverlen + posn - i - 6);
627 memcpy(target, base, serverlen);
628 memcpy(target + serverlen, data + i,
629 posn - i - 7);
630 target[serverlen + posn - i - 7] = 0;
631 if (get_verbosely)
632 fprintf(stderr,
633 "Also look at %s\n", target);
634 newalt = xmalloc(sizeof(*newalt));
635 newalt->next = NULL;
636 newalt->base = target;
637 newalt->got_indices = 0;
638 newalt->packs = NULL;
639 path = strstr(target, "//");
640 if (path) {
641 path = strchr(path+2, '/');
642 if (path)
643 newalt->path_len = strlen(path);
644 }
645
646 while (tail->next != NULL)
647 tail = tail->next;
648 tail->next = newalt;
649 }
650 }
651 i = posn + 1;
652 }
653
654 got_alternates = 1;
655 }
656
657 static void fetch_alternates(const char *base)
658 {
659 struct buffer buffer;
660 char *url;
661 char *data;
662 struct active_request_slot *slot;
663 struct alternates_request alt_req;
664
665 /* If another request has already started fetching alternates,
666 wait for them to arrive and return to processing this request's
667 curl message */
668 #ifdef USE_CURL_MULTI
669 while (got_alternates == 0) {
670 step_active_slots();
671 }
672 #endif
673
674 /* Nothing to do if they've already been fetched */
675 if (got_alternates == 1)
676 return;
677
678 /* Start the fetch */
679 got_alternates = 0;
680
681 data = xmalloc(4096);
682 buffer.size = 4096;
683 buffer.posn = 0;
684 buffer.buffer = data;
685
686 if (get_verbosely)
687 fprintf(stderr, "Getting alternates list for %s\n", base);
688
689 url = xmalloc(strlen(base) + 31);
690 sprintf(url, "%s/objects/info/http-alternates", base);
691
692 /* Use a callback to process the result, since another request
693 may fail and need to have alternates loaded before continuing */
694 slot = get_active_slot();
695 slot->callback_func = process_alternates_response;
696 slot->callback_data = &alt_req;
697
698 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
699 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
700 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
701
702 alt_req.base = base;
703 alt_req.url = url;
704 alt_req.buffer = &buffer;
705 alt_req.http_specific = 1;
706 alt_req.slot = slot;
707
708 if (start_active_slot(slot))
709 run_active_slot(slot);
710 else
711 got_alternates = -1;
712
713 free(data);
714 free(url);
715 }
716
717 #ifndef NO_EXPAT
718 static void
719 xml_start_tag(void *userData, const char *name, const char **atts)
720 {
721 struct xml_ctx *ctx = (struct xml_ctx *)userData;
722 const char *c = strchr(name, ':');
723 int new_len;
724
725 if (c == NULL)
726 c = name;
727 else
728 c++;
729
730 new_len = strlen(ctx->name) + strlen(c) + 2;
731
732 if (new_len > ctx->len) {
733 ctx->name = xrealloc(ctx->name, new_len);
734 ctx->len = new_len;
735 }
736 strcat(ctx->name, ".");
737 strcat(ctx->name, c);
738
739 free(ctx->cdata);
740 ctx->cdata = NULL;
741
742 ctx->userFunc(ctx, 0);
743 }
744
745 static void
746 xml_end_tag(void *userData, const char *name)
747 {
748 struct xml_ctx *ctx = (struct xml_ctx *)userData;
749 const char *c = strchr(name, ':');
750 char *ep;
751
752 ctx->userFunc(ctx, 1);
753
754 if (c == NULL)
755 c = name;
756 else
757 c++;
758
759 ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
760 *ep = 0;
761 }
762
763 static void
764 xml_cdata(void *userData, const XML_Char *s, int len)
765 {
766 struct xml_ctx *ctx = (struct xml_ctx *)userData;
767 free(ctx->cdata);
768 ctx->cdata = xmalloc(len + 1);
769 strlcpy(ctx->cdata, s, len + 1);
770 }
771
772 static int remote_ls(struct alt_base *repo, const char *path, int flags,
773 void (*userFunc)(struct remote_ls_ctx *ls),
774 void *userData);
775
776 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
777 {
778 struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
779
780 if (tag_closed) {
781 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
782 if (ls->dentry_flags & IS_DIR) {
783 if (ls->flags & PROCESS_DIRS) {
784 ls->userFunc(ls);
785 }
786 if (strcmp(ls->dentry_name, ls->path) &&
787 ls->flags & RECURSIVE) {
788 ls->rc = remote_ls(ls->repo,
789 ls->dentry_name,
790 ls->flags,
791 ls->userFunc,
792 ls->userData);
793 }
794 } else if (ls->flags & PROCESS_FILES) {
795 ls->userFunc(ls);
796 }
797 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
798 ls->dentry_name = xmalloc(strlen(ctx->cdata) -
799 ls->repo->path_len + 1);
800 strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
801 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
802 ls->dentry_flags |= IS_DIR;
803 }
804 } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
805 free(ls->dentry_name);
806 ls->dentry_name = NULL;
807 ls->dentry_flags = 0;
808 }
809 }
810
811 static int remote_ls(struct alt_base *repo, const char *path, int flags,
812 void (*userFunc)(struct remote_ls_ctx *ls),
813 void *userData)
814 {
815 char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
816 struct active_request_slot *slot;
817 struct slot_results results;
818 struct buffer in_buffer;
819 struct buffer out_buffer;
820 char *in_data;
821 char *out_data;
822 XML_Parser parser = XML_ParserCreate(NULL);
823 enum XML_Status result;
824 struct curl_slist *dav_headers = NULL;
825 struct xml_ctx ctx;
826 struct remote_ls_ctx ls;
827
828 ls.flags = flags;
829 ls.repo = repo;
830 ls.path = xstrdup(path);
831 ls.dentry_name = NULL;
832 ls.dentry_flags = 0;
833 ls.userData = userData;
834 ls.userFunc = userFunc;
835 ls.rc = 0;
836
837 sprintf(url, "%s%s", repo->base, path);
838
839 out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
840 out_data = xmalloc(out_buffer.size + 1);
841 snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
842 out_buffer.posn = 0;
843 out_buffer.buffer = out_data;
844
845 in_buffer.size = 4096;
846 in_data = xmalloc(in_buffer.size);
847 in_buffer.posn = 0;
848 in_buffer.buffer = in_data;
849
850 dav_headers = curl_slist_append(dav_headers, "Depth: 1");
851 dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
852
853 slot = get_active_slot();
854 slot->results = &results;
855 curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
856 curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
857 curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
858 curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
859 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
860 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
861 curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
862 curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
863 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
864
865 if (start_active_slot(slot)) {
866 run_active_slot(slot);
867 if (results.curl_result == CURLE_OK) {
868 ctx.name = xcalloc(10, 1);
869 ctx.len = 0;
870 ctx.cdata = NULL;
871 ctx.userFunc = handle_remote_ls_ctx;
872 ctx.userData = &ls;
873 XML_SetUserData(parser, &ctx);
874 XML_SetElementHandler(parser, xml_start_tag,
875 xml_end_tag);
876 XML_SetCharacterDataHandler(parser, xml_cdata);
877 result = XML_Parse(parser, in_buffer.buffer,
878 in_buffer.posn, 1);
879 free(ctx.name);
880
881 if (result != XML_STATUS_OK) {
882 ls.rc = error("XML error: %s",
883 XML_ErrorString(
884 XML_GetErrorCode(parser)));
885 }
886 } else {
887 ls.rc = -1;
888 }
889 } else {
890 ls.rc = error("Unable to start PROPFIND request");
891 }
892
893 free(ls.path);
894 free(url);
895 free(out_data);
896 free(in_buffer.buffer);
897 curl_slist_free_all(dav_headers);
898
899 return ls.rc;
900 }
901
902 static void process_ls_pack(struct remote_ls_ctx *ls)
903 {
904 unsigned char sha1[20];
905
906 if (strlen(ls->dentry_name) == 63 &&
907 !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
908 has_extension(ls->dentry_name, ".pack")) {
909 get_sha1_hex(ls->dentry_name + 18, sha1);
910 setup_index(ls->repo, sha1);
911 }
912 }
913 #endif
914
915 static int fetch_indices(struct alt_base *repo)
916 {
917 unsigned char sha1[20];
918 char *url;
919 struct buffer buffer;
920 char *data;
921 int i = 0;
922
923 struct active_request_slot *slot;
924 struct slot_results results;
925
926 if (repo->got_indices)
927 return 0;
928
929 data = xmalloc(4096);
930 buffer.size = 4096;
931 buffer.posn = 0;
932 buffer.buffer = data;
933
934 if (get_verbosely)
935 fprintf(stderr, "Getting pack list for %s\n", repo->base);
936
937 #ifndef NO_EXPAT
938 if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
939 process_ls_pack, NULL) == 0)
940 return 0;
941 #endif
942
943 url = xmalloc(strlen(repo->base) + 21);
944 sprintf(url, "%s/objects/info/packs", repo->base);
945
946 slot = get_active_slot();
947 slot->results = &results;
948 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
949 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
950 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
951 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
952 if (start_active_slot(slot)) {
953 run_active_slot(slot);
954 if (results.curl_result != CURLE_OK) {
955 if (missing_target(&results)) {
956 repo->got_indices = 1;
957 free(buffer.buffer);
958 return 0;
959 } else {
960 repo->got_indices = 0;
961 free(buffer.buffer);
962 return error("%s", curl_errorstr);
963 }
964 }
965 } else {
966 repo->got_indices = 0;
967 free(buffer.buffer);
968 return error("Unable to start request");
969 }
970
971 data = buffer.buffer;
972 while (i < buffer.posn) {
973 switch (data[i]) {
974 case 'P':
975 i++;
976 if (i + 52 <= buffer.posn &&
977 !strncmp(data + i, " pack-", 6) &&
978 !strncmp(data + i + 46, ".pack\n", 6)) {
979 get_sha1_hex(data + i + 6, sha1);
980 setup_index(repo, sha1);
981 i += 51;
982 break;
983 }
984 default:
985 while (i < buffer.posn && data[i] != '\n')
986 i++;
987 }
988 i++;
989 }
990
991 free(buffer.buffer);
992 repo->got_indices = 1;
993 return 0;
994 }
995
996 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
997 {
998 char *url;
999 struct packed_git *target;
1000 struct packed_git **lst;
1001 FILE *packfile;
1002 char *filename;
1003 char tmpfile[PATH_MAX];
1004 int ret;
1005 long prev_posn = 0;
1006 char range[RANGE_HEADER_SIZE];
1007 struct curl_slist *range_header = NULL;
1008
1009 struct active_request_slot *slot;
1010 struct slot_results results;
1011
1012 if (fetch_indices(repo))
1013 return -1;
1014 target = find_sha1_pack(sha1, repo->packs);
1015 if (!target)
1016 return -1;
1017
1018 if (get_verbosely) {
1019 fprintf(stderr, "Getting pack %s\n",
1020 sha1_to_hex(target->sha1));
1021 fprintf(stderr, " which contains %s\n",
1022 sha1_to_hex(sha1));
1023 }
1024
1025 url = xmalloc(strlen(repo->base) + 65);
1026 sprintf(url, "%s/objects/pack/pack-%s.pack",
1027 repo->base, sha1_to_hex(target->sha1));
1028
1029 filename = sha1_pack_name(target->sha1);
1030 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
1031 packfile = fopen(tmpfile, "a");
1032 if (!packfile)
1033 return error("Unable to open local file %s for pack",
1034 filename);
1035
1036 slot = get_active_slot();
1037 slot->results = &results;
1038 curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1039 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1040 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1041 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1042 slot->local = packfile;
1043
1044 /* If there is data present from a previous transfer attempt,
1045 resume where it left off */
1046 prev_posn = ftell(packfile);
1047 if (prev_posn>0) {
1048 if (get_verbosely)
1049 fprintf(stderr,
1050 "Resuming fetch of pack %s at byte %ld\n",
1051 sha1_to_hex(target->sha1), prev_posn);
1052 sprintf(range, "Range: bytes=%ld-", prev_posn);
1053 range_header = curl_slist_append(range_header, range);
1054 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1055 }
1056
1057 if (start_active_slot(slot)) {
1058 run_active_slot(slot);
1059 if (results.curl_result != CURLE_OK) {
1060 fclose(packfile);
1061 return error("Unable to get pack file %s\n%s", url,
1062 curl_errorstr);
1063 }
1064 } else {
1065 fclose(packfile);
1066 return error("Unable to start request");
1067 }
1068
1069 fclose(packfile);
1070
1071 ret = move_temp_to_file(tmpfile, filename);
1072 if (ret)
1073 return ret;
1074
1075 lst = &repo->packs;
1076 while (*lst != target)
1077 lst = &((*lst)->next);
1078 *lst = (*lst)->next;
1079
1080 if (verify_pack(target, 0))
1081 return -1;
1082 install_packed_git(target);
1083
1084 return 0;
1085 }
1086
1087 static void abort_object_request(struct object_request *obj_req)
1088 {
1089 if (obj_req->local >= 0) {
1090 close(obj_req->local);
1091 obj_req->local = -1;
1092 }
1093 unlink(obj_req->tmpfile);
1094 if (obj_req->slot) {
1095 release_active_slot(obj_req->slot);
1096 obj_req->slot = NULL;
1097 }
1098 release_object_request(obj_req);
1099 }
1100
1101 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1102 {
1103 char *hex = sha1_to_hex(sha1);
1104 int ret = 0;
1105 struct object_request *obj_req = object_queue_head;
1106
1107 while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
1108 obj_req = obj_req->next;
1109 if (obj_req == NULL)
1110 return error("Couldn't find request for %s in the queue", hex);
1111
1112 if (has_sha1_file(obj_req->sha1)) {
1113 abort_object_request(obj_req);
1114 return 0;
1115 }
1116
1117 #ifdef USE_CURL_MULTI
1118 while (obj_req->state == WAITING) {
1119 step_active_slots();
1120 }
1121 #else
1122 start_object_request(obj_req);
1123 #endif
1124
1125 while (obj_req->state == ACTIVE) {
1126 run_active_slot(obj_req->slot);
1127 }
1128 if (obj_req->local != -1) {
1129 close(obj_req->local); obj_req->local = -1;
1130 }
1131
1132 if (obj_req->state == ABORTED) {
1133 ret = error("Request for %s aborted", hex);
1134 } else if (obj_req->curl_result != CURLE_OK &&
1135 obj_req->http_code != 416) {
1136 if (missing_target(obj_req))
1137 ret = -1; /* Be silent, it is probably in a pack. */
1138 else
1139 ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1140 obj_req->errorstr, obj_req->curl_result,
1141 obj_req->http_code, hex);
1142 } else if (obj_req->zret != Z_STREAM_END) {
1143 corrupt_object_found++;
1144 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1145 } else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
1146 ret = error("File %s has bad hash", hex);
1147 } else if (obj_req->rename < 0) {
1148 ret = error("unable to write sha1 filename %s",
1149 obj_req->filename);
1150 }
1151
1152 release_object_request(obj_req);
1153 return ret;
1154 }
1155
1156 int fetch(unsigned char *sha1)
1157 {
1158 struct alt_base *altbase = alt;
1159
1160 if (!fetch_object(altbase, sha1))
1161 return 0;
1162 while (altbase) {
1163 if (!fetch_pack(altbase, sha1))
1164 return 0;
1165 fetch_alternates(alt->base);
1166 altbase = altbase->next;
1167 }
1168 return error("Unable to find %s under %s", sha1_to_hex(sha1),
1169 alt->base);
1170 }
1171
/*
 * Tell whether `ch` has to be percent-encoded when copied into a ref URL.
 *
 * Returns 0 for ASCII letters, ASCII digits, '/', '-' and '.';
 * returns 1 for every other value.
 */
static inline int needs_quote(int ch)
{
	if (ch >= 'a' && ch <= 'z')
		return 0;
	if (ch >= 'A' && ch <= 'Z')
		return 0;
	if (ch >= '0' && ch <= '9')
		return 0;

	switch (ch) {
	case '/':
	case '-':
	case '.':
		return 0;
	default:
		return 1;
	}
}
1183
/*
 * Map a nibble value to its upper-case hexadecimal digit character:
 * values below 10 yield '0' + v, all other values yield 'A' + v - 10.
 * No range check is performed on `v`.
 */
static inline int hex(int v)
{
	return (v < 10) ? '0' + v : 'A' + v - 10;
}
1189
/*
 * Build the URL "<base>refs/<ref>" in a freshly allocated buffer,
 * replacing every character of `ref` for which needs_quote() is true with
 * a "%XX" escape (upper-case hex).
 *
 * `base` is copied verbatim. The result is NUL-terminated and obtained
 * from xmalloc(); releasing it is up to the caller.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
	const char *src;
	char *url, *out;
	int base_len, total, c;

	base_len = strlen(base);

	/* First pass: work out how much room the quoted ref needs. */
	total = base_len + 6; /* "refs/" + NUL */
	for (src = ref; (c = *src) != 0; src++)
		total += needs_quote(c) ? 3 : 1; /* '%' plus two hex digits, or the char itself */

	url = xmalloc(total);
	memcpy(url, base, base_len);
	memcpy(url + base_len, "refs/", 5);
	out = url + base_len + 5;

	/* Second pass: emit the ref, escaping as we go. */
	for (src = ref; (c = *src) != 0; src++) {
		if (!needs_quote(c)) {
			*out++ = c;
			continue;
		}
		*out++ = '%';
		*out++ = hex((c >> 4) & 0xF);
		*out++ = hex(c & 0xF);
	}
	*out = 0;

	return url;
}
1217
1218 int fetch_ref(char *ref, unsigned char *sha1)
1219 {
1220 char *url;
1221 char hex[42];
1222 struct buffer buffer;
1223 const char *base = alt->base;
1224 struct active_request_slot *slot;
1225 struct slot_results results;
1226 buffer.size = 41;
1227 buffer.posn = 0;
1228 buffer.buffer = hex;
1229 hex[41] = '\0';
1230
1231 url = quote_ref_url(base, ref);
1232 slot = get_active_slot();
1233 slot->results = &results;
1234 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1235 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1236 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1237 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1238 if (start_active_slot(slot)) {
1239 run_active_slot(slot);
1240 if (results.curl_result != CURLE_OK)
1241 return error("Couldn't get %s for %s\n%s",
1242 url, ref, curl_errorstr);
1243 } else {
1244 return error("Unable to start request");
1245 }
1246
1247 hex[40] = '\0';
1248 get_sha1_hex(hex, sha1);
1249 return 0;
1250 }
1251
1252 int main(int argc, const char **argv)
1253 {
1254 int commits;
1255 const char **write_ref = NULL;
1256 char **commit_id;
1257 const char *url;
1258 char *path;
1259 int arg = 1;
1260 int rc = 0;
1261
1262 setup_ident();
1263 setup_git_directory();
1264 git_config(git_default_config);
1265
1266 while (arg < argc && argv[arg][0] == '-') {
1267 if (argv[arg][1] == 't') {
1268 get_tree = 1;
1269 } else if (argv[arg][1] == 'c') {
1270 get_history = 1;
1271 } else if (argv[arg][1] == 'a') {
1272 get_all = 1;
1273 get_tree = 1;
1274 get_history = 1;
1275 } else if (argv[arg][1] == 'v') {
1276 get_verbosely = 1;
1277 } else if (argv[arg][1] == 'w') {
1278 write_ref = &argv[arg + 1];
1279 arg++;
1280 } else if (!strcmp(argv[arg], "--recover")) {
1281 get_recover = 1;
1282 } else if (!strcmp(argv[arg], "--stdin")) {
1283 commits_on_stdin = 1;
1284 }
1285 arg++;
1286 }
1287 if (argc < arg + 2 - commits_on_stdin) {
1288 usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1289 return 1;
1290 }
1291 if (commits_on_stdin) {
1292 commits = pull_targets_stdin(&commit_id, &write_ref);
1293 } else {
1294 commit_id = (char **) &argv[arg++];
1295 commits = 1;
1296 }
1297 url = argv[arg];
1298
1299 http_init();
1300
1301 no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1302
1303 alt = xmalloc(sizeof(*alt));
1304 alt->base = url;
1305 alt->got_indices = 0;
1306 alt->packs = NULL;
1307 alt->next = NULL;
1308 path = strstr(url, "//");
1309 if (path) {
1310 path = strchr(path+2, '/');
1311 if (path)
1312 alt->path_len = strlen(path);
1313 }
1314
1315 if (pull(commits, commit_id, write_ref, url))
1316 rc = 1;
1317
1318 http_cleanup();
1319
1320 curl_slist_free_all(no_pragma_header);
1321
1322 if (commits_on_stdin)
1323 pull_targets_free(commits, commit_id, write_ref);
1324
1325 if (corrupt_object_found) {
1326 fprintf(stderr,
1327 "Some loose object were found to be corrupt, but they might be just\n"
1328 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1329 "status code. Suggest running git fsck-objects.\n");
1330 }
1331 return rc;
1332 }