]>
Commit | Line | Data |
---|---|---|
1 | #include "cache.h" | |
2 | #include "repository.h" | |
3 | #include "commit.h" | |
4 | #include "walker.h" | |
5 | #include "http.h" | |
6 | #include "list.h" | |
7 | #include "transport.h" | |
8 | #include "packfile.h" | |
9 | #include "object-store.h" | |
10 | ||
/*
 * One object store we may fetch from: the repository named on the command
 * line, or an alternate discovered later.  Stores form a singly linked list.
 */
struct alt_base {
	char *base;			/* base URL; freed in cleanup() */
	int got_indices;		/* non-zero once the pack list was fetched */
	struct packed_git *packs;	/* packs known for this store */
	struct alt_base *next;		/* next store in the list, or NULL */
};
17 | ||
/* Lifecycle of a queued loose-object request. */
enum object_request_state {
	WAITING,	/* queued, transfer not started yet */
	ABORTED,	/* the transfer could not be started */
	ACTIVE,		/* transfer handed to a request slot */
	COMPLETE	/* response processed, or object found locally */
};
24 | ||
25 | struct object_request { | |
26 | struct walker *walker; | |
27 | struct object_id oid; | |
28 | struct alt_base *repo; | |
29 | enum object_request_state state; | |
30 | struct http_object_request *req; | |
31 | struct list_head node; | |
32 | }; | |
33 | ||
/* State shared between fetch_alternates() and its response callback. */
struct alternates_request {
	struct walker *walker;			/* walker the alternates belong to */
	const char *base;			/* base URL the alternates file is read from */
	struct strbuf *url;			/* URL of the file being requested */
	struct strbuf *buffer;			/* receives the response body */
	struct active_request_slot *slot;	/* slot carrying the request */
	int http_specific;			/* 1 for http-alternates, 0 for alternates */
};
42 | ||
/* Per-walker private state, stored in walker->data. */
struct walker_data {
	const char *url;	/* not assigned anywhere in the visible code */
	int got_alternates;	/* -1: not fetched or failed, 0: in progress, 1: done */
	struct alt_base *alt;	/* head of the store list; first entry is the main URL */
};
48 | ||
/* Every pending loose-object request, in the order prefetch() queued it. */
static LIST_HEAD(object_queue_head);

/* Forward declaration: used by process_object_response() before its definition. */
static void fetch_alternates(struct walker *walker, const char *base);

/* Forward declaration: start_object_request() installs this as slot callback. */
static void process_object_response(void *callback_data);
54 | ||
55 | static void start_object_request(struct walker *walker, | |
56 | struct object_request *obj_req) | |
57 | { | |
58 | struct active_request_slot *slot; | |
59 | struct http_object_request *req; | |
60 | ||
61 | req = new_http_object_request(obj_req->repo->base, obj_req->oid.hash); | |
62 | if (req == NULL) { | |
63 | obj_req->state = ABORTED; | |
64 | return; | |
65 | } | |
66 | obj_req->req = req; | |
67 | ||
68 | slot = req->slot; | |
69 | slot->callback_func = process_object_response; | |
70 | slot->callback_data = obj_req; | |
71 | ||
72 | /* Try to get the request started, abort the request on error */ | |
73 | obj_req->state = ACTIVE; | |
74 | if (!start_active_slot(slot)) { | |
75 | obj_req->state = ABORTED; | |
76 | release_http_object_request(req); | |
77 | return; | |
78 | } | |
79 | } | |
80 | ||
81 | static void finish_object_request(struct object_request *obj_req) | |
82 | { | |
83 | if (finish_http_object_request(obj_req->req)) | |
84 | return; | |
85 | ||
86 | if (obj_req->req->rename == 0) | |
87 | walker_say(obj_req->walker, "got %s\n", oid_to_hex(&obj_req->oid)); | |
88 | } | |
89 | ||
90 | static void process_object_response(void *callback_data) | |
91 | { | |
92 | struct object_request *obj_req = | |
93 | (struct object_request *)callback_data; | |
94 | struct walker *walker = obj_req->walker; | |
95 | struct walker_data *data = walker->data; | |
96 | struct alt_base *alt = data->alt; | |
97 | ||
98 | process_http_object_request(obj_req->req); | |
99 | obj_req->state = COMPLETE; | |
100 | ||
101 | /* Use alternates if necessary */ | |
102 | if (missing_target(obj_req->req)) { | |
103 | fetch_alternates(walker, alt->base); | |
104 | if (obj_req->repo->next != NULL) { | |
105 | obj_req->repo = | |
106 | obj_req->repo->next; | |
107 | release_http_object_request(obj_req->req); | |
108 | start_object_request(walker, obj_req); | |
109 | return; | |
110 | } | |
111 | } | |
112 | ||
113 | finish_object_request(obj_req); | |
114 | } | |
115 | ||
116 | static void release_object_request(struct object_request *obj_req) | |
117 | { | |
118 | if (obj_req->req !=NULL && obj_req->req->localfile != -1) | |
119 | error("fd leakage in release: %d", obj_req->req->localfile); | |
120 | ||
121 | list_del(&obj_req->node); | |
122 | free(obj_req); | |
123 | } | |
124 | ||
#ifdef USE_CURL_MULTI
/*
 * Fill function registered in get_http_walker(): start the first WAITING
 * request whose object is not already present locally.
 *
 * Waiting requests whose object is already present are marked COMPLETE
 * along the way.  Returns 1 if a request was started, 0 otherwise.
 */
static int fill_active_slot(struct walker *walker)
{
	struct list_head *cur, *next;

	list_for_each_safe(cur, next, &object_queue_head) {
		struct object_request *obj_req =
			list_entry(cur, struct object_request, node);

		if (obj_req->state != WAITING)
			continue;

		if (!has_sha1_file(obj_req->oid.hash)) {
			start_object_request(walker, obj_req);
			return 1;
		}
		obj_req->state = COMPLETE;
	}
	return 0;
}
#endif
145 | ||
146 | static void prefetch(struct walker *walker, unsigned char *sha1) | |
147 | { | |
148 | struct object_request *newreq; | |
149 | struct walker_data *data = walker->data; | |
150 | ||
151 | newreq = xmalloc(sizeof(*newreq)); | |
152 | newreq->walker = walker; | |
153 | hashcpy(newreq->oid.hash, sha1); | |
154 | newreq->repo = data->alt; | |
155 | newreq->state = WAITING; | |
156 | newreq->req = NULL; | |
157 | ||
158 | http_is_verbose = walker->get_verbosely; | |
159 | list_add_tail(&newreq->node, &object_queue_head); | |
160 | ||
161 | #ifdef USE_CURL_MULTI | |
162 | fill_active_slots(); | |
163 | step_active_slots(); | |
164 | #endif | |
165 | } | |
166 | ||
167 | static int is_alternate_allowed(const char *url) | |
168 | { | |
169 | const char *protocols[] = { | |
170 | "http", "https", "ftp", "ftps" | |
171 | }; | |
172 | int i; | |
173 | ||
174 | if (http_follow_config != HTTP_FOLLOW_ALWAYS) { | |
175 | warning("alternate disabled by http.followRedirects: %s", url); | |
176 | return 0; | |
177 | } | |
178 | ||
179 | for (i = 0; i < ARRAY_SIZE(protocols); i++) { | |
180 | const char *end; | |
181 | if (skip_prefix(url, protocols[i], &end) && | |
182 | starts_with(end, "://")) | |
183 | break; | |
184 | } | |
185 | ||
186 | if (i >= ARRAY_SIZE(protocols)) { | |
187 | warning("ignoring alternate with unknown protocol: %s", url); | |
188 | return 0; | |
189 | } | |
190 | if (!is_transport_allowed(protocols[i], 0)) { | |
191 | warning("ignoring alternate with restricted protocol: %s", url); | |
192 | return 0; | |
193 | } | |
194 | ||
195 | return 1; | |
196 | } | |
197 | ||
198 | static void process_alternates_response(void *callback_data) | |
199 | { | |
200 | struct alternates_request *alt_req = | |
201 | (struct alternates_request *)callback_data; | |
202 | struct walker *walker = alt_req->walker; | |
203 | struct walker_data *cdata = walker->data; | |
204 | struct active_request_slot *slot = alt_req->slot; | |
205 | struct alt_base *tail = cdata->alt; | |
206 | const char *base = alt_req->base; | |
207 | const char null_byte = '\0'; | |
208 | char *data; | |
209 | int i = 0; | |
210 | ||
211 | if (alt_req->http_specific) { | |
212 | if (slot->curl_result != CURLE_OK || | |
213 | !alt_req->buffer->len) { | |
214 | ||
215 | /* Try reusing the slot to get non-http alternates */ | |
216 | alt_req->http_specific = 0; | |
217 | strbuf_reset(alt_req->url); | |
218 | strbuf_addf(alt_req->url, "%s/objects/info/alternates", | |
219 | base); | |
220 | curl_easy_setopt(slot->curl, CURLOPT_URL, | |
221 | alt_req->url->buf); | |
222 | active_requests++; | |
223 | slot->in_use = 1; | |
224 | if (slot->finished != NULL) | |
225 | (*slot->finished) = 0; | |
226 | if (!start_active_slot(slot)) { | |
227 | cdata->got_alternates = -1; | |
228 | slot->in_use = 0; | |
229 | if (slot->finished != NULL) | |
230 | (*slot->finished) = 1; | |
231 | } | |
232 | return; | |
233 | } | |
234 | } else if (slot->curl_result != CURLE_OK) { | |
235 | if (!missing_target(slot)) { | |
236 | cdata->got_alternates = -1; | |
237 | return; | |
238 | } | |
239 | } | |
240 | ||
241 | fwrite_buffer((char *)&null_byte, 1, 1, alt_req->buffer); | |
242 | alt_req->buffer->len--; | |
243 | data = alt_req->buffer->buf; | |
244 | ||
245 | while (i < alt_req->buffer->len) { | |
246 | int posn = i; | |
247 | while (posn < alt_req->buffer->len && data[posn] != '\n') | |
248 | posn++; | |
249 | if (data[posn] == '\n') { | |
250 | int okay = 0; | |
251 | int serverlen = 0; | |
252 | struct alt_base *newalt; | |
253 | if (data[i] == '/') { | |
254 | /* | |
255 | * This counts | |
256 | * http://git.host/pub/scm/linux.git/ | |
257 | * -----------here^ | |
258 | * so memcpy(dst, base, serverlen) will | |
259 | * copy up to "...git.host". | |
260 | */ | |
261 | const char *colon_ss = strstr(base,"://"); | |
262 | if (colon_ss) { | |
263 | serverlen = (strchr(colon_ss + 3, '/') | |
264 | - base); | |
265 | okay = 1; | |
266 | } | |
267 | } else if (!memcmp(data + i, "../", 3)) { | |
268 | /* | |
269 | * Relative URL; chop the corresponding | |
270 | * number of subpath from base (and ../ | |
271 | * from data), and concatenate the result. | |
272 | * | |
273 | * The code first drops ../ from data, and | |
274 | * then drops one ../ from data and one path | |
275 | * from base. IOW, one extra ../ is dropped | |
276 | * from data than path is dropped from base. | |
277 | * | |
278 | * This is not wrong. The alternate in | |
279 | * http://git.host/pub/scm/linux.git/ | |
280 | * to borrow from | |
281 | * http://git.host/pub/scm/linus.git/ | |
282 | * is ../../linus.git/objects/. You need | |
283 | * two ../../ to borrow from your direct | |
284 | * neighbour. | |
285 | */ | |
286 | i += 3; | |
287 | serverlen = strlen(base); | |
288 | while (i + 2 < posn && | |
289 | !memcmp(data + i, "../", 3)) { | |
290 | do { | |
291 | serverlen--; | |
292 | } while (serverlen && | |
293 | base[serverlen - 1] != '/'); | |
294 | i += 3; | |
295 | } | |
296 | /* If the server got removed, give up. */ | |
297 | okay = strchr(base, ':') - base + 3 < | |
298 | serverlen; | |
299 | } else if (alt_req->http_specific) { | |
300 | char *colon = strchr(data + i, ':'); | |
301 | char *slash = strchr(data + i, '/'); | |
302 | if (colon && slash && colon < data + posn && | |
303 | slash < data + posn && colon < slash) { | |
304 | okay = 1; | |
305 | } | |
306 | } | |
307 | if (okay) { | |
308 | struct strbuf target = STRBUF_INIT; | |
309 | strbuf_add(&target, base, serverlen); | |
310 | strbuf_add(&target, data + i, posn - i); | |
311 | if (!strbuf_strip_suffix(&target, "objects")) { | |
312 | warning("ignoring alternate that does" | |
313 | " not end in 'objects': %s", | |
314 | target.buf); | |
315 | strbuf_release(&target); | |
316 | } else if (is_alternate_allowed(target.buf)) { | |
317 | warning("adding alternate object store: %s", | |
318 | target.buf); | |
319 | newalt = xmalloc(sizeof(*newalt)); | |
320 | newalt->next = NULL; | |
321 | newalt->base = strbuf_detach(&target, NULL); | |
322 | newalt->got_indices = 0; | |
323 | newalt->packs = NULL; | |
324 | ||
325 | while (tail->next != NULL) | |
326 | tail = tail->next; | |
327 | tail->next = newalt; | |
328 | } else { | |
329 | strbuf_release(&target); | |
330 | } | |
331 | } | |
332 | } | |
333 | i = posn + 1; | |
334 | } | |
335 | ||
336 | cdata->got_alternates = 1; | |
337 | } | |
338 | ||
339 | static void fetch_alternates(struct walker *walker, const char *base) | |
340 | { | |
341 | struct strbuf buffer = STRBUF_INIT; | |
342 | struct strbuf url = STRBUF_INIT; | |
343 | struct active_request_slot *slot; | |
344 | struct alternates_request alt_req; | |
345 | struct walker_data *cdata = walker->data; | |
346 | ||
347 | /* | |
348 | * If another request has already started fetching alternates, | |
349 | * wait for them to arrive and return to processing this request's | |
350 | * curl message | |
351 | */ | |
352 | #ifdef USE_CURL_MULTI | |
353 | while (cdata->got_alternates == 0) { | |
354 | step_active_slots(); | |
355 | } | |
356 | #endif | |
357 | ||
358 | /* Nothing to do if they've already been fetched */ | |
359 | if (cdata->got_alternates == 1) | |
360 | return; | |
361 | ||
362 | /* Start the fetch */ | |
363 | cdata->got_alternates = 0; | |
364 | ||
365 | if (walker->get_verbosely) | |
366 | fprintf(stderr, "Getting alternates list for %s\n", base); | |
367 | ||
368 | strbuf_addf(&url, "%s/objects/info/http-alternates", base); | |
369 | ||
370 | /* | |
371 | * Use a callback to process the result, since another request | |
372 | * may fail and need to have alternates loaded before continuing | |
373 | */ | |
374 | slot = get_active_slot(); | |
375 | slot->callback_func = process_alternates_response; | |
376 | alt_req.walker = walker; | |
377 | slot->callback_data = &alt_req; | |
378 | ||
379 | curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer); | |
380 | curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer); | |
381 | curl_easy_setopt(slot->curl, CURLOPT_URL, url.buf); | |
382 | ||
383 | alt_req.base = base; | |
384 | alt_req.url = &url; | |
385 | alt_req.buffer = &buffer; | |
386 | alt_req.http_specific = 1; | |
387 | alt_req.slot = slot; | |
388 | ||
389 | if (start_active_slot(slot)) | |
390 | run_active_slot(slot); | |
391 | else | |
392 | cdata->got_alternates = -1; | |
393 | ||
394 | strbuf_release(&buffer); | |
395 | strbuf_release(&url); | |
396 | } | |
397 | ||
398 | static int fetch_indices(struct walker *walker, struct alt_base *repo) | |
399 | { | |
400 | int ret; | |
401 | ||
402 | if (repo->got_indices) | |
403 | return 0; | |
404 | ||
405 | if (walker->get_verbosely) | |
406 | fprintf(stderr, "Getting pack list for %s\n", repo->base); | |
407 | ||
408 | switch (http_get_info_packs(repo->base, &repo->packs)) { | |
409 | case HTTP_OK: | |
410 | case HTTP_MISSING_TARGET: | |
411 | repo->got_indices = 1; | |
412 | ret = 0; | |
413 | break; | |
414 | default: | |
415 | repo->got_indices = 0; | |
416 | ret = -1; | |
417 | } | |
418 | ||
419 | return ret; | |
420 | } | |
421 | ||
422 | static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigned char *sha1) | |
423 | { | |
424 | struct packed_git *target; | |
425 | int ret; | |
426 | struct slot_results results; | |
427 | struct http_pack_request *preq; | |
428 | ||
429 | if (fetch_indices(walker, repo)) | |
430 | return -1; | |
431 | target = find_sha1_pack(sha1, repo->packs); | |
432 | if (!target) | |
433 | return -1; | |
434 | ||
435 | if (walker->get_verbosely) { | |
436 | fprintf(stderr, "Getting pack %s\n", | |
437 | sha1_to_hex(target->sha1)); | |
438 | fprintf(stderr, " which contains %s\n", | |
439 | sha1_to_hex(sha1)); | |
440 | } | |
441 | ||
442 | preq = new_http_pack_request(target, repo->base); | |
443 | if (preq == NULL) | |
444 | goto abort; | |
445 | preq->lst = &repo->packs; | |
446 | preq->slot->results = &results; | |
447 | ||
448 | if (start_active_slot(preq->slot)) { | |
449 | run_active_slot(preq->slot); | |
450 | if (results.curl_result != CURLE_OK) { | |
451 | error("Unable to get pack file %s\n%s", preq->url, | |
452 | curl_errorstr); | |
453 | goto abort; | |
454 | } | |
455 | } else { | |
456 | error("Unable to start request"); | |
457 | goto abort; | |
458 | } | |
459 | ||
460 | ret = finish_http_pack_request(preq); | |
461 | release_http_pack_request(preq); | |
462 | if (ret) | |
463 | return ret; | |
464 | ||
465 | return 0; | |
466 | ||
467 | abort: | |
468 | return -1; | |
469 | } | |
470 | ||
/* Drop a queued request; currently just release_object_request(). */
static void abort_object_request(struct object_request *obj_req)
{
	release_object_request(obj_req);
}
475 | ||
476 | static int fetch_object(struct walker *walker, unsigned char *sha1) | |
477 | { | |
478 | char *hex = sha1_to_hex(sha1); | |
479 | int ret = 0; | |
480 | struct object_request *obj_req = NULL; | |
481 | struct http_object_request *req; | |
482 | struct list_head *pos, *head = &object_queue_head; | |
483 | ||
484 | list_for_each(pos, head) { | |
485 | obj_req = list_entry(pos, struct object_request, node); | |
486 | if (hasheq(obj_req->oid.hash, sha1)) | |
487 | break; | |
488 | } | |
489 | if (obj_req == NULL) | |
490 | return error("Couldn't find request for %s in the queue", hex); | |
491 | ||
492 | if (has_sha1_file(obj_req->oid.hash)) { | |
493 | if (obj_req->req != NULL) | |
494 | abort_http_object_request(obj_req->req); | |
495 | abort_object_request(obj_req); | |
496 | return 0; | |
497 | } | |
498 | ||
499 | #ifdef USE_CURL_MULTI | |
500 | while (obj_req->state == WAITING) | |
501 | step_active_slots(); | |
502 | #else | |
503 | start_object_request(walker, obj_req); | |
504 | #endif | |
505 | ||
506 | /* | |
507 | * obj_req->req might change when fetching alternates in the callback | |
508 | * process_object_response; therefore, the "shortcut" variable, req, | |
509 | * is used only after we're done with slots. | |
510 | */ | |
511 | while (obj_req->state == ACTIVE) | |
512 | run_active_slot(obj_req->req->slot); | |
513 | ||
514 | req = obj_req->req; | |
515 | ||
516 | if (req->localfile != -1) { | |
517 | close(req->localfile); | |
518 | req->localfile = -1; | |
519 | } | |
520 | ||
521 | /* | |
522 | * we turned off CURLOPT_FAILONERROR to avoid losing a | |
523 | * persistent connection and got CURLE_OK. | |
524 | */ | |
525 | if (req->http_code >= 300 && req->curl_result == CURLE_OK && | |
526 | (starts_with(req->url, "http://") || | |
527 | starts_with(req->url, "https://"))) { | |
528 | req->curl_result = CURLE_HTTP_RETURNED_ERROR; | |
529 | xsnprintf(req->errorstr, sizeof(req->errorstr), | |
530 | "HTTP request failed"); | |
531 | } | |
532 | ||
533 | if (obj_req->state == ABORTED) { | |
534 | ret = error("Request for %s aborted", hex); | |
535 | } else if (req->curl_result != CURLE_OK && | |
536 | req->http_code != 416) { | |
537 | if (missing_target(req)) | |
538 | ret = -1; /* Be silent, it is probably in a pack. */ | |
539 | else | |
540 | ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)", | |
541 | req->errorstr, req->curl_result, | |
542 | req->http_code, hex); | |
543 | } else if (req->zret != Z_STREAM_END) { | |
544 | walker->corrupt_object_found++; | |
545 | ret = error("File %s (%s) corrupt", hex, req->url); | |
546 | } else if (!hasheq(obj_req->oid.hash, req->real_sha1)) { | |
547 | ret = error("File %s has bad hash", hex); | |
548 | } else if (req->rename < 0) { | |
549 | struct strbuf buf = STRBUF_INIT; | |
550 | sha1_file_name(the_repository, &buf, req->sha1); | |
551 | ret = error("unable to write sha1 filename %s", buf.buf); | |
552 | strbuf_release(&buf); | |
553 | } | |
554 | ||
555 | release_http_object_request(req); | |
556 | release_object_request(obj_req); | |
557 | return ret; | |
558 | } | |
559 | ||
560 | static int fetch(struct walker *walker, unsigned char *sha1) | |
561 | { | |
562 | struct walker_data *data = walker->data; | |
563 | struct alt_base *altbase = data->alt; | |
564 | ||
565 | if (!fetch_object(walker, sha1)) | |
566 | return 0; | |
567 | while (altbase) { | |
568 | if (!http_fetch_pack(walker, altbase, sha1)) | |
569 | return 0; | |
570 | fetch_alternates(walker, data->alt->base); | |
571 | altbase = altbase->next; | |
572 | } | |
573 | return error("Unable to find %s under %s", sha1_to_hex(sha1), | |
574 | data->alt->base); | |
575 | } | |
576 | ||
577 | static int fetch_ref(struct walker *walker, struct ref *ref) | |
578 | { | |
579 | struct walker_data *data = walker->data; | |
580 | return http_fetch_ref(data->alt->base, ref); | |
581 | } | |
582 | ||
583 | static void cleanup(struct walker *walker) | |
584 | { | |
585 | struct walker_data *data = walker->data; | |
586 | struct alt_base *alt, *alt_next; | |
587 | ||
588 | if (data) { | |
589 | alt = data->alt; | |
590 | while (alt) { | |
591 | alt_next = alt->next; | |
592 | ||
593 | free(alt->base); | |
594 | free(alt); | |
595 | ||
596 | alt = alt_next; | |
597 | } | |
598 | free(data); | |
599 | walker->data = NULL; | |
600 | } | |
601 | } | |
602 | ||
603 | struct walker *get_http_walker(const char *url) | |
604 | { | |
605 | char *s; | |
606 | struct walker_data *data = xmalloc(sizeof(struct walker_data)); | |
607 | struct walker *walker = xmalloc(sizeof(struct walker)); | |
608 | ||
609 | data->alt = xmalloc(sizeof(*data->alt)); | |
610 | data->alt->base = xstrdup(url); | |
611 | for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s) | |
612 | *s = 0; | |
613 | ||
614 | data->alt->got_indices = 0; | |
615 | data->alt->packs = NULL; | |
616 | data->alt->next = NULL; | |
617 | data->got_alternates = -1; | |
618 | ||
619 | walker->corrupt_object_found = 0; | |
620 | walker->fetch = fetch; | |
621 | walker->fetch_ref = fetch_ref; | |
622 | walker->prefetch = prefetch; | |
623 | walker->cleanup = cleanup; | |
624 | walker->data = data; | |
625 | ||
626 | #ifdef USE_CURL_MULTI | |
627 | add_fill_function(walker, (int (*)(void *)) fill_active_slot); | |
628 | #endif | |
629 | ||
630 | return walker; | |
631 | } |