1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
7 #include "alloc-util.h"
9 #include "format-util.h"
10 #include "gcrypt-util.h"
11 #include "hexdecoct.h"
12 #include "import-util.h"
14 #include "machine-pool.h"
15 #include "parse-util.h"
16 #include "pull-common.h"
18 #include "string-util.h"
20 #include "sync-util.h"
21 #include "xattr-util.h"
/* Close the job's disk file descriptor via safe_close().
 * NOTE(review): this extract is incomplete — the original lines between the
 * signature and the safe_close() call (presumably a NULL check and a
 * close_disk_fd guard) are not visible here; confirm against the full file. */
23 void pull_job_close_disk_fd(PullJob
*j
) {
28 safe_close(j
->disk_fd
);
/* Destructor for a PullJob: tears down everything the job owns.
 * Visible teardown steps: close the disk fd, detach+free the curl easy handle
 * from its glue, free the request header list, free the decompressor state,
 * free the checksum context and the stored old etags.
 * NOTE(review): lines are missing from this extract (e.g. a NULL check, the
 * frees of url/etag/payload/checksum and the final return are not visible). */
33 PullJob
* pull_job_unref(PullJob
*j
) {
37 pull_job_close_disk_fd(j
);
39 curl_glue_remove_and_free(j
->glue
, j
->curl
);
40 curl_slist_free_all(j
->request_header
);
42 import_compress_free(&j
->compress
);
/* Checksum context teardown — both an OpenSSL EVP and a libgcrypt call are
 * present; presumably these sit in alternative #if branches in the full
 * source (the preprocessor guards are not visible here). */
46 EVP_MD_CTX_free(j
->checksum_ctx
);
48 gcry_md_close(j
->checksum_ctx
);
53 strv_free(j
->old_etags
);
/* Finalize a job: no-op if it already reached DONE or FAILED; otherwise
 * transition it to DONE (logging completion and forcing progress to 100%)
 * or to FAILED. NOTE(review): the branch conditions selecting between the
 * DONE and FAILED paths (presumably based on 'ret') are missing from this
 * extract. */
60 static void pull_job_finish(PullJob
*j
, int ret
) {
63 if (IN_SET(j
->state
, PULL_JOB_DONE
, PULL_JOB_FAILED
))
/* Success path: mark done, report 100% progress, log completion. */
67 j
->state
= PULL_JOB_DONE
;
68 j
->progress_percent
= 100;
69 log_info("Download of %s complete.", j
->url
);
/* Failure path: mark the job failed. */
71 j
->state
= PULL_JOB_FAILED
;
/* Restart the job against a new URL (used e.g. after a redirect-style
 * on_not_found callback). Replaces j->url, resets all per-transfer state
 * (payload, byte counters, content length, etag, checksum), discards the
 * curl handle, request headers, decompressor and checksum context, then
 * kicks the job off again with pull_job_begin().
 * NOTE(review): error-return lines between the visible statements are
 * missing from this extract. */
79 static int pull_job_restart(PullJob
*j
, const char *new_url
) {
85 r
= free_and_strdup(&j
->url
, new_url
);
89 j
->state
= PULL_JOB_INIT
;
91 j
->payload
= mfree(j
->payload
);
93 j
->written_compressed
= 0;
94 j
->written_uncompressed
= 0;
95 j
->content_length
= UINT64_MAX
;
96 j
->etag
= mfree(j
->etag
);
97 j
->etag_exists
= false;
99 j
->checksum
= mfree(j
->checksum
);
101 curl_glue_remove_and_free(j
->glue
, j
->curl
);
104 curl_slist_free_all(j
->request_header
);
105 j
->request_header
= NULL
;
107 import_compress_free(&j
->compress
);
/* Drop any in-progress checksum context; EVP vs. gcrypt calls presumably
 * live in alternative #if branches (guards not visible here). */
109 if (j
->checksum_ctx
) {
111 EVP_MD_CTX_free(j
->checksum_ctx
);
113 gcry_md_close(j
->checksum_ctx
);
115 j
->checksum_ctx
= NULL
;
/* Re-enter the normal start path with the fresh URL. */
118 r
= pull_job_begin(j
);
/* libcurl completion callback: invoked by the curl glue when a transfer
 * ends. Recovers the PullJob from CURLINFO_PRIVATE, validates the transfer
 * result and (for HTTP/HTTPS) the response code, handles the 304/etag case,
 * 404-with-on_not_found restarts, finalizes the SHA256 checksum, performs
 * finishing disk operations (truncate, xattrs, timestamps, sync) and calls
 * pull_job_finish(). NOTE(review): many intermediate lines (returns, gotos,
 * closing braces) are missing from this extract. */
125 void pull_job_curl_on_finished(CurlGlue
*g
, CURL
*curl
, CURLcode result
) {
/* Recover the owning PullJob pointer stashed in the easy handle. */
131 if (curl_easy_getinfo(curl
, CURLINFO_PRIVATE
, (char **)&j
) != CURLE_OK
)
134 if (!j
|| IN_SET(j
->state
, PULL_JOB_DONE
, PULL_JOB_FAILED
))
137 if (result
!= CURLE_OK
) {
138 r
= log_error_errno(SYNTHETIC_ERRNO(EIO
), "Transfer failed: %s", curl_easy_strerror(result
));
/* Determine which protocol the transfer used; HTTP status handling below
 * only applies to HTTP/HTTPS. */
142 code
= curl_easy_getinfo(curl
, CURLINFO_PROTOCOL
, &protocol
);
143 if (code
!= CURLE_OK
) {
144 r
= log_error_errno(SYNTHETIC_ERRNO(EIO
), "Failed to retrieve response code: %s", curl_easy_strerror(code
));
148 if (IN_SET(protocol
, CURLPROTO_HTTP
, CURLPROTO_HTTPS
)) {
151 code
= curl_easy_getinfo(curl
, CURLINFO_RESPONSE_CODE
, &status
);
152 if (code
!= CURLE_OK
) {
153 r
= log_error_errno(SYNTHETIC_ERRNO(EIO
), "Failed to retrieve response code: %s", curl_easy_strerror(code
));
/* 304 path (presumably — the condition line is missing): our etag matched,
 * nothing to download. */
158 log_info("Image already downloaded. Skipping download.");
159 j
->etag_exists
= true;
162 } else if (status
>= 300) {
/* A 404 may be recoverable: the implementor's on_not_found callback can
 * supply a replacement URL and we restart the whole job against it. */
164 if (status
== 404 && j
->on_not_found
) {
165 _cleanup_free_
char *new_url
= NULL
;
167 /* This resource wasn't found, but the implementor wants to maybe let us know a new URL, query for it. */
168 r
= j
->on_not_found(j
, &new_url
);
172 if (r
> 0) { /* A new url to use */
175 r
= pull_job_restart(j
, new_url
);
179 code
= curl_easy_getinfo(j
->curl
, CURLINFO_RESPONSE_CODE
, &status
);
180 if (code
!= CURLE_OK
) {
181 r
= log_error_errno(SYNTHETIC_ERRNO(EIO
), "Failed to retrieve response code: %s", curl_easy_strerror(code
));
191 status
== 404 ? SYNTHETIC_ERRNO(ENOMEDIUM
) : SYNTHETIC_ERRNO(EIO
), /* Make the most common error recognizable */
192 "HTTP request to %s failed with code %li.", j
->url
, status
);
194 } else if (status
< 200) {
195 r
= log_error_errno(SYNTHETIC_ERRNO(EIO
), "HTTP request to %s finished with unexpected code %li.", j
->url
, status
);
/* Sanity checks: the transfer must have actually been running, and if a
 * Content-Length was announced the compressed byte count must match it. */
200 if (j
->state
!= PULL_JOB_RUNNING
) {
201 r
= log_error_errno(SYNTHETIC_ERRNO(EIO
), "Premature connection termination.");
205 if (j
->content_length
!= UINT64_MAX
&&
206 j
->content_length
!= j
->written_compressed
) {
207 r
= log_error_errno(SYNTHETIC_ERRNO(EIO
), "Download truncated.");
/* Finalize the SHA256 digest into j->checksum (hex). EVP and gcrypt
 * finalization paths both appear; presumably alternative #if branches. */
211 if (j
->checksum_ctx
) {
212 unsigned checksum_len
;
214 uint8_t k
[EVP_MAX_MD_SIZE
];
216 r
= EVP_DigestFinal_ex(j
->checksum_ctx
, k
, &checksum_len
);
218 r
= log_error_errno(SYNTHETIC_ERRNO(EIO
), "Failed to get checksum.");
221 assert(checksum_len
<= sizeof k
);
225 k
= gcry_md_read(j
->checksum_ctx
, GCRY_MD_SHA256
);
227 r
= log_error_errno(SYNTHETIC_ERRNO(EIO
), "Failed to get checksum.");
231 checksum_len
= gcry_md_get_algo_dlen(GCRY_MD_SHA256
);
234 j
->checksum
= hexmem(k
, checksum_len
);
240 log_debug("SHA256 of %s is %s.", j
->url
, j
->checksum
);
243 /* Do a couple of finishing disk operations, but only if we are the sole owner of the file (i.e. no
244 * offset is specified, which indicates we only own the file partially) */
246 if (j
->disk_fd
>= 0) {
248 if (S_ISREG(j
->disk_stat
.st_mode
)) {
250 if (j
->offset
== UINT64_MAX
) {
252 if (j
->written_compressed
> 0) {
253 /* Make sure the file size is right, in case the file was sparse and we just seeked
254 * for the last part */
255 if (ftruncate(j
->disk_fd
, j
->written_uncompressed
) < 0) {
256 r
= log_error_errno(errno
, "Failed to truncate file: %m");
/* Record provenance as xattrs and propagate the server's mtime; failures
 * of the cosmetic steps are only logged, not fatal. */
262 (void) fsetxattr(j
->disk_fd
, "user.source_etag", j
->etag
, strlen(j
->etag
), 0);
264 (void) fsetxattr(j
->disk_fd
, "user.source_url", j
->url
, strlen(j
->url
), 0);
269 timespec_store(&ut
, j
->mtime
);
271 if (futimens(j
->disk_fd
, (struct timespec
[]) { ut
, ut
}) < 0)
272 log_debug_errno(errno
, "Failed to adjust atime/mtime of created image, ignoring: %m");
274 r
= fd_setcrtime(j
->disk_fd
, j
->mtime
);
276 log_debug_errno(r
, "Failed to adjust crtime of created image, ignoring: %m");
281 r
= fsync_full(j
->disk_fd
);
283 log_error_errno(r
, "Failed to synchronize file to disk: %m");
288 } else if (S_ISBLK(j
->disk_stat
.st_mode
) && j
->sync
) {
290 if (fsync(j
->disk_fd
) < 0) {
291 r
= log_error_errno(errno
, "Failed to synchronize block device: %m");
297 log_info("Acquired %s.", FORMAT_BYTES(j
->written_uncompressed
));
302 pull_job_finish(j
, r
);
/* Sink for decompressed data (invoked by import_uncompress()): writes 'sz'
 * bytes at 'p' either to the disk fd (sparse_write for regular files without
 * an offset, loop_write otherwise) and/or into the in-memory payload buffer,
 * enforcing the uncompressed_max size limit and overflow safety.
 * NOTE(review): several lines (returns, brace closers, the 'too_much'
 * handling) are missing from this extract. */
305 static int pull_job_write_uncompressed(const void *p
, size_t sz
, void *userdata
) {
306 PullJob
*j
= userdata
;
307 bool too_much
= false;
/* Guard against uint64_t overflow of the running byte counter. */
314 if (j
->written_uncompressed
> UINT64_MAX
- sz
)
315 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW
), "File too large, overflow");
317 if (j
->written_uncompressed
>= j
->uncompressed_max
) {
/* Clamp the chunk so we never exceed the configured maximum. */
322 if (j
->written_uncompressed
+ sz
> j
->uncompressed_max
) {
324 sz
= j
->uncompressed_max
- j
->written_uncompressed
; /* since we have the data in memory
325 * already, we might as well write it to
329 if (j
->disk_fd
>= 0) {
/* Regular file with no explicit offset: we own the file, punch holes via
 * sparse_write; otherwise plain loop_write. */
331 if (S_ISREG(j
->disk_stat
.st_mode
) && j
->offset
== UINT64_MAX
) {
334 n
= sparse_write(j
->disk_fd
, p
, sz
, 64);
336 return log_error_errno((int) n
, "Failed to write file: %m");
338 return log_error_errno(SYNTHETIC_ERRNO(EIO
), "Short write");
340 r
= loop_write(j
->disk_fd
, p
, sz
, false);
342 return log_error_errno(r
, "Failed to write file: %m");
/* Keep an in-memory copy when there is no disk fd or memory retention is
 * explicitly forced. */
346 if (j
->disk_fd
< 0 || j
->force_memory
) {
347 if (!GREEDY_REALLOC(j
->payload
, j
->payload_size
+ sz
))
350 memcpy(j
->payload
+ j
->payload_size
, p
, sz
);
351 j
->payload_size
+= sz
;
354 j
->written_uncompressed
+= sz
;
358 return log_error_errno(SYNTHETIC_ERRNO(EFBIG
), "File overly large, refusing.");
/* Accept a chunk of (possibly compressed) wire data: enforce overflow,
 * compressed_max and declared Content-Length limits, feed the chunk into the
 * running checksum, decompress it through pull_job_write_uncompressed(), and
 * account the compressed bytes. NOTE(review): intermediate error-return and
 * brace lines are missing from this extract. */
363 static int pull_job_write_compressed(PullJob
*j
, void *p
, size_t sz
) {
/* Reject uint64_t wrap-around of the compressed byte counter. */
372 if (j
->written_compressed
+ sz
< j
->written_compressed
)
373 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW
), "File too large, overflow");
375 if (j
->written_compressed
+ sz
> j
->compressed_max
)
376 return log_error_errno(SYNTHETIC_ERRNO(EFBIG
), "File overly large, refusing.");
/* Server must not send more than the Content-Length it announced. */
378 if (j
->content_length
!= UINT64_MAX
&&
379 j
->written_compressed
+ sz
> j
->content_length
)
380 return log_error_errno(SYNTHETIC_ERRNO(EFBIG
),
381 "Content length incorrect.");
/* Update the SHA256 digest over the raw (compressed) stream; EVP and
 * gcrypt calls presumably sit in alternative #if branches. */
383 if (j
->checksum_ctx
) {
385 r
= EVP_DigestUpdate(j
->checksum_ctx
, p
, sz
);
387 return log_error_errno(SYNTHETIC_ERRNO(EIO
),
388 "Could not hash chunk.");
390 gcry_md_write(j
->checksum_ctx
, p
, sz
);
/* Decompress in place, streaming the output into the uncompressed sink. */
394 r
= import_uncompress(&j
->compress
, p
, sz
, pull_job_write_uncompressed
, j
);
398 j
->written_compressed
+= sz
;
/* Prepare the destination for writing: let the implementor open the disk fd
 * via on_open_disk(), stat it, seek to the requested offset if one was set,
 * and set up the SHA256 checksum context if checksumming was requested.
 * NOTE(review): intermediate error-return lines and the #if guards around
 * the EVP/gcrypt alternatives are missing from this extract. */
403 static int pull_job_open_disk(PullJob
*j
) {
408 if (j
->on_open_disk
) {
409 r
= j
->on_open_disk(j
);
414 if (j
->disk_fd
>= 0) {
415 if (fstat(j
->disk_fd
, &j
->disk_stat
) < 0)
416 return log_error_errno(errno
, "Failed to stat disk file: %m");
/* A partial-ownership job writes at a fixed offset within the file. */
418 if (j
->offset
!= UINT64_MAX
) {
419 if (lseek(j
->disk_fd
, j
->offset
, SEEK_SET
) == (off_t
) -1)
420 return log_error_errno(errno
, "Failed to seek on file descriptor: %m");
424 if (j
->calc_checksum
) {
/* OpenSSL path: allocate an EVP context and initialize it for SHA256. */
426 j
->checksum_ctx
= EVP_MD_CTX_new();
427 if (!j
->checksum_ctx
)
430 r
= EVP_DigestInit_ex(j
->checksum_ctx
, EVP_sha256(), NULL
);
432 return log_error_errno(SYNTHETIC_ERRNO(EIO
),
433 "Failed to initialize hash context.");
/* gcrypt path: lazy-initialize libgcrypt, then open a SHA256 handle. */
435 initialize_libgcrypt(false);
437 if (gcry_md_open(&j
->checksum_ctx
, GCRY_MD_SHA256
, 0) != 0)
438 return log_error_errno(SYNTHETIC_ERRNO(EIO
),
439 "Failed to initialize hash context.");
/* Once enough payload has been buffered, detect the stream's compression
 * format, open the destination, switch the job to RUNNING and replay the
 * buffered payload ('stub') through the normal compressed-write path.
 * NOTE(review): lines moving j->payload into 'stub' and the intermediate
 * error returns are missing from this extract. */
446 static int pull_job_detect_compression(PullJob
*j
) {
447 _cleanup_free_
uint8_t *stub
= NULL
;
454 r
= import_uncompress_detect(&j
->compress
, j
->payload
, j
->payload_size
);
456 return log_error_errno(r
, "Failed to initialize compressor: %m");
460 log_debug("Stream is compressed: %s", import_compress_type_to_string(j
->compress
.type
));
462 r
= pull_job_open_disk(j
);
466 /* Now, take the payload we read so far, and decompress it */
468 stub_size
= j
->payload_size
;
473 j
->state
= PULL_JOB_RUNNING
;
475 r
= pull_job_write_compressed(j
, stub
, stub_size
);
/* libcurl CURLOPT_WRITEFUNCTION callback: dispatches incoming body data on
 * the job state. While ANALYZING, data is accumulated in j->payload until
 * compression detection can run; while RUNNING, it is streamed straight into
 * pull_job_write_compressed(). On error the job is finished with the error
 * code. NOTE(review): the switch header, break/return lines and closing
 * braces are missing from this extract. */
482 static size_t pull_job_write_callback(void *contents
, size_t size
, size_t nmemb
, void *userdata
) {
483 PullJob
*j
= userdata
;
/* Per the curl contract, the chunk length is size * nmemb bytes. */
484 size_t sz
= size
* nmemb
;
492 case PULL_JOB_ANALYZING
:
493 /* Let's first check what it actually is */
495 if (!GREEDY_REALLOC(j
->payload
, j
->payload_size
+ sz
)) {
500 memcpy(j
->payload
+ j
->payload_size
, contents
, sz
);
501 j
->payload_size
+= sz
;
503 r
= pull_job_detect_compression(j
);
509 case PULL_JOB_RUNNING
:
511 r
= pull_job_write_compressed(j
, contents
, sz
);
/* Data arriving in DONE/FAILED state is a programming error. */
518 case PULL_JOB_FAILED
:
523 assert_not_reached();
529 pull_job_finish(j
, r
);
533 static int http_status_ok(CURLcode status
) {
534 /* Consider all HTTP status code in the 2xx range as OK */
535 return status
>= 200 && status
<= 299;
538 static int http_status_etag_exists(CURLcode status
) {
539 /* This one is special, it's triggered by our etag mgmt logic */
540 return status
== 304;
/* libcurl CURLOPT_HEADERFUNCTION callback: parses response headers while the
 * job is ANALYZING. Extracts ETag (finishing early when it matches a known
 * old etag), Content-Length (validated against compressed_max) and
 * Last-Modified, then forwards the raw header to the implementor's
 * on_header hook if set. NOTE(review): many intermediate lines (returns,
 * gotos, brace closers) are missing from this extract. */
543 static size_t pull_job_header_callback(void *contents
, size_t size
, size_t nmemb
, void *userdata
) {
544 _cleanup_free_
char *length
= NULL
, *last_modified
= NULL
, *etag
= NULL
;
/* Per the curl contract, the header chunk length is size * nmemb bytes. */
545 size_t sz
= size
* nmemb
;
546 PullJob
*j
= userdata
;
554 if (IN_SET(j
->state
, PULL_JOB_DONE
, PULL_JOB_FAILED
)) {
559 assert(j
->state
== PULL_JOB_ANALYZING
);
561 code
= curl_easy_getinfo(j
->curl
, CURLINFO_RESPONSE_CODE
, &status
);
562 if (code
!= CURLE_OK
) {
563 r
= log_error_errno(SYNTHETIC_ERRNO(EIO
), "Failed to retrieve response code: %s", curl_easy_strerror(code
));
567 if (http_status_ok(status
) || http_status_etag_exists(status
)) {
568 /* Check Etag on OK and etag exists responses. */
570 r
= curl_header_strdup(contents
, sz
, "ETag:", &etag
);
576 free_and_replace(j
->etag
, etag
);
/* If the server's etag matches one we already have, the image is current:
 * short-circuit the whole download successfully. */
578 if (strv_contains(j
->old_etags
, j
->etag
)) {
579 log_info("Image already downloaded. Skipping download. (%s)", j
->etag
);
580 j
->etag_exists
= true;
581 pull_job_finish(j
, 0);
589 if (!http_status_ok(status
)) /* Let's ignore the rest here, these requests are probably redirects and
590 * stuff where the headers aren't interesting to us */
593 r
= curl_header_strdup(contents
, sz
, "Content-Length:", &length
);
/* Best-effort parse; an unparsable length simply leaves UINT64_MAX. */
599 (void) safe_atou64(length
, &j
->content_length
);
601 if (j
->content_length
!= UINT64_MAX
) {
602 if (j
->content_length
> j
->compressed_max
) {
603 r
= log_error_errno(SYNTHETIC_ERRNO(EFBIG
), "Content too large.");
607 log_info("Downloading %s for %s.", FORMAT_BYTES(j
->content_length
), j
->url
);
613 r
= curl_header_strdup(contents
, sz
, "Last-Modified:", &last_modified
);
619 (void) curl_parse_http_time(last_modified
, &j
->mtime
);
/* Give the implementor a chance to inspect every header line. */
624 r
= j
->on_header(j
, contents
, sz
);
632 pull_job_finish(j
, r
);
/* libcurl CURLOPT_XFERINFOFUNCTION callback: rate-limited progress logging.
 * Computes the percentage done and, at most once per second and only when
 * the percentage changed, logs progress — including an ETA and transfer
 * rate once at least a second of data is available. NOTE(review): variable
 * declarations, early returns and parts of the log_info() argument lists
 * are missing from this extract. */
636 static int pull_job_progress_callback(void *userdata
, curl_off_t dltotal
, curl_off_t dlnow
, curl_off_t ultotal
, curl_off_t ulnow
) {
637 PullJob
*j
= userdata
;
646 percent
= ((100 * dlnow
) / dltotal
);
647 n
= now(CLOCK_MONOTONIC
);
/* Throttle: only log when >1s elapsed since the last status line and the
 * percentage actually moved. */
649 if (n
> j
->last_status_usec
+ USEC_PER_SEC
&&
650 percent
!= j
->progress_percent
&&
653 if (n
- j
->start_usec
> USEC_PER_SEC
&& dlnow
> 0) {
/* ETA = elapsed * (total/downloaded) - elapsed, computed in floating
 * point to avoid integer truncation. */
656 done
= n
- j
->start_usec
;
657 left
= (usec_t
) (((double) done
* (double) dltotal
) / dlnow
) - done
;
659 log_info("Got %u%% of %s. %s left at %s/s.",
662 FORMAT_TIMESPAN(left
, USEC_PER_SEC
),
663 FORMAT_BYTES((uint64_t) ((double) dlnow
/ ((double) done
/ (double) USEC_PER_SEC
))));
665 log_info("Got %u%% of %s.", percent
, j
->url
);
667 j
->progress_percent
= percent
;
668 j
->last_status_usec
= n
;
/* NOTE(review): this appears to be the interior of a constructor (presumably
 * pull_job_new) — the function signature and the allocation preceding this
 * designated-initializer list are not visible in this extract. The visible
 * part declares the cleanup-managed locals and sets the job's defaults:
 * INIT state, fd ownership, unknown content length, start timestamp, 64GB
 * compressed/uncompressed safety limits and no write offset. */
683 _cleanup_(pull_job_unrefp
) PullJob
*j
= NULL
;
684 _cleanup_free_
char *u
= NULL
;
699 .state
= PULL_JOB_INIT
,
701 .close_disk_fd
= true,
702 .userdata
= userdata
,
704 .content_length
= UINT64_MAX
,
705 .start_usec
= now(CLOCK_MONOTONIC
),
706 .compressed_max
= 64LLU * 1024LLU * 1024LLU * 1024LLU, /* 64GB safety limit */
707 .uncompressed_max
= 64LLU * 1024LLU * 1024LLU * 1024LLU, /* 64GB safety limit */
709 .offset
= UINT64_MAX
,
/* Start the transfer for an INIT-state job: create the curl easy handle for
 * j->url, build an If-None-Match header from the known old etags, install
 * the write/header/progress callbacks on the handle and register it with
 * the curl glue, finally switching the job to ANALYZING.
 * NOTE(review): error-return lines between the visible statements and the
 * function's tail (past the last visible line) are missing from this
 * extract. */
718 int pull_job_begin(PullJob
*j
) {
723 if (j
->state
!= PULL_JOB_INIT
)
726 r
= curl_glue_make(&j
->curl
, j
->url
, j
);
/* Offer the server every etag we already hold, so it can answer 304. */
730 if (!strv_isempty(j
->old_etags
)) {
731 _cleanup_free_
char *cc
= NULL
, *hdr
= NULL
;
733 cc
= strv_join(j
->old_etags
, ", ");
737 hdr
= strjoin("If-None-Match: ", cc
);
741 if (!j
->request_header
) {
742 j
->request_header
= curl_slist_new(hdr
, NULL
);
743 if (!j
->request_header
)
746 struct curl_slist
*l
;
748 l
= curl_slist_append(j
->request_header
, hdr
);
752 j
->request_header
= l
;
756 if (j
->request_header
) {
757 if (curl_easy_setopt(j
->curl
, CURLOPT_HTTPHEADER
, j
->request_header
) != CURLE_OK
)
/* Wire up the data, header and progress callbacks, each with the job as
 * its userdata; NOPROGRESS=0 enables the xferinfo callback. */
761 if (curl_easy_setopt(j
->curl
, CURLOPT_WRITEFUNCTION
, pull_job_write_callback
) != CURLE_OK
)
764 if (curl_easy_setopt(j
->curl
, CURLOPT_WRITEDATA
, j
) != CURLE_OK
)
767 if (curl_easy_setopt(j
->curl
, CURLOPT_HEADERFUNCTION
, pull_job_header_callback
) != CURLE_OK
)
770 if (curl_easy_setopt(j
->curl
, CURLOPT_HEADERDATA
, j
) != CURLE_OK
)
773 if (curl_easy_setopt(j
->curl
, CURLOPT_XFERINFOFUNCTION
, pull_job_progress_callback
) != CURLE_OK
)
776 if (curl_easy_setopt(j
->curl
, CURLOPT_XFERINFODATA
, j
) != CURLE_OK
)
779 if (curl_easy_setopt(j
->curl
, CURLOPT_NOPROGRESS
, 0) != CURLE_OK
)
782 r
= curl_glue_add(j
->glue
, j
->curl
);
786 j
->state
= PULL_JOB_ANALYZING
;