1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
7 #include "alloc-util.h"
9 #include "format-util.h"
10 #include "gcrypt-util.h"
11 #include "hexdecoct.h"
12 #include "import-util.h"
14 #include "machine-pool.h"
15 #include "parse-util.h"
16 #include "pull-common.h"
18 #include "string-util.h"
20 #include "xattr-util.h"
/* Destructor for a PullJob: releases every resource the job owns — the curl
 * easy handle (detached from its glue event loop), the extra HTTP request
 * headers, the open destination fd, the decompressor state, the gcrypt hash
 * context, and the list of previously-seen ETags.
 * NOTE(review): this extraction is truncated — the usual NULL guard, the
 * frees of the string fields (url/etag/payload/checksum) and the final
 * return are not visible in this view; only visible statements are
 * annotated. */
22 PullJob
* pull_job_unref(PullJob
*j
) {
/* Remove the easy handle from the curl multi/glue machinery and free it. */
26 curl_glue_remove_and_free(j
->glue
, j
->curl
);
/* Free the slist of extra request headers (If-None-Match etc.). */
27 curl_slist_free_all(j
->request_header
);
29 safe_close(j
->disk_fd
);
31 import_compress_free(&j
->compress
);
/* The hash context only exists when checksumming was requested. */
33 if (j
->checksum_context
)
34 gcry_md_close(j
->checksum_context
);
38 strv_free(j
->old_etags
);
/* Marks the job as finished with result code 'ret': on success the state
 * becomes PULL_JOB_DONE at 100% progress, on error PULL_JOB_FAILED.
 * Idempotent: a job already in DONE/FAILED state is left untouched.
 * NOTE(review): truncated view — the branch structure between the DONE and
 * FAILED assignments (presumably an if on 'ret') and any completion
 * callback invocation are not visible here. */
45 static void pull_job_finish(PullJob
*j
, int ret
) {
/* Already finished — do nothing (guard presumably returns early). */
48 if (IN_SET(j
->state
, PULL_JOB_DONE
, PULL_JOB_FAILED
))
/* Success path: mark complete. */
52 j
->state
= PULL_JOB_DONE
;
53 j
->progress_percent
= 100;
54 log_info("Download of %s complete.", j
->url
);
/* Failure path. */
56 j
->state
= PULL_JOB_FAILED
;
/* Restarts the job against 'new_url' (used when a 404 handler supplies a
 * replacement URL): swaps in the new URL, resets all per-transfer state
 * (payload buffer, byte counters, ETag, checksum, curl handle, request
 * headers, decompressor, hash context) back to pristine INIT state, then
 * kicks off the transfer again via pull_job_begin().
 * NOTE(review): truncated view — error-return checks after free_and_strdup()
 * and pull_job_begin() are not visible here. */
64 static int pull_job_restart(PullJob
*j
, const char *new_url
) {
70 r
= free_and_strdup(&j
->url
, new_url
);
74 j
->state
= PULL_JOB_INIT
;
/* Drop any payload bytes buffered from the failed attempt. */
76 j
->payload
= mfree(j
->payload
);
78 j
->payload_allocated
= 0;
79 j
->written_compressed
= 0;
80 j
->written_uncompressed
= 0;
/* UINT64_MAX means "length unknown" (same sentinel as (uint64_t) -1
 * elsewhere in this file). */
81 j
->content_length
= UINT64_MAX
;
82 j
->etag
= mfree(j
->etag
);
83 j
->etag_exists
= false;
85 j
->checksum
= mfree(j
->checksum
);
/* Tear down the old curl easy handle; pull_job_begin() creates a new one. */
87 curl_glue_remove_and_free(j
->glue
, j
->curl
);
90 curl_slist_free_all(j
->request_header
);
91 j
->request_header
= NULL
;
93 import_compress_free(&j
->compress
);
95 if (j
->checksum_context
) {
96 gcry_md_close(j
->checksum_context
);
97 j
->checksum_context
= NULL
;
/* Re-launch the transfer with the reset state. */
100 r
= pull_job_begin(j
);
/* Completion callback invoked by the curl glue when a transfer ends.
 * Recovers the PullJob from the easy handle's CURLINFO_PRIVATE pointer,
 * classifies the HTTP status (304 = cached, 404 + on_not_found = optional
 * redirect/restart, other >=300 or <200 = error), verifies the transfer
 * completed (state, content-length match), finalizes the SHA256 checksum,
 * and fixes up the destination file (truncate to uncompressed size, store
 * source ETag/URL as xattrs, set mtime/crtime) before calling
 * pull_job_finish().
 * NOTE(review): truncated view — most 'goto finish'/early-return error
 * paths, the declarations of r/code/status/k, and the braces closing
 * several branches are not visible here. */
107 void pull_job_curl_on_finished(CurlGlue
*g
, CURL
*curl
, CURLcode result
) {
/* Map the easy handle back to our job object. */
113 if (curl_easy_getinfo(curl
, CURLINFO_PRIVATE
, (char **)&j
) != CURLE_OK
)
/* Ignore spurious callbacks for jobs that already finished. */
116 if (!j
|| IN_SET(j
->state
, PULL_JOB_DONE
, PULL_JOB_FAILED
))
119 if (result
!= CURLE_OK
) {
120 log_error("Transfer failed: %s", curl_easy_strerror(result
));
/* Transfer succeeded at the curl level — inspect the HTTP status. */
125 code
= curl_easy_getinfo(curl
, CURLINFO_RESPONSE_CODE
, &status
);
126 if (code
!= CURLE_OK
) {
127 log_error("Failed to retrieve response code: %s", curl_easy_strerror(code
));
/* 304 Not Modified: one of our If-None-Match ETags matched. */
130 } else if (status
== 304) {
131 log_info("Image already downloaded. Skipping download.");
132 j
->etag_exists
= true;
135 } else if (status
>= 300) {
/* Optionally let the caller supply a replacement URL on 404. */
137 if (status
== 404 && j
->on_not_found
) {
138 _cleanup_free_
char *new_url
= NULL
;
140 /* This resource wasn't found, but the implementor wants to maybe let us know a new URL, query for it. */
141 r
= j
->on_not_found(j
, &new_url
);
145 if (r
> 0) { /* A new url to use */
148 r
= pull_job_restart(j
, new_url
);
/* Re-read the status from the restarted job's handle. */
152 code
= curl_easy_getinfo(j
->curl
, CURLINFO_RESPONSE_CODE
, &status
);
153 if (code
!= CURLE_OK
) {
154 log_error("Failed to retrieve response code: %s", curl_easy_strerror(code
));
164 log_error("HTTP request to %s failed with code %li.", j
->url
, status
);
167 } else if (status
< 200) {
168 log_error("HTTP request to %s finished with unexpected code %li.", j
->url
, status
);
/* Sanity: the body callback should have moved us to RUNNING. */
173 if (j
->state
!= PULL_JOB_RUNNING
) {
174 log_error("Premature connection termination.");
/* If the server announced a length, we must have received all of it.
 * (uint64_t) -1 is the "unknown length" sentinel (== UINT64_MAX). */
179 if (j
->content_length
!= (uint64_t) -1 &&
180 j
->content_length
!= j
->written_compressed
) {
181 log_error("Download truncated.");
/* Finalize the SHA256 digest into a hex string. */
186 if (j
->checksum_context
) {
189 k
= gcry_md_read(j
->checksum_context
, GCRY_MD_SHA256
);
191 log_error("Failed to get checksum.");
196 j
->checksum
= hexmem(k
, gcry_md_get_algo_dlen(GCRY_MD_SHA256
));
202 log_debug("SHA256 of %s is %s.", j
->url
, j
->checksum
);
205 if (j
->disk_fd
>= 0 && j
->allow_sparse
) {
206 /* Make sure the file size is right, in case the file was
207 * sparse and we just seeked for the last part */
209 if (ftruncate(j
->disk_fd
, j
->written_uncompressed
) < 0) {
210 r
= log_error_errno(errno
, "Failed to truncate file: %m");
/* Best-effort metadata: record provenance as xattrs, restore timestamps. */
215 (void) fsetxattr(j
->disk_fd
, "user.source_etag", j
->etag
, strlen(j
->etag
), 0);
217 (void) fsetxattr(j
->disk_fd
, "user.source_url", j
->url
, strlen(j
->url
), 0);
220 struct timespec ut
[2];
222 timespec_store(&ut
[0], j
->mtime
);
224 (void) futimens(j
->disk_fd
, ut
);
226 (void) fd_setcrtime(j
->disk_fd
, j
->mtime
);
233 pull_job_finish(j
, r
);
/* Sink for decompressed data (invoked by import_uncompress()): enforces
 * overflow and uncompressed_max size limits, then either writes the bytes
 * to the destination fd (sparsely when allowed) or appends them to the
 * in-memory payload buffer; finally advances written_uncompressed.
 * NOTE(review): truncated view — the declaration of ssize_t n, the
 * n < 0 / short-write comparisons guarding the two error returns, the
 * else branch joining disk vs. in-memory paths, and the final return are
 * not visible here. */
236 static int pull_job_write_uncompressed(const void *p
, size_t sz
, void *userdata
) {
237 PullJob
*j
= userdata
;
/* Unsigned wrap-around check: written + sz overflowed. */
246 if (j
->written_uncompressed
+ sz
< j
->written_uncompressed
)
247 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW
),
248 "File too large, overflow");
/* Hard safety cap on the uncompressed size. */
250 if (j
->written_uncompressed
+ sz
> j
->uncompressed_max
)
251 return log_error_errno(SYNTHETIC_ERRNO(EFBIG
),
252 "File overly large, refusing");
254 if (j
->disk_fd
>= 0) {
/* Punch holes for zero runs (threshold 64 bytes) when sparse is OK. */
257 n
= sparse_write(j
->disk_fd
, p
, sz
, 64);
259 n
= write(j
->disk_fd
, p
, sz
);
264 return log_error_errno((int) n
, "Failed to write file: %m");
266 return log_error_errno(SYNTHETIC_ERRNO(EIO
), "Short write");
/* No disk fd: accumulate into the in-memory payload buffer instead. */
269 if (!GREEDY_REALLOC(j
->payload
, j
->payload_allocated
, j
->payload_size
+ sz
))
272 memcpy(j
->payload
+ j
->payload_size
, p
, sz
);
273 j
->payload_size
+= sz
;
276 j
->written_uncompressed
+= sz
;
/* Feeds a chunk of raw (possibly compressed) network data through the
 * pipeline: size-limit checks, optional SHA256 accumulation over the
 * compressed stream, then decompression via import_uncompress() which
 * forwards plaintext to pull_job_write_uncompressed(); finally advances
 * written_compressed.
 * NOTE(review): truncated view — the error check after import_uncompress()
 * and the final return are not visible here. */
281 static int pull_job_write_compressed(PullJob
*j
, void *p
, size_t sz
) {
/* Unsigned wrap-around check. */
290 if (j
->written_compressed
+ sz
< j
->written_compressed
)
291 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW
), "File too large, overflow");
292 if (j
->written_compressed
+ sz
> j
->compressed_max
)
293 return log_error_errno(SYNTHETIC_ERRNO(EFBIG
), "File overly large, refusing.");
/* Server announced a Content-Length; refuse to read past it.
 * (uint64_t) -1 is the "unknown length" sentinel (== UINT64_MAX). */
296 if (j
->content_length
!= (uint64_t) -1 &&
297 j
->written_compressed
+ sz
> j
->content_length
)
298 return log_error_errno(SYNTHETIC_ERRNO(EFBIG
),
299 "Content length incorrect.");
/* The checksum is computed over the compressed wire bytes. */
301 if (j
->checksum_context
)
302 gcry_md_write(j
->checksum_context
, p
, sz
);
304 r
= import_uncompress(&j
->compress
, p
, sz
, pull_job_write_uncompressed
, j
);
308 j
->written_compressed
+= sz
;
/* Prepares the output side of the job once the stream type is known: lets
 * the caller open the destination via the on_open_disk hook, probes whether
 * the resulting fd is seekable (enables sparse writes), and sets up the
 * SHA256 context when checksumming was requested.
 * NOTE(review): truncated view — the error check after on_open_disk(), the
 * errno-classification around the lseek failure branch, and the final
 * return are not visible here. */
313 static int pull_job_open_disk(PullJob
*j
) {
318 if (j
->on_open_disk
) {
/* Caller-provided hook opens/creates j->disk_fd. */
319 r
= j
->on_open_disk(j
);
324 if (j
->disk_fd
>= 0) {
325 /* Check if we can do sparse files */
/* NOTE(review): lseek() takes (fd, offset, whence) — here SEEK_SET is
 * passed as the offset and 0 as whence. This only behaves correctly
 * because SEEK_SET == 0; the two arguments should be swapped
 * (lseek(fd, 0, SEEK_SET)). Cannot safely fix in this truncated view. */
327 if (lseek(j
->disk_fd
, SEEK_SET
, 0) == 0)
328 j
->allow_sparse
= true;
331 return log_error_errno(errno
, "Failed to seek on file descriptor: %m");
/* Unseekable fd (e.g. a pipe): no sparse writes. */
333 j
->allow_sparse
= false;
337 if (j
->calc_checksum
) {
/* Lazy gcrypt init; 'false' presumably means non-secure-memory mode —
 * TODO confirm against initialize_libgcrypt()'s signature. */
338 initialize_libgcrypt(false);
340 if (gcry_md_open(&j
->checksum_context
, GCRY_MD_SHA256
, 0) != 0)
341 return log_error_errno(SYNTHETIC_ERRNO(EIO
),
342 "Failed to initialize hash context.");
/* Called once enough payload has been buffered to sniff the compression
 * format: detects the codec, opens the destination, then replays the
 * buffered bytes ("stub") through the decompression pipeline and switches
 * the job into PULL_JOB_RUNNING so subsequent data streams directly.
 * NOTE(review): truncated view — the r-checks after detect/open, the
 * transfer of j->payload ownership into 'stub' and the resetting of
 * j->payload/j->payload_size, and the final return are not visible here. */
348 static int pull_job_detect_compression(PullJob
*j
) {
349 _cleanup_free_
uint8_t *stub
= NULL
;
/* Sniff codec from the buffered leading bytes. */
356 r
= import_uncompress_detect(&j
->compress
, j
->payload
, j
->payload_size
);
358 return log_error_errno(r
, "Failed to initialize compressor: %m");
362 log_debug("Stream is compressed: %s", import_compress_type_to_string(j
->compress
.type
));
364 r
= pull_job_open_disk(j
);
368 /* Now, take the payload we read so far, and decompress it */
370 stub_size
= j
->payload_size
;
374 j
->payload_allocated
= 0;
376 j
->state
= PULL_JOB_RUNNING
;
/* Replay the sniffed bytes through the normal write path. */
378 r
= pull_job_write_compressed(j
, stub
, stub_size
);
/* libcurl CURLOPT_WRITEFUNCTION: receives body data. While ANALYZING it
 * buffers bytes until the compression format can be detected; once RUNNING
 * it streams chunks straight into pull_job_write_compressed(). Any error
 * fails the job via pull_job_finish().
 * NOTE(review): truncated view — the switch(j->state) header, several case
 * bodies/breaks, the 'return sz' success path and the 'fail:' label are
 * not visible here. */
385 static size_t pull_job_write_callback(void *contents
, size_t size
, size_t nmemb
, void *userdata
) {
386 PullJob
*j
= userdata
;
/* curl hands us size*nmemb bytes. */
387 size_t sz
= size
* nmemb
;
395 case PULL_JOB_ANALYZING
:
396 /* Let's first check what it actually is */
398 if (!GREEDY_REALLOC(j
->payload
, j
->payload_allocated
, j
->payload_size
+ sz
)) {
403 memcpy(j
->payload
+ j
->payload_size
, contents
, sz
);
404 j
->payload_size
+= sz
;
/* Try to sniff the codec with what we have so far. */
406 r
= pull_job_detect_compression(j
);
412 case PULL_JOB_RUNNING
:
/* Steady state: stream directly through the decompressor. */
414 r
= pull_job_write_compressed(j
, contents
, sz
);
421 case PULL_JOB_FAILED
:
426 assert_not_reached("Impossible state.");
/* Error path: mark the job failed with code r. */
432 pull_job_finish(j
, r
);
/* libcurl CURLOPT_HEADERFUNCTION: parses response headers while the job is
 * still ANALYZING. Extracts ETag (short-circuits the download if it matches
 * a previously-seen ETag), Content-Length (validated against compressed_max
 * and stored for truncation checks), and Last-Modified (stored as mtime);
 * finally forwards the raw header to the optional on_header hook.
 * NOTE(review): truncated view — the r-checks after each
 * curl_header_strdup(), the 'r > 0' matched-header branches with their
 * 'return sz' exits, the success return and the 'fail:' label are not
 * visible here. */
436 static size_t pull_job_header_callback(void *contents
, size_t size
, size_t nmemb
, void *userdata
) {
437 _cleanup_free_
char *length
= NULL
, *last_modified
= NULL
, *etag
= NULL
;
438 PullJob
*j
= userdata
;
439 size_t sz
= size
* nmemb
;
/* Ignore trailing headers after the job is already finished. */
445 if (IN_SET(j
->state
, PULL_JOB_DONE
, PULL_JOB_FAILED
)) {
450 assert(j
->state
== PULL_JOB_ANALYZING
);
452 r
= curl_header_strdup(contents
, sz
, "ETag:", &etag
);
458 free_and_replace(j
->etag
, etag
);
/* Seen this ETag before — the cached copy is current; stop early. */
460 if (strv_contains(j
->old_etags
, j
->etag
)) {
461 log_info("Image already downloaded. Skipping download.");
462 j
->etag_exists
= true;
463 pull_job_finish(j
, 0);
470 r
= curl_header_strdup(contents
, sz
, "Content-Length:", &length
);
/* Best-effort parse; on failure content_length stays at its sentinel. */
476 (void) safe_atou64(length
, &j
->content_length
);
478 if (j
->content_length
!= (uint64_t) -1) {
479 char bytes
[FORMAT_BYTES_MAX
];
/* Reject before transferring anything if the announced size exceeds cap. */
481 if (j
->content_length
> j
->compressed_max
) {
482 log_error("Content too large.");
487 log_info("Downloading %s for %s.", format_bytes(bytes
, sizeof(bytes
), j
->content_length
), j
->url
);
493 r
= curl_header_strdup(contents
, sz
, "Last-Modified:", &last_modified
);
499 (void) curl_parse_http_time(last_modified
, &j
->mtime
);
/* Give the caller a chance to inspect every header line. */
504 r
= j
->on_header(j
, contents
, sz
);
/* Error path. */
512 pull_job_finish(j
, r
);
/* libcurl CURLOPT_XFERINFOFUNCTION: rate-limited progress logging. At most
 * once per second, and only when the integer percentage changed, logs the
 * percentage plus — when at least a second has elapsed and bytes have
 * flowed — an ETA and average throughput estimate.
 * NOTE(review): truncated view — the declarations of percent/n/done/left,
 * the dltotal <= 0 guard, the third condition of the rate-limit 'if', parts
 * of the log_info argument list, and the 'return 0' are not visible here. */
516 static int pull_job_progress_callback(void *userdata
, curl_off_t dltotal
, curl_off_t dlnow
, curl_off_t ultotal
, curl_off_t ulnow
) {
517 PullJob
*j
= userdata
;
526 percent
= ((100 * dlnow
) / dltotal
);
527 n
= now(CLOCK_MONOTONIC
);
/* Rate limit: at most one status line per second, and only on change. */
529 if (n
> j
->last_status_usec
+ USEC_PER_SEC
&&
530 percent
!= j
->progress_percent
&&
532 char buf
[FORMAT_TIMESPAN_MAX
];
/* Only estimate ETA/speed once we have >1s of history and data. */
534 if (n
- j
->start_usec
> USEC_PER_SEC
&& dlnow
> 0) {
535 char y
[FORMAT_BYTES_MAX
];
538 done
= n
- j
->start_usec
;
/* Linear extrapolation: time_left = done * total/now - done. */
539 left
= (usec_t
) (((double) done
* (double) dltotal
) / dlnow
) - done
;
541 log_info("Got %u%% of %s. %s left at %s/s.",
544 format_timespan(buf
, sizeof(buf
), left
, USEC_PER_SEC
),
545 format_bytes(y
, sizeof(y
), (uint64_t) ((double) dlnow
/ ((double) done
/ (double) USEC_PER_SEC
))));
547 log_info("Got %u%% of %s.", percent
, j
->url
);
549 j
->progress_percent
= percent
;
550 j
->last_status_usec
= n
;
/* Allocates and initializes a new PullJob for 'url' on the given curl glue,
 * with sane defaults: INIT state, unknown content length, start timestamp,
 * and 64GB safety caps on both compressed and uncompressed sizes. On
 * success, ownership is returned via *ret.
 * NOTE(review): truncated view — the asserts, the URL copy into 'u', the
 * allocation of 'j', surrounding fields of the designated initializer
 * (url, glue, disk_fd), the TAKE_PTR-style handoff to *ret and the return
 * are not visible here. */
559 int pull_job_new(PullJob
**ret
, const char *url
, CurlGlue
*glue
, void *userdata
) {
/* Auto-cleanup: unref the job on early error return. */
560 _cleanup_(pull_job_unrefp
) PullJob
*j
= NULL
;
561 _cleanup_free_
char *u
= NULL
;
576 .state
= PULL_JOB_INIT
,
578 .userdata
= userdata
,
/* (uint64_t) -1 == UINT64_MAX: "length unknown" sentinel. */
580 .content_length
= (uint64_t) -1,
581 .start_usec
= now(CLOCK_MONOTONIC
),
582 .compressed_max
= 64LLU * 1024LLU * 1024LLU * 1024LLU, /* 64GB safety limit */
583 .uncompressed_max
= 64LLU * 1024LLU * 1024LLU * 1024LLU, /* 64GB safety limit */
/* Starts the HTTP transfer for an INIT-state job: creates the curl easy
 * handle for j->url, builds an "If-None-Match" header from any previously
 * seen ETags, wires up the write/header/progress callbacks, registers the
 * handle with the glue event loop and moves the job to ANALYZING.
 * NOTE(review): truncated view — the error returns after each failed
 * curl_easy_setopt()/OOM check, the r-checks after curl_glue_make()/
 * curl_glue_add(), and the final 'return 0' are not visible here. */
592 int pull_job_begin(PullJob
*j
) {
/* Only an INIT-state job may be started. */
597 if (j
->state
!= PULL_JOB_INIT
)
600 r
= curl_glue_make(&j
->curl
, j
->url
, j
);
/* Advertise known ETags so the server can answer 304 Not Modified. */
604 if (!strv_isempty(j
->old_etags
)) {
605 _cleanup_free_
char *cc
= NULL
, *hdr
= NULL
;
607 cc
= strv_join(j
->old_etags
, ", ");
611 hdr
= strjoin("If-None-Match: ", cc
);
/* Either start a fresh header slist or append to the existing one. */
615 if (!j
->request_header
) {
616 j
->request_header
= curl_slist_new(hdr
, NULL
);
617 if (!j
->request_header
)
620 struct curl_slist
*l
;
622 l
= curl_slist_append(j
->request_header
, hdr
);
626 j
->request_header
= l
;
630 if (j
->request_header
) {
631 if (curl_easy_setopt(j
->curl
, CURLOPT_HTTPHEADER
, j
->request_header
) != CURLE_OK
)
/* Body data -> pull_job_write_callback(j). */
635 if (curl_easy_setopt(j
->curl
, CURLOPT_WRITEFUNCTION
, pull_job_write_callback
) != CURLE_OK
)
638 if (curl_easy_setopt(j
->curl
, CURLOPT_WRITEDATA
, j
) != CURLE_OK
)
/* Headers -> pull_job_header_callback(j). */
641 if (curl_easy_setopt(j
->curl
, CURLOPT_HEADERFUNCTION
, pull_job_header_callback
) != CURLE_OK
)
644 if (curl_easy_setopt(j
->curl
, CURLOPT_HEADERDATA
, j
) != CURLE_OK
)
/* Progress -> pull_job_progress_callback(j); NOPROGRESS=0 enables it. */
647 if (curl_easy_setopt(j
->curl
, CURLOPT_XFERINFOFUNCTION
, pull_job_progress_callback
) != CURLE_OK
)
650 if (curl_easy_setopt(j
->curl
, CURLOPT_XFERINFODATA
, j
) != CURLE_OK
)
653 if (curl_easy_setopt(j
->curl
, CURLOPT_NOPROGRESS
, 0) != CURLE_OK
)
/* Hand the configured handle to the glue's event loop. */
656 r
= curl_glue_add(j
->glue
, j
->curl
);
660 j
->state
= PULL_JOB_ANALYZING
;