/* src/import/pull-job.c (systemd) */
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2015 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <sys/xattr.h>
22
23 #include "alloc-util.h"
24 #include "fd-util.h"
25 #include "hexdecoct.h"
26 #include "import-util.h"
27 #include "io-util.h"
28 #include "machine-pool.h"
29 #include "parse-util.h"
30 #include "pull-common.h"
31 #include "pull-job.h"
32 #include "string-util.h"
33 #include "strv.h"
34 #include "xattr-util.h"
35
/* Destroys a PullJob and every resource it owns: the curl easy handle (detached
 * from the glue object first), the extra request header list, the output file
 * descriptor, the decompressor state, the gcrypt digest context and all owned
 * strings/buffers. Accepts NULL. Always returns NULL (via mfree()) so callers
 * can write "j = pull_job_unref(j);". */
PullJob* pull_job_unref(PullJob *j) {
        if (!j)
                return NULL;

        /* Remove the easy handle from the glue's multi handle before freeing it */
        curl_glue_remove_and_free(j->glue, j->curl);
        curl_slist_free_all(j->request_header);

        safe_close(j->disk_fd);

        import_compress_free(&j->compress);

        /* Only opened when checksum calculation was requested */
        if (j->checksum_context)
                gcry_md_close(j->checksum_context);

        free(j->url);
        free(j->etag);
        strv_free(j->old_etags);
        free(j->payload);
        free(j->checksum);

        return mfree(j);
}
58
59 static void pull_job_finish(PullJob *j, int ret) {
60 assert(j);
61
62 if (IN_SET(j->state, PULL_JOB_DONE, PULL_JOB_FAILED))
63 return;
64
65 if (ret == 0) {
66 j->state = PULL_JOB_DONE;
67 j->progress_percent = 100;
68 log_info("Download of %s complete.", j->url);
69 } else {
70 j->state = PULL_JOB_FAILED;
71 j->error = ret;
72 }
73
74 if (j->on_finished)
75 j->on_finished(j);
76 }
77
78 static int pull_job_restart(PullJob *j) {
79 int r;
80 char *chksum_url = NULL;
81
82 r = import_url_change_last_component(j->url, "SHA256SUMS", &chksum_url);
83 if (r < 0)
84 return r;
85
86 free(j->url);
87 j->url = chksum_url;
88 j->state = PULL_JOB_INIT;
89 j->payload = mfree(j->payload);
90 j->payload_size = 0;
91 j->payload_allocated = 0;
92 j->written_compressed = 0;
93 j->written_uncompressed = 0;
94 j->written_since_last_grow = 0;
95
96 r = pull_job_begin(j);
97 if (r < 0)
98 return r;
99
100 return 0;
101 }
102
/* Completion callback invoked by the curl glue layer once a transfer ends
 * (successfully or not). Classifies the HTTP result, finalizes the checksum,
 * truncates/annotates the output file, and ends the job via pull_job_finish()
 * — except on the SHA256SUMS-restart path, which leaves the job running. */
void pull_job_curl_on_finished(CurlGlue *g, CURL *curl, CURLcode result) {
        PullJob *j = NULL;
        CURLcode code;
        long status;
        int r;

        /* Recover the PullJob stashed in the easy handle's private pointer */
        if (curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&j) != CURLE_OK)
                return;

        /* Ignore stray completions for jobs that already reached a terminal state */
        if (!j || IN_SET(j->state, PULL_JOB_DONE, PULL_JOB_FAILED))
                return;

        if (result != CURLE_OK) {
                log_error("Transfer failed: %s", curl_easy_strerror(result));
                r = -EIO;
                goto finish;
        }

        code = curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
        if (code != CURLE_OK) {
                log_error("Failed to retrieve response code: %s", curl_easy_strerror(code));
                r = -EIO;
                goto finish;
        } else if (status == 304) {
                /* 304 Not Modified: our If-None-Match ETag matched, nothing to fetch */
                log_info("Image already downloaded. Skipping download.");
                j->etag_exists = true;
                r = 0;
                goto finish;
        } else if (status >= 300) {
                if (status == 404 && j->style == VERIFICATION_PER_FILE) {

                        /* retry pull job with SHA256SUMS file */
                        r = pull_job_restart(j);
                        if (r < 0)
                                goto finish;

                        /* pull_job_restart() installed a fresh easy handle in j->curl */
                        code = curl_easy_getinfo(j->curl, CURLINFO_RESPONSE_CODE, &status);
                        if (code != CURLE_OK) {
                                log_error("Failed to retrieve response code: %s", curl_easy_strerror(code));
                                r = -EIO;
                                goto finish;
                        }

                        if (status == 0) {
                                /* No response yet on the restarted transfer: switch to
                                 * per-directory verification and let the new job run —
                                 * deliberately NOT calling pull_job_finish() here. */
                                j->style = VERIFICATION_PER_DIRECTORY;
                                return;
                        }
                }

                log_error("HTTP request to %s failed with code %li.", j->url, status);
                r = -EIO;
                goto finish;
        } else if (status < 200) {
                log_error("HTTP request to %s finished with unexpected code %li.", j->url, status);
                r = -EIO;
                goto finish;
        }

        /* A 2xx reply should have delivered a body; if we never left the header
         * analysis phase the connection terminated prematurely */
        if (j->state != PULL_JOB_RUNNING) {
                log_error("Premature connection termination.");
                r = -EIO;
                goto finish;
        }

        /* If the server announced a Content-Length, the compressed byte count
         * must match it exactly */
        if (j->content_length != (uint64_t) -1 &&
            j->content_length != j->written_compressed) {
                log_error("Download truncated.");
                r = -EIO;
                goto finish;
        }

        if (j->checksum_context) {
                uint8_t *k;

                /* Finalize the SHA256 over the compressed stream and store it as hex */
                k = gcry_md_read(j->checksum_context, GCRY_MD_SHA256);
                if (!k) {
                        log_error("Failed to get checksum.");
                        r = -EIO;
                        goto finish;
                }

                j->checksum = hexmem(k, gcry_md_get_algo_dlen(GCRY_MD_SHA256));
                if (!j->checksum) {
                        r = log_oom();
                        goto finish;
                }

                log_debug("SHA256 of %s is %s.", j->url, j->checksum);
        }

        if (j->disk_fd >= 0 && j->allow_sparse) {
                /* Make sure the file size is right, in case the file was
                 * sparse and we just seeked for the last part */

                if (ftruncate(j->disk_fd, j->written_uncompressed) < 0) {
                        r = log_error_errno(errno, "Failed to truncate file: %m");
                        goto finish;
                }

                /* Record provenance in xattrs; best effort, failures ignored */
                if (j->etag)
                        (void) fsetxattr(j->disk_fd, "user.source_etag", j->etag, strlen(j->etag), 0);
                if (j->url)
                        (void) fsetxattr(j->disk_fd, "user.source_url", j->url, strlen(j->url), 0);

                /* Propagate the server's Last-Modified time onto the file, best effort */
                if (j->mtime != 0) {
                        struct timespec ut[2];

                        timespec_store(&ut[0], j->mtime);
                        ut[1] = ut[0];
                        (void) futimens(j->disk_fd, ut);

                        (void) fd_setcrtime(j->disk_fd, j->mtime);
                }
        }

        r = 0;

finish:
        pull_job_finish(j, r);
}
223
224 static int pull_job_write_uncompressed(const void *p, size_t sz, void *userdata) {
225 PullJob *j = userdata;
226 ssize_t n;
227
228 assert(j);
229 assert(p);
230
231 if (sz <= 0)
232 return 0;
233
234 if (j->written_uncompressed + sz < j->written_uncompressed) {
235 log_error("File too large, overflow");
236 return -EOVERFLOW;
237 }
238
239 if (j->written_uncompressed + sz > j->uncompressed_max) {
240 log_error("File overly large, refusing");
241 return -EFBIG;
242 }
243
244 if (j->disk_fd >= 0) {
245
246 if (j->grow_machine_directory && j->written_since_last_grow >= GROW_INTERVAL_BYTES) {
247 j->written_since_last_grow = 0;
248 grow_machine_directory();
249 }
250
251 if (j->allow_sparse)
252 n = sparse_write(j->disk_fd, p, sz, 64);
253 else
254 n = write(j->disk_fd, p, sz);
255 if (n < 0)
256 return log_error_errno(errno, "Failed to write file: %m");
257 if ((size_t) n < sz) {
258 log_error("Short write");
259 return -EIO;
260 }
261 } else {
262
263 if (!GREEDY_REALLOC(j->payload, j->payload_allocated, j->payload_size + sz))
264 return log_oom();
265
266 memcpy(j->payload + j->payload_size, p, sz);
267 j->payload_size += sz;
268 }
269
270 j->written_uncompressed += sz;
271 j->written_since_last_grow += sz;
272
273 return 0;
274 }
275
/* Accepts a chunk of the raw (possibly compressed) HTTP body: enforces size
 * limits, feeds the bytes into the SHA256 context (the checksum covers the
 * stream as downloaded, not the decompressed data), and pushes them through
 * the decompressor, which delivers plaintext to pull_job_write_uncompressed().
 * Returns 0 on success, negative errno on failure. Note: the guard checks are
 * ordered so that the most specific error (-EOVERFLOW vs -EFBIG) wins. */
static int pull_job_write_compressed(PullJob *j, void *p, size_t sz) {
        int r;

        assert(j);
        assert(p);

        if (sz <= 0)
                return 0;

        /* Reject additions that would wrap the 64bit byte counter */
        if (j->written_compressed + sz < j->written_compressed) {
                log_error("File too large, overflow");
                return -EOVERFLOW;
        }

        if (j->written_compressed + sz > j->compressed_max) {
                log_error("File overly large, refusing.");
                return -EFBIG;
        }

        /* The body may not exceed the server-announced Content-Length, if any */
        if (j->content_length != (uint64_t) -1 &&
            j->written_compressed + sz > j->content_length) {
                log_error("Content length incorrect.");
                return -EFBIG;
        }

        if (j->checksum_context)
                gcry_md_write(j->checksum_context, p, sz);

        r = import_uncompress(&j->compress, p, sz, pull_job_write_uncompressed, j);
        if (r < 0)
                return r;

        /* Only account for the chunk once it was fully processed */
        j->written_compressed += sz;

        return 0;
}
312
313 static int pull_job_open_disk(PullJob *j) {
314 int r;
315
316 assert(j);
317
318 if (j->on_open_disk) {
319 r = j->on_open_disk(j);
320 if (r < 0)
321 return r;
322 }
323
324 if (j->disk_fd >= 0) {
325 /* Check if we can do sparse files */
326
327 if (lseek(j->disk_fd, SEEK_SET, 0) == 0)
328 j->allow_sparse = true;
329 else {
330 if (errno != ESPIPE)
331 return log_error_errno(errno, "Failed to seek on file descriptor: %m");
332
333 j->allow_sparse = false;
334 }
335 }
336
337 if (j->calc_checksum) {
338 if (gcry_md_open(&j->checksum_context, GCRY_MD_SHA256, 0) != 0) {
339 log_error("Failed to initialize hash context.");
340 return -EIO;
341 }
342 }
343
344 return 0;
345 }
346
/* Called while the job is in PULL_JOB_ANALYZING: tries to detect the stream's
 * compression format from the payload buffered so far. If detection needs more
 * data it returns 0 and leaves the job analyzing; once detected it opens the
 * output, takes ownership of the buffered payload, switches the job to
 * PULL_JOB_RUNNING and replays the buffered bytes through the write path. */
static int pull_job_detect_compression(PullJob *j) {
        _cleanup_free_ uint8_t *stub = NULL;
        size_t stub_size;

        int r;

        assert(j);

        /* r == 0 means "not enough data yet to decide" — keep buffering */
        r = import_uncompress_detect(&j->compress, j->payload, j->payload_size);
        if (r < 0)
                return log_error_errno(r, "Failed to initialize compressor: %m");
        if (r == 0)
                return 0;

        log_debug("Stream is compressed: %s", import_compress_type_to_string(j->compress.type));

        r = pull_job_open_disk(j);
        if (r < 0)
                return r;

        /* Now, take the payload we read so far, and decompress it */
        stub = j->payload;
        stub_size = j->payload_size;

        /* Ownership moved to stub (freed on return via _cleanup_free_);
         * reset the job's buffer so the write path starts from scratch */
        j->payload = NULL;
        j->payload_size = 0;
        j->payload_allocated = 0;

        /* Must happen before replaying: the write callback only accepts data
         * in the RUNNING state */
        j->state = PULL_JOB_RUNNING;

        r = pull_job_write_compressed(j, stub, stub_size);
        if (r < 0)
                return r;

        return 0;
}
383
384 static size_t pull_job_write_callback(void *contents, size_t size, size_t nmemb, void *userdata) {
385 PullJob *j = userdata;
386 size_t sz = size * nmemb;
387 int r;
388
389 assert(contents);
390 assert(j);
391
392 switch (j->state) {
393
394 case PULL_JOB_ANALYZING:
395 /* Let's first check what it actually is */
396
397 if (!GREEDY_REALLOC(j->payload, j->payload_allocated, j->payload_size + sz)) {
398 r = log_oom();
399 goto fail;
400 }
401
402 memcpy(j->payload + j->payload_size, contents, sz);
403 j->payload_size += sz;
404
405 r = pull_job_detect_compression(j);
406 if (r < 0)
407 goto fail;
408
409 break;
410
411 case PULL_JOB_RUNNING:
412
413 r = pull_job_write_compressed(j, contents, sz);
414 if (r < 0)
415 goto fail;
416
417 break;
418
419 case PULL_JOB_DONE:
420 case PULL_JOB_FAILED:
421 r = -ESTALE;
422 goto fail;
423
424 default:
425 assert_not_reached("Impossible state.");
426 }
427
428 return sz;
429
430 fail:
431 pull_job_finish(j, r);
432 return 0;
433 }
434
/* curl CURLOPT_HEADERFUNCTION callback, invoked once per response header line.
 * Extracts ETag (finishing the job early if it matches a cached one),
 * Content-Length (enforcing the size limit) and Last-Modified, then forwards
 * anything else to the user's on_header hook. Returns the consumed size, or 0
 * (after finishing the job) to make curl abort the transfer. */
static size_t pull_job_header_callback(void *contents, size_t size, size_t nmemb, void *userdata) {
        PullJob *j = userdata;
        size_t sz = size * nmemb;
        _cleanup_free_ char *length = NULL, *last_modified = NULL;
        char *etag;
        int r;

        assert(contents);
        assert(j);

        /* Headers arriving after the job reached a terminal state are stale */
        if (IN_SET(j->state, PULL_JOB_DONE, PULL_JOB_FAILED)) {
                r = -ESTALE;
                goto fail;
        }

        assert(j->state == PULL_JOB_ANALYZING);

        /* r > 0 means the line matched and a copy was allocated */
        r = curl_header_strdup(contents, sz, "ETag:", &etag);
        if (r < 0) {
                log_oom();
                goto fail;
        }
        if (r > 0) {
                /* Ownership of etag moves into the job (no _cleanup_ needed) */
                free(j->etag);
                j->etag = etag;

                /* If we already have this version cached, end the job successfully */
                if (strv_contains(j->old_etags, j->etag)) {
                        log_info("Image already downloaded. Skipping download.");
                        j->etag_exists = true;
                        pull_job_finish(j, 0);
                        return sz;
                }

                return sz;
        }

        r = curl_header_strdup(contents, sz, "Content-Length:", &length);
        if (r < 0) {
                log_oom();
                goto fail;
        }
        if (r > 0) {
                /* Unparsable length is ignored; content_length stays at (uint64_t) -1 */
                (void) safe_atou64(length, &j->content_length);

                if (j->content_length != (uint64_t) -1) {
                        char bytes[FORMAT_BYTES_MAX];

                        /* Refuse early if the announced size exceeds the limit */
                        if (j->content_length > j->compressed_max) {
                                log_error("Content too large.");
                                r = -EFBIG;
                                goto fail;
                        }

                        log_info("Downloading %s for %s.", format_bytes(bytes, sizeof(bytes), j->content_length), j->url);
                }

                return sz;
        }

        r = curl_header_strdup(contents, sz, "Last-Modified:", &last_modified);
        if (r < 0) {
                log_oom();
                goto fail;
        }
        if (r > 0) {
                /* Best effort: a bad date simply leaves j->mtime untouched */
                (void) curl_parse_http_time(last_modified, &j->mtime);
                return sz;
        }

        /* Any other header is handed to the user's hook, if set */
        if (j->on_header) {
                r = j->on_header(j, contents, sz);
                if (r < 0)
                        goto fail;
        }

        return sz;

fail:
        pull_job_finish(j, r);
        return 0;
}
516
517 static int pull_job_progress_callback(void *userdata, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow) {
518 PullJob *j = userdata;
519 unsigned percent;
520 usec_t n;
521
522 assert(j);
523
524 if (dltotal <= 0)
525 return 0;
526
527 percent = ((100 * dlnow) / dltotal);
528 n = now(CLOCK_MONOTONIC);
529
530 if (n > j->last_status_usec + USEC_PER_SEC &&
531 percent != j->progress_percent &&
532 dlnow < dltotal) {
533 char buf[FORMAT_TIMESPAN_MAX];
534
535 if (n - j->start_usec > USEC_PER_SEC && dlnow > 0) {
536 char y[FORMAT_BYTES_MAX];
537 usec_t left, done;
538
539 done = n - j->start_usec;
540 left = (usec_t) (((double) done * (double) dltotal) / dlnow) - done;
541
542 log_info("Got %u%% of %s. %s left at %s/s.",
543 percent,
544 j->url,
545 format_timespan(buf, sizeof(buf), left, USEC_PER_SEC),
546 format_bytes(y, sizeof(y), (uint64_t) ((double) dlnow / ((double) done / (double) USEC_PER_SEC))));
547 } else
548 log_info("Got %u%% of %s.", percent, j->url);
549
550 j->progress_percent = percent;
551 j->last_status_usec = n;
552
553 if (j->on_progress)
554 j->on_progress(j);
555 }
556
557 return 0;
558 }
559
560 int pull_job_new(PullJob **ret, const char *url, CurlGlue *glue, void *userdata) {
561 _cleanup_(pull_job_unrefp) PullJob *j = NULL;
562
563 assert(url);
564 assert(glue);
565 assert(ret);
566
567 j = new0(PullJob, 1);
568 if (!j)
569 return -ENOMEM;
570
571 j->state = PULL_JOB_INIT;
572 j->disk_fd = -1;
573 j->userdata = userdata;
574 j->glue = glue;
575 j->content_length = (uint64_t) -1;
576 j->start_usec = now(CLOCK_MONOTONIC);
577 j->compressed_max = j->uncompressed_max = 64LLU * 1024LLU * 1024LLU * 1024LLU; /* 64GB safety limit */
578 j->style = VERIFICATION_STYLE_UNSET;
579
580 j->url = strdup(url);
581 if (!j->url)
582 return -ENOMEM;
583
584 *ret = j;
585 j = NULL;
586
587 return 0;
588 }
589
/* Starts the transfer for a freshly created (or restarted) job: builds the
 * curl easy handle, attaches the If-None-Match header derived from the cached
 * ETags, installs the write/header/progress callbacks and registers the handle
 * with the glue's event loop. Moves the job to PULL_JOB_ANALYZING.
 * Returns 0 on success, -EBUSY if the job already started, -ENOMEM/-EIO on
 * setup failures. */
int pull_job_begin(PullJob *j) {
        int r;

        assert(j);

        if (j->state != PULL_JOB_INIT)
                return -EBUSY;

        /* Make room in the machine pool before the download starts */
        if (j->grow_machine_directory)
                grow_machine_directory();

        r = curl_glue_make(&j->curl, j->url, j);
        if (r < 0)
                return r;

        /* Send all cached ETags so the server can answer 304 Not Modified */
        if (!strv_isempty(j->old_etags)) {
                _cleanup_free_ char *cc = NULL, *hdr = NULL;

                cc = strv_join(j->old_etags, ", ");
                if (!cc)
                        return -ENOMEM;

                hdr = strappend("If-None-Match: ", cc);
                if (!hdr)
                        return -ENOMEM;

                if (!j->request_header) {
                        j->request_header = curl_slist_new(hdr, NULL);
                        if (!j->request_header)
                                return -ENOMEM;
                } else {
                        struct curl_slist *l;

                        /* Append to the existing list; keep the old head on failure */
                        l = curl_slist_append(j->request_header, hdr);
                        if (!l)
                                return -ENOMEM;

                        j->request_header = l;
                }
        }

        if (j->request_header) {
                if (curl_easy_setopt(j->curl, CURLOPT_HTTPHEADER, j->request_header) != CURLE_OK)
                        return -EIO;
        }

        /* Body data -> pull_job_write_callback() */
        if (curl_easy_setopt(j->curl, CURLOPT_WRITEFUNCTION, pull_job_write_callback) != CURLE_OK)
                return -EIO;

        if (curl_easy_setopt(j->curl, CURLOPT_WRITEDATA, j) != CURLE_OK)
                return -EIO;

        /* Response headers -> pull_job_header_callback() */
        if (curl_easy_setopt(j->curl, CURLOPT_HEADERFUNCTION, pull_job_header_callback) != CURLE_OK)
                return -EIO;

        if (curl_easy_setopt(j->curl, CURLOPT_HEADERDATA, j) != CURLE_OK)
                return -EIO;

        /* Progress reports -> pull_job_progress_callback(); NOPROGRESS must be
         * cleared for the xferinfo callback to fire */
        if (curl_easy_setopt(j->curl, CURLOPT_XFERINFOFUNCTION, pull_job_progress_callback) != CURLE_OK)
                return -EIO;

        if (curl_easy_setopt(j->curl, CURLOPT_XFERINFODATA, j) != CURLE_OK)
                return -EIO;

        if (curl_easy_setopt(j->curl, CURLOPT_NOPROGRESS, 0) != CURLE_OK)
                return -EIO;

        r = curl_glue_add(j->glue, j->curl);
        if (r < 0)
                return r;

        j->state = PULL_JOB_ANALYZING;

        return 0;
}