src/import/pull-job.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2015 Lennart Poettering
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <sys/xattr.h>
  21
  22 #include "alloc-util.h"
  23 #include "fd-util.h"
  24 #include "hexdecoct.h"
  25 #include "import-util.h"
  26 #include "io-util.h"
  27 #include "machine-pool.h"
  28 #include "parse-util.h"
  29 #include "pull-common.h"
  30 #include "pull-job.h"
  31 #include "string-util.h"
  32 #include "strv.h"
  33 #include "xattr-util.h"
  34
  35 PullJob* pull_job_unref(PullJob *j) {
  36         if (!j)
  37                 return NULL;
  38
  39         curl_glue_remove_and_free(j->glue, j->curl);
  40         curl_slist_free_all(j->request_header);
  41
  42         safe_close(j->disk_fd);
  43
  44         import_compress_free(&j->compress);
  45
  46         if (j->checksum_context)
  47                 gcry_md_close(j->checksum_context);
  48
  49         free(j->url);
  50         free(j->etag);
  51         strv_free(j->old_etags);
  52         free(j->payload);
  53         free(j->checksum);
  54
  55         return mfree(j);
  56 }
  57
  58 static void pull_job_finish(PullJob *j, int ret) {
  59         assert(j);
  60
  61         if (IN_SET(j->state, PULL_JOB_DONE, PULL_JOB_FAILED))
  62                 return;
  63
  64         if (ret == 0) {
  65                 j->state = PULL_JOB_DONE;
  66                 j->progress_percent = 100;
  67                 log_info("Download of %s complete.", j->url);
  68         } else {
  69                 j->state = PULL_JOB_FAILED;
  70                 j->error = ret;
  71         }
  72
  73         if (j->on_finished)
  74                 j->on_finished(j);
  75 }
  76
  77 static int pull_job_restart(PullJob *j) {
  78         int r;
  79         char *chksum_url = NULL;
  80
  81         r = import_url_change_last_component(j->url, "SHA256SUMS", &chksum_url);
  82         if (r < 0)
  83                 return r;
  84
  85         free(j->url);
  86         j->url = chksum_url;
  87         j->state = PULL_JOB_INIT;
  88         j->payload = mfree(j->payload);
  89         j->payload_size = 0;
  90         j->payload_allocated = 0;
  91         j->written_compressed = 0;
  92         j->written_uncompressed = 0;
  93         j->written_since_last_grow = 0;
  94
  95         r = pull_job_begin(j);
  96         if (r < 0)
  97                 return r;
  98
  99         return 0;
 100 }
 101
 102 void pull_job_curl_on_finished(CurlGlue *g, CURL *curl, CURLcode result) {
 103         PullJob *j = NULL;
 104         CURLcode code;
 105         long status;
 106         int r;
 107
 108         if (curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&j) != CURLE_OK)
 109                 return;
 110
 111         if (!j || j->state == PULL_JOB_DONE || j->state == PULL_JOB_FAILED)
 112                 return;
 113
 114         if (result != CURLE_OK) {
 115                 log_error("Transfer failed: %s", curl_easy_strerror(result));
 116                 r = -EIO;
 117                 goto finish;
 118         }
 119
 120         code = curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
 121         if (code != CURLE_OK) {
 122                 log_error("Failed to retrieve response code: %s", curl_easy_strerror(code));
 123                 r = -EIO;
 124                 goto finish;
 125         } else if (status == 304) {
 126                 log_info("Image already downloaded. Skipping download.");
 127                 j->etag_exists = true;
 128                 r = 0;
 129                 goto finish;
 130         } else if (status >= 300) {
 131                 if (status == 404 && j->style == VERIFICATION_PER_FILE) {
 132
 133                         /* retry pull job with SHA256SUMS file */
 134                         r = pull_job_restart(j);
 135                         if (r < 0)
 136                                 goto finish;
 137
 138                         code = curl_easy_getinfo(j->curl, CURLINFO_RESPONSE_CODE, &status);
 139                         if (code != CURLE_OK) {
 140                                 log_error("Failed to retrieve response code: %s", curl_easy_strerror(code));
 141                                 r = -EIO;
 142                                 goto finish;
 143                         }
 144
 145                         if (status == 0) {
 146                                 j->style = VERIFICATION_PER_DIRECTORY;
 147                                 return;
 148                         }
 149                 }
 150
 151                 log_error("HTTP request to %s failed with code %li.", j->url, status);
 152                 r = -EIO;
 153                 goto finish;
 154         } else if (status < 200) {
 155                 log_error("HTTP request to %s finished with unexpected code %li.", j->url, status);
 156                 r = -EIO;
 157                 goto finish;
 158         }
 159
 160         if (j->state != PULL_JOB_RUNNING) {
 161                 log_error("Premature connection termination.");
 162                 r = -EIO;
 163                 goto finish;
 164         }
 165
 166         if (j->content_length != (uint64_t) -1 &&
 167             j->content_length != j->written_compressed) {
 168                 log_error("Download truncated.");
 169                 r = -EIO;
 170                 goto finish;
 171         }
 172
 173         if (j->checksum_context) {
 174                 uint8_t *k;
 175
 176                 k = gcry_md_read(j->checksum_context, GCRY_MD_SHA256);
 177                 if (!k) {
 178                         log_error("Failed to get checksum.");
 179                         r = -EIO;
 180                         goto finish;
 181                 }
 182
 183                 j->checksum = hexmem(k, gcry_md_get_algo_dlen(GCRY_MD_SHA256));
 184                 if (!j->checksum) {
 185                         r = log_oom();
 186                         goto finish;
 187                 }
 188
 189                 log_debug("SHA256 of %s is %s.", j->url, j->checksum);
 190         }
 191
 192         if (j->disk_fd >= 0 && j->allow_sparse) {
 193                 /* Make sure the file size is right, in case the file was
 194                  * sparse and we just seeked for the last part */
 195
 196                 if (ftruncate(j->disk_fd, j->written_uncompressed) < 0) {
 197                         r = log_error_errno(errno, "Failed to truncate file: %m");
 198                         goto finish;
 199                 }
 200
 201                 if (j->etag)
 202                         (void) fsetxattr(j->disk_fd, "user.source_etag", j->etag, strlen(j->etag), 0);
 203                 if (j->url)
 204                         (void) fsetxattr(j->disk_fd, "user.source_url", j->url, strlen(j->url), 0);
 205
 206                 if (j->mtime != 0) {
 207                         struct timespec ut[2];
 208
 209                         timespec_store(&ut[0], j->mtime);
 210                         ut[1] = ut[0];
 211                         (void) futimens(j->disk_fd, ut);
 212
 213                         (void) fd_setcrtime(j->disk_fd, j->mtime);
 214                 }
 215         }
 216
 217         r = 0;
 218
 219 finish:
 220         pull_job_finish(j, r);
 221 }
 222
 223 static int pull_job_write_uncompressed(const void *p, size_t sz, void *userdata) {
 224         PullJob *j = userdata;
 225         ssize_t n;
 226
 227         assert(j);
 228         assert(p);
 229
 230         if (sz <= 0)
 231                 return 0;
 232
 233         if (j->written_uncompressed + sz < j->written_uncompressed) {
 234                 log_error("File too large, overflow");
 235                 return -EOVERFLOW;
 236         }
 237
 238         if (j->written_uncompressed + sz > j->uncompressed_max) {
 239                 log_error("File overly large, refusing");
 240                 return -EFBIG;
 241         }
 242
 243         if (j->disk_fd >= 0) {
 244
 245                 if (j->grow_machine_directory && j->written_since_last_grow >= GROW_INTERVAL_BYTES) {
 246                         j->written_since_last_grow = 0;
 247                         grow_machine_directory();
 248                 }
 249
 250                 if (j->allow_sparse)
 251                         n = sparse_write(j->disk_fd, p, sz, 64);
 252                 else
 253                         n = write(j->disk_fd, p, sz);
 254                 if (n < 0)
 255                         return log_error_errno(errno, "Failed to write file: %m");
 256                 if ((size_t) n < sz) {
 257                         log_error("Short write");
 258                         return -EIO;
 259                 }
 260         } else {
 261
 262                 if (!GREEDY_REALLOC(j->payload, j->payload_allocated, j->payload_size + sz))
 263                         return log_oom();
 264
 265                 memcpy(j->payload + j->payload_size, p, sz);
 266                 j->payload_size += sz;
 267         }
 268
 269         j->written_uncompressed += sz;
 270         j->written_since_last_grow += sz;
 271
 272         return 0;
 273 }
 274
 275 static int pull_job_write_compressed(PullJob *j, void *p, size_t sz) {
 276         int r;
 277
 278         assert(j);
 279         assert(p);
 280
 281         if (sz <= 0)
 282                 return 0;
 283
 284         if (j->written_compressed + sz < j->written_compressed) {
 285                 log_error("File too large, overflow");
 286                 return -EOVERFLOW;
 287         }
 288
 289         if (j->written_compressed + sz > j->compressed_max) {
 290                 log_error("File overly large, refusing.");
 291                 return -EFBIG;
 292         }
 293
 294         if (j->content_length != (uint64_t) -1 &&
 295             j->written_compressed + sz > j->content_length) {
 296                 log_error("Content length incorrect.");
 297                 return -EFBIG;
 298         }
 299
 300         if (j->checksum_context)
 301                 gcry_md_write(j->checksum_context, p, sz);
 302
 303         r = import_uncompress(&j->compress, p, sz, pull_job_write_uncompressed, j);
 304         if (r < 0)
 305                 return r;
 306
 307         j->written_compressed += sz;
 308
 309         return 0;
 310 }
 311
 312 static int pull_job_open_disk(PullJob *j) {
 313         int r;
 314
 315         assert(j);
 316
 317         if (j->on_open_disk) {
 318                 r = j->on_open_disk(j);
 319                 if (r < 0)
 320                         return r;
 321         }
 322
 323         if (j->disk_fd >= 0) {
 324                 /* Check if we can do sparse files */
 325
 326                 if (lseek(j->disk_fd, SEEK_SET, 0) == 0)
 327                         j->allow_sparse = true;
 328                 else {
 329                         if (errno != ESPIPE)
 330                                 return log_error_errno(errno, "Failed to seek on file descriptor: %m");
 331
 332                         j->allow_sparse = false;
 333                 }
 334         }
 335
 336         if (j->calc_checksum) {
 337                 if (gcry_md_open(&j->checksum_context, GCRY_MD_SHA256, 0) != 0) {
 338                         log_error("Failed to initialize hash context.");
 339                         return -EIO;
 340                 }
 341         }
 342
 343         return 0;
 344 }
 345
 346 static int pull_job_detect_compression(PullJob *j) {
 347         _cleanup_free_ uint8_t *stub = NULL;
 348         size_t stub_size;
 349
 350         int r;
 351
 352         assert(j);
 353
 354         r = import_uncompress_detect(&j->compress, j->payload, j->payload_size);
 355         if (r < 0)
 356                 return log_error_errno(r, "Failed to initialize compressor: %m");
 357         if (r == 0)
 358                 return 0;
 359
 360         log_debug("Stream is compressed: %s", import_compress_type_to_string(j->compress.type));
 361
 362         r = pull_job_open_disk(j);
 363         if (r < 0)
 364                 return r;
 365
 366         /* Now, take the payload we read so far, and decompress it */
 367         stub = j->payload;
 368         stub_size = j->payload_size;
 369
 370         j->payload = NULL;
 371         j->payload_size = 0;
 372         j->payload_allocated = 0;
 373
 374         j->state = PULL_JOB_RUNNING;
 375
 376         r = pull_job_write_compressed(j, stub, stub_size);
 377         if (r < 0)
 378                 return r;
 379
 380         return 0;
 381 }
 382
 383 static size_t pull_job_write_callback(void *contents, size_t size, size_t nmemb, void *userdata) {
 384         PullJob *j = userdata;
 385         size_t sz = size * nmemb;
 386         int r;
 387
 388         assert(contents);
 389         assert(j);
 390
 391         switch (j->state) {
 392
 393         case PULL_JOB_ANALYZING:
 394                 /* Let's first check what it actually is */
 395
 396                 if (!GREEDY_REALLOC(j->payload, j->payload_allocated, j->payload_size + sz)) {
 397                         r = log_oom();
 398                         goto fail;
 399                 }
 400
 401                 memcpy(j->payload + j->payload_size, contents, sz);
 402                 j->payload_size += sz;
 403
 404                 r = pull_job_detect_compression(j);
 405                 if (r < 0)
 406                         goto fail;
 407
 408                 break;
 409
 410         case PULL_JOB_RUNNING:
 411
 412                 r = pull_job_write_compressed(j, contents, sz);
 413                 if (r < 0)
 414                         goto fail;
 415
 416                 break;
 417
 418         case PULL_JOB_DONE:
 419         case PULL_JOB_FAILED:
 420                 r = -ESTALE;
 421                 goto fail;
 422
 423         default:
 424                 assert_not_reached("Impossible state.");
 425         }
 426
 427         return sz;
 428
 429 fail:
 430         pull_job_finish(j, r);
 431         return 0;
 432 }
 433
 434 static size_t pull_job_header_callback(void *contents, size_t size, size_t nmemb, void *userdata) {
 435         PullJob *j = userdata;
 436         size_t sz = size * nmemb;
 437         _cleanup_free_ char *length = NULL, *last_modified = NULL;
 438         char *etag;
 439         int r;
 440
 441         assert(contents);
 442         assert(j);
 443
 444         if (IN_SET(j->state, PULL_JOB_DONE, PULL_JOB_FAILED)) {
 445                 r = -ESTALE;
 446                 goto fail;
 447         }
 448
 449         assert(j->state == PULL_JOB_ANALYZING);
 450
 451         r = curl_header_strdup(contents, sz, "ETag:", &etag);
 452         if (r < 0) {
 453                 log_oom();
 454                 goto fail;
 455         }
 456         if (r > 0) {
 457                 free(j->etag);
 458                 j->etag = etag;
 459
 460                 if (strv_contains(j->old_etags, j->etag)) {
 461                         log_info("Image already downloaded. Skipping download.");
 462                         j->etag_exists = true;
 463                         pull_job_finish(j, 0);
 464                         return sz;
 465                 }
 466
 467                 return sz;
 468         }
 469
 470         r = curl_header_strdup(contents, sz, "Content-Length:", &length);
 471         if (r < 0) {
 472                 log_oom();
 473                 goto fail;
 474         }
 475         if (r > 0) {
 476                 (void) safe_atou64(length, &j->content_length);
 477
 478                 if (j->content_length != (uint64_t) -1) {
 479                         char bytes[FORMAT_BYTES_MAX];
 480
 481                         if (j->content_length > j->compressed_max) {
 482                                 log_error("Content too large.");
 483                                 r = -EFBIG;
 484                                 goto fail;
 485                         }
 486
 487                         log_info("Downloading %s for %s.", format_bytes(bytes, sizeof(bytes), j->content_length), j->url);
 488                 }
 489
 490                 return sz;
 491         }
 492
 493         r = curl_header_strdup(contents, sz, "Last-Modified:", &last_modified);
 494         if (r < 0) {
 495                 log_oom();
 496                 goto fail;
 497         }
 498         if (r > 0) {
 499                 (void) curl_parse_http_time(last_modified, &j->mtime);
 500                 return sz;
 501         }
 502
 503         if (j->on_header) {
 504                 r = j->on_header(j, contents, sz);
 505                 if (r < 0)
 506                         goto fail;
 507         }
 508
 509         return sz;
 510
 511 fail:
 512         pull_job_finish(j, r);
 513         return 0;
 514 }
 515
 516 static int pull_job_progress_callback(void *userdata, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow) {
 517         PullJob *j = userdata;
 518         unsigned percent;
 519         usec_t n;
 520
 521         assert(j);
 522
 523         if (dltotal <= 0)
 524                 return 0;
 525
 526         percent = ((100 * dlnow) / dltotal);
 527         n = now(CLOCK_MONOTONIC);
 528
 529         if (n > j->last_status_usec + USEC_PER_SEC &&
 530             percent != j->progress_percent &&
 531             dlnow < dltotal) {
 532                 char buf[FORMAT_TIMESPAN_MAX];
 533
 534                 if (n - j->start_usec > USEC_PER_SEC && dlnow > 0) {
 535                         char y[FORMAT_BYTES_MAX];
 536                         usec_t left, done;
 537
 538                         done = n - j->start_usec;
 539                         left = (usec_t) (((double) done * (double) dltotal) / dlnow) - done;
 540
 541                         log_info("Got %u%% of %s. %s left at %s/s.",
 542                                  percent,
 543                                  j->url,
 544                                  format_timespan(buf, sizeof(buf), left, USEC_PER_SEC),
 545                                  format_bytes(y, sizeof(y), (uint64_t) ((double) dlnow / ((double) done / (double) USEC_PER_SEC))));
 546                 } else
 547                         log_info("Got %u%% of %s.", percent, j->url);
 548
 549                 j->progress_percent = percent;
 550                 j->last_status_usec = n;
 551
 552                 if (j->on_progress)
 553                         j->on_progress(j);
 554         }
 555
 556         return 0;
 557 }
 558
 559 int pull_job_new(PullJob **ret, const char *url, CurlGlue *glue, void *userdata) {
 560         _cleanup_(pull_job_unrefp) PullJob *j = NULL;
 561
 562         assert(url);
 563         assert(glue);
 564         assert(ret);
 565
 566         j = new0(PullJob, 1);
 567         if (!j)
 568                 return -ENOMEM;
 569
 570         j->state = PULL_JOB_INIT;
 571         j->disk_fd = -1;
 572         j->userdata = userdata;
 573         j->glue = glue;
 574         j->content_length = (uint64_t) -1;
 575         j->start_usec = now(CLOCK_MONOTONIC);
 576         j->compressed_max = j->uncompressed_max = 64LLU * 1024LLU * 1024LLU * 1024LLU; /* 64GB safety limit */
 577         j->style = VERIFICATION_STYLE_UNSET;
 578
 579         j->url = strdup(url);
 580         if (!j->url)
 581                 return -ENOMEM;
 582
 583         *ret = j;
 584         j = NULL;
 585
 586         return 0;
 587 }
 588
 589 int pull_job_begin(PullJob *j) {
 590         int r;
 591
 592         assert(j);
 593
 594         if (j->state != PULL_JOB_INIT)
 595                 return -EBUSY;
 596
 597         if (j->grow_machine_directory)
 598                 grow_machine_directory();
 599
 600         r = curl_glue_make(&j->curl, j->url, j);
 601         if (r < 0)
 602                 return r;
 603
 604         if (!strv_isempty(j->old_etags)) {
 605                 _cleanup_free_ char *cc = NULL, *hdr = NULL;
 606
 607                 cc = strv_join(j->old_etags, ", ");
 608                 if (!cc)
 609                         return -ENOMEM;
 610
 611                 hdr = strappend("If-None-Match: ", cc);
 612                 if (!hdr)
 613                         return -ENOMEM;
 614
 615                 if (!j->request_header) {
 616                         j->request_header = curl_slist_new(hdr, NULL);
 617                         if (!j->request_header)
 618                                 return -ENOMEM;
 619                 } else {
 620                         struct curl_slist *l;
 621
 622                         l = curl_slist_append(j->request_header, hdr);
 623                         if (!l)
 624                                 return -ENOMEM;
 625
 626                         j->request_header = l;
 627                 }
 628         }
 629
 630         if (j->request_header) {
 631                 if (curl_easy_setopt(j->curl, CURLOPT_HTTPHEADER, j->request_header) != CURLE_OK)
 632                         return -EIO;
 633         }
 634
 635         if (curl_easy_setopt(j->curl, CURLOPT_WRITEFUNCTION, pull_job_write_callback) != CURLE_OK)
 636                 return -EIO;
 637
 638         if (curl_easy_setopt(j->curl, CURLOPT_WRITEDATA, j) != CURLE_OK)
 639                 return -EIO;
 640
 641         if (curl_easy_setopt(j->curl, CURLOPT_HEADERFUNCTION, pull_job_header_callback) != CURLE_OK)
 642                 return -EIO;
 643
 644         if (curl_easy_setopt(j->curl, CURLOPT_HEADERDATA, j) != CURLE_OK)
 645                 return -EIO;
 646
 647         if (curl_easy_setopt(j->curl, CURLOPT_XFERINFOFUNCTION, pull_job_progress_callback) != CURLE_OK)
 648                 return -EIO;
 649
 650         if (curl_easy_setopt(j->curl, CURLOPT_XFERINFODATA, j) != CURLE_OK)
 651                 return -EIO;
 652
 653         if (curl_easy_setopt(j->curl, CURLOPT_NOPROGRESS, 0) != CURLE_OK)
 654                 return -EIO;
 655
 656         r = curl_glue_add(j->glue, j->curl);
 657         if (r < 0)
 658                 return r;
 659
 660         j->state = PULL_JOB_ANALYZING;
 661
 662         return 0;
 663 }