/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2015 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <sys/xattr.h>

#include "alloc-util.h"
#include "fd-util.h"
#include "hexdecoct.h"
#include "io-util.h"
#include "machine-pool.h"
#include "parse-util.h"
#include "pull-job.h"
#include "string-util.h"
#include "strv.h"
#include "xattr-util.h"

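/* A PullJob drives a single HTTP(S) download through libcurl. It moves
 * through the states PULL_JOB_INIT → PULL_JOB_ANALYZING → PULL_JOB_RUNNING →
 * PULL_JOB_DONE/PULL_JOB_FAILED. While ANALYZING, incoming body bytes are
 * buffered in j->payload until the compression format is detected; from then
 * on data flows curl → pull_job_write_callback() →
 * pull_job_write_compressed() (which hashes the raw stream) →
 * import_uncompress() → pull_job_write_uncompressed() (which writes to
 * j->disk_fd, or back into j->payload if no disk file was opened). */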
PullJob* pull_job_unref(PullJob *j) {
        if (!j)
                return NULL;

        curl_glue_remove_and_free(j->glue, j->curl);
        curl_slist_free_all(j->request_header);

        safe_close(j->disk_fd);

        import_compress_free(&j->compress);

        if (j->checksum_context)
                gcry_md_close(j->checksum_context);

        free(j->url);
        free(j->etag);
        strv_free(j->old_etags);
        free(j->payload);
        free(j->checksum);

        free(j);

        return NULL;
}
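
/* Note that pull_job_unref() accepts NULL and always returns NULL, so callers
 * can drop their reference and reset the pointer in one step:
 *
 *         j = pull_job_unref(j);
 *
 * The same property makes the _cleanup_(pull_job_unrefp) attribute used in
 * pull_job_new() below safe on every exit path. */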

static void pull_job_finish(PullJob *j, int ret) {
        assert(j);

        if (j->state == PULL_JOB_DONE ||
            j->state == PULL_JOB_FAILED)
                return;

        if (ret == 0) {
                j->state = PULL_JOB_DONE;
                j->progress_percent = 100;
                log_info("Download of %s complete.", j->url);
        } else {
                j->state = PULL_JOB_FAILED;
                j->error = ret;
        }

        if (j->on_finished)
                j->on_finished(j);
}
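
/* pull_job_finish() is the single completion path: it latches the terminal
 * state, records the error, and invokes the caller-supplied hook exactly
 * once. A minimal on_finished handler could look like this (a hypothetical
 * sketch; the real consumers live elsewhere in src/import/):
 *
 *         static void my_job_finished(PullJob *j) {
 *                 if (j->error != 0)
 *                         log_error_errno(j->error, "Job failed: %m");
 *                 else if (j->etag_exists)
 *                         log_info("Server copy unchanged, nothing pulled.");
 *                 else
 *                         log_info("Pulled %s successfully.", j->url);
 *         }
 */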

void pull_job_curl_on_finished(CurlGlue *g, CURL *curl, CURLcode result) {
        PullJob *j = NULL;
        CURLcode code;
        long status;
        int r;

        if (curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&j) != CURLE_OK)
                return;

        if (!j || j->state == PULL_JOB_DONE || j->state == PULL_JOB_FAILED)
                return;

        if (result != CURLE_OK) {
                log_error("Transfer failed: %s", curl_easy_strerror(result));
                r = -EIO;
                goto finish;
        }

        code = curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
        if (code != CURLE_OK) {
                log_error("Failed to retrieve response code: %s", curl_easy_strerror(code));
                r = -EIO;
                goto finish;
        } else if (status == 304) {
                log_info("Image already downloaded. Skipping download.");
                j->etag_exists = true;
                r = 0;
                goto finish;
        } else if (status >= 300) {
                log_error("HTTP request to %s failed with code %li.", j->url, status);
                r = -EIO;
                goto finish;
        } else if (status < 200) {
                log_error("HTTP request to %s finished with unexpected code %li.", j->url, status);
                r = -EIO;
                goto finish;
        }

        if (j->state != PULL_JOB_RUNNING) {
                log_error("Premature connection termination.");
                r = -EIO;
                goto finish;
        }

        if (j->content_length != (uint64_t) -1 &&
            j->content_length != j->written_compressed) {
                log_error("Download truncated.");
                r = -EIO;
                goto finish;
        }

        if (j->checksum_context) {
                uint8_t *k;

                k = gcry_md_read(j->checksum_context, GCRY_MD_SHA256);
                if (!k) {
                        log_error("Failed to get checksum.");
                        r = -EIO;
                        goto finish;
                }

                j->checksum = hexmem(k, gcry_md_get_algo_dlen(GCRY_MD_SHA256));
                if (!j->checksum) {
                        r = log_oom();
                        goto finish;
                }

                log_debug("SHA256 of %s is %s.", j->url, j->checksum);
        }

        if (j->disk_fd >= 0 && j->allow_sparse) {
                /* Make sure the file size is right, in case the file was
                 * sparse and we just seeked for the last part */

                if (ftruncate(j->disk_fd, j->written_uncompressed) < 0) {
                        r = log_error_errno(errno, "Failed to truncate file: %m");
                        goto finish;
                }

                if (j->etag)
                        (void) fsetxattr(j->disk_fd, "user.source_etag", j->etag, strlen(j->etag), 0);
                if (j->url)
                        (void) fsetxattr(j->disk_fd, "user.source_url", j->url, strlen(j->url), 0);

                if (j->mtime != 0) {
                        struct timespec ut[2];

                        timespec_store(&ut[0], j->mtime);
                        ut[1] = ut[0];
                        (void) futimens(j->disk_fd, ut);

                        (void) fd_setcrtime(j->disk_fd, j->mtime);
                }
        }

        r = 0;

finish:
        pull_job_finish(j, r);
}
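
/* The user.source_etag/user.source_url xattrs stamped above are stored
 * without a trailing NUL byte (fsetxattr() is passed strlen()), so a reader
 * has to terminate the value itself. A readback sketch, assuming the file
 * system supports user xattrs:
 *
 *         char etag[256];
 *         ssize_t n = fgetxattr(fd, "user.source_etag", etag, sizeof(etag) - 1);
 *         if (n >= 0)
 *                 etag[n] = 0;
 */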

static int pull_job_write_uncompressed(const void *p, size_t sz, void *userdata) {
        PullJob *j = userdata;
        ssize_t n;

        assert(j);
        assert(p);

        if (sz <= 0)
                return 0;

        if (j->written_uncompressed + sz < j->written_uncompressed) {
                log_error("File too large, overflow");
                return -EOVERFLOW;
        }

        if (j->written_uncompressed + sz > j->uncompressed_max) {
                log_error("File overly large, refusing");
                return -EFBIG;
        }

        if (j->disk_fd >= 0) {

                if (j->grow_machine_directory && j->written_since_last_grow >= GROW_INTERVAL_BYTES) {
                        j->written_since_last_grow = 0;
                        grow_machine_directory();
                }

                if (j->allow_sparse)
                        n = sparse_write(j->disk_fd, p, sz, 64);
                else
                        n = write(j->disk_fd, p, sz);
                if (n < 0)
                        return log_error_errno(errno, "Failed to write file: %m");
                if ((size_t) n < sz) {
                        log_error("Short write");
                        return -EIO;
                }
        } else {

                if (!GREEDY_REALLOC(j->payload, j->payload_allocated, j->payload_size + sz))
                        return log_oom();

                memcpy(j->payload + j->payload_size, p, sz);
                j->payload_size += sz;
        }

        j->written_uncompressed += sz;
        j->written_since_last_grow += sz;

        return 0;
}
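
/* The "a + sz < a" test above is the usual unsigned wraparound check: for
 * uint64_t values the sum only comes out smaller than one of its addends if
 * it overflowed, e.g. (UINT64_MAX - 1) + 2 wraps around to 0. Doing this
 * before the j->uncompressed_max comparison keeps that comparison
 * meaningful. */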

static int pull_job_write_compressed(PullJob *j, void *p, size_t sz) {
        int r;

        assert(j);
        assert(p);

        if (sz <= 0)
                return 0;

        if (j->written_compressed + sz < j->written_compressed) {
                log_error("File too large, overflow");
                return -EOVERFLOW;
        }

        if (j->written_compressed + sz > j->compressed_max) {
                log_error("File overly large, refusing.");
                return -EFBIG;
        }

        if (j->content_length != (uint64_t) -1 &&
            j->written_compressed + sz > j->content_length) {
                log_error("Content length incorrect.");
                return -EFBIG;
        }

        if (j->checksum_context)
                gcry_md_write(j->checksum_context, p, sz);

        r = import_uncompress(&j->compress, p, sz, pull_job_write_uncompressed, j);
        if (r < 0)
                return r;

        j->written_compressed += sz;

        return 0;
}
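
/* Compressed and uncompressed byte counts are tracked separately on purpose:
 * the Content-Length header and the SHA256 checksum both refer to the
 * compressed wire format (hence gcry_md_write() is fed here, before
 * decompression), while uncompressed_max bounds what the decompressor may
 * expand to, guarding against decompression bombs. */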

static int pull_job_open_disk(PullJob *j) {
        int r;

        assert(j);

        if (j->on_open_disk) {
                r = j->on_open_disk(j);
                if (r < 0)
                        return r;
        }

        if (j->disk_fd >= 0) {
                /* Check if we can do sparse files */

                if (lseek(j->disk_fd, 0, SEEK_SET) == 0)
                        j->allow_sparse = true;
                else {
                        if (errno != ESPIPE)
                                return log_error_errno(errno, "Failed to seek on file descriptor: %m");

                        j->allow_sparse = false;
                }
        }

        if (j->calc_checksum) {
                if (gcry_md_open(&j->checksum_context, GCRY_MD_SHA256, 0) != 0) {
                        log_error("Failed to initialize hash context.");
                        return -EIO;
                }
        }

        return 0;
}
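
/* The lseek() probe above is what decides between the two write strategies in
 * pull_job_write_uncompressed(): on a seekable file descriptor sparse_write()
 * may skip over runs of zero bytes by seeking instead of writing, while a
 * pipe (ESPIPE) only supports plain sequential write(). */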

static int pull_job_detect_compression(PullJob *j) {
        _cleanup_free_ uint8_t *stub = NULL;
        size_t stub_size;
        int r;

        assert(j);

        r = import_uncompress_detect(&j->compress, j->payload, j->payload_size);
        if (r < 0)
                return log_error_errno(r, "Failed to initialize compressor: %m");
        if (r == 0)
                return 0;

        log_debug("Stream is compressed: %s", import_compress_type_to_string(j->compress.type));

        r = pull_job_open_disk(j);
        if (r < 0)
                return r;

        /* Now, take the payload we read so far, and decompress it */
        stub = j->payload;
        stub_size = j->payload_size;

        j->payload = NULL;
        j->payload_size = 0;
        j->payload_allocated = 0;

        j->state = PULL_JOB_RUNNING;

        r = pull_job_write_compressed(j, stub, stub_size);
        if (r < 0)
                return r;

        return 0;
}
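
/* Example of the replay performed above: if curl delivers a first 16 kB chunk
 * of an .xz stream, those bytes sit in j->payload while the job is still
 * ANALYZING. Once import_uncompress_detect() recognizes the format, the
 * buffered stub is pushed through pull_job_write_compressed(), so both the
 * checksum and the decompressor see the stream complete from byte 0. */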

static size_t pull_job_write_callback(void *contents, size_t size, size_t nmemb, void *userdata) {
        PullJob *j = userdata;
        size_t sz = size * nmemb;
        int r;

        assert(contents);
        assert(j);

        switch (j->state) {

        case PULL_JOB_ANALYZING:
                /* Let's first check what it actually is */

                if (!GREEDY_REALLOC(j->payload, j->payload_allocated, j->payload_size + sz)) {
                        r = log_oom();
                        goto fail;
                }

                memcpy(j->payload + j->payload_size, contents, sz);
                j->payload_size += sz;

                r = pull_job_detect_compression(j);
                if (r < 0)
                        goto fail;

                break;

        case PULL_JOB_RUNNING:

                r = pull_job_write_compressed(j, contents, sz);
                if (r < 0)
                        goto fail;

                break;

        case PULL_JOB_DONE:
        case PULL_JOB_FAILED:
                r = -ESTALE;
                goto fail;

        default:
                assert_not_reached("Impossible state.");
        }

        return sz;

fail:
        pull_job_finish(j, r);
        return 0;
}
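
/* Per libcurl's write-callback contract, returning anything other than
 * size * nmemb aborts the transfer, so the "return 0" in the failure path is
 * what actually cancels the download: libcurl turns it into
 * CURLE_WRITE_ERROR, which pull_job_curl_on_finished() then reports as a
 * failed transfer. */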

static size_t pull_job_header_callback(void *contents, size_t size, size_t nmemb, void *userdata) {
        PullJob *j = userdata;
        size_t sz = size * nmemb;
        _cleanup_free_ char *length = NULL, *last_modified = NULL;
        char *etag;
        int r;

        assert(contents);
        assert(j);

        if (j->state == PULL_JOB_DONE || j->state == PULL_JOB_FAILED) {
                r = -ESTALE;
                goto fail;
        }

        assert(j->state == PULL_JOB_ANALYZING);

        r = curl_header_strdup(contents, sz, "ETag:", &etag);
        if (r < 0) {
                log_oom();
                goto fail;
        }
        if (r > 0) {
                free(j->etag);
                j->etag = etag;

                if (strv_contains(j->old_etags, j->etag)) {
                        log_info("Image already downloaded. Skipping download.");
                        j->etag_exists = true;
                        pull_job_finish(j, 0);
                        return sz;
                }

                return sz;
        }

        r = curl_header_strdup(contents, sz, "Content-Length:", &length);
        if (r < 0) {
                log_oom();
                goto fail;
        }
        if (r > 0) {
                (void) safe_atou64(length, &j->content_length);

                if (j->content_length != (uint64_t) -1) {
                        char bytes[FORMAT_BYTES_MAX];

                        if (j->content_length > j->compressed_max) {
                                log_error("Content too large.");
                                r = -EFBIG;
                                goto fail;
                        }

                        log_info("Downloading %s for %s.", format_bytes(bytes, sizeof(bytes), j->content_length), j->url);
                }

                return sz;
        }

        r = curl_header_strdup(contents, sz, "Last-Modified:", &last_modified);
        if (r < 0) {
                log_oom();
                goto fail;
        }
        if (r > 0) {
                (void) curl_parse_http_time(last_modified, &j->mtime);
                return sz;
        }

        if (j->on_header) {
                r = j->on_header(j, contents, sz);
                if (r < 0)
                        goto fail;
        }

        return sz;

fail:
        pull_job_finish(j, r);
        return 0;
}
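
/* libcurl invokes the header callback once per complete header line, which is
 * why a single prefix match per invocation suffices above. The ETag handling
 * closes the caching loop: etags recorded in j->old_etags are sent as
 * If-None-Match in pull_job_begin(), and a match comes back either as HTTP
 * 304 (handled in pull_job_curl_on_finished()) or as an identical ETag header
 * (handled here), both of which short-circuit the download. */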

static int pull_job_progress_callback(void *userdata, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow) {
        PullJob *j = userdata;
        unsigned percent;
        usec_t n;

        assert(j);

        if (dltotal <= 0)
                return 0;

        percent = ((100 * dlnow) / dltotal);
        n = now(CLOCK_MONOTONIC);

        if (n > j->last_status_usec + USEC_PER_SEC &&
            percent != j->progress_percent &&
            dlnow < dltotal) {
                char buf[FORMAT_TIMESPAN_MAX];

                if (n - j->start_usec > USEC_PER_SEC && dlnow > 0) {
                        char y[FORMAT_BYTES_MAX];
                        usec_t left, done;

                        done = n - j->start_usec;
                        left = (usec_t) (((double) done * (double) dltotal) / dlnow) - done;

                        log_info("Got %u%% of %s. %s left at %s/s.",
                                 percent,
                                 j->url,
                                 format_timespan(buf, sizeof(buf), left, USEC_PER_SEC),
                                 format_bytes(y, sizeof(y), (uint64_t) ((double) dlnow / ((double) done / (double) USEC_PER_SEC))));
                } else
                        log_info("Got %u%% of %s.", percent, j->url);

                j->progress_percent = percent;
                j->last_status_usec = n;

                if (j->on_progress)
                        j->on_progress(j);
        }

        return 0;
}
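
/* The ETA above follows from assuming a constant transfer rate: if "done"
 * microseconds produced dlnow bytes, the whole transfer takes
 * done * dltotal / dlnow, so e.g. 25 MB of 100 MB after 10 s yields
 * 10 s * 100 / 25 - 10 s = 30 s left, at 25 MB / 10 s = 2.5 MB/s. */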

int pull_job_new(PullJob **ret, const char *url, CurlGlue *glue, void *userdata) {
        _cleanup_(pull_job_unrefp) PullJob *j = NULL;

        assert(url);
        assert(glue);
        assert(ret);

        j = new0(PullJob, 1);
        if (!j)
                return -ENOMEM;

        j->state = PULL_JOB_INIT;
        j->disk_fd = -1;
        j->userdata = userdata;
        j->glue = glue;
        j->content_length = (uint64_t) -1;
        j->start_usec = now(CLOCK_MONOTONIC);
        j->compressed_max = j->uncompressed_max = 8LLU * 1024LLU * 1024LLU * 1024LLU; /* 8GB */

        j->url = strdup(url);
        if (!j->url)
                return -ENOMEM;

        *ret = j;
        j = NULL;

        return 0;
}
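
/* The "*ret = j; j = NULL;" pair is the usual ownership hand-over: clearing
 * the local pointer disarms the _cleanup_(pull_job_unrefp) handler, so the
 * job is only freed automatically on the error paths above. */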

int pull_job_begin(PullJob *j) {
        int r;

        assert(j);

        if (j->state != PULL_JOB_INIT)
                return -EBUSY;

        if (j->grow_machine_directory)
                grow_machine_directory();

        r = curl_glue_make(&j->curl, j->url, j);
        if (r < 0)
                return r;

        if (!strv_isempty(j->old_etags)) {
                _cleanup_free_ char *cc = NULL, *hdr = NULL;

                cc = strv_join(j->old_etags, ", ");
                if (!cc)
                        return -ENOMEM;

                hdr = strappend("If-None-Match: ", cc);
                if (!hdr)
                        return -ENOMEM;

                if (!j->request_header) {
                        j->request_header = curl_slist_new(hdr, NULL);
                        if (!j->request_header)
                                return -ENOMEM;
                } else {
                        struct curl_slist *l;

                        l = curl_slist_append(j->request_header, hdr);
                        if (!l)
                                return -ENOMEM;

                        j->request_header = l;
                }
        }

        if (j->request_header) {
                if (curl_easy_setopt(j->curl, CURLOPT_HTTPHEADER, j->request_header) != CURLE_OK)
                        return -EIO;
        }

        if (curl_easy_setopt(j->curl, CURLOPT_WRITEFUNCTION, pull_job_write_callback) != CURLE_OK)
                return -EIO;

        if (curl_easy_setopt(j->curl, CURLOPT_WRITEDATA, j) != CURLE_OK)
                return -EIO;

        if (curl_easy_setopt(j->curl, CURLOPT_HEADERFUNCTION, pull_job_header_callback) != CURLE_OK)
                return -EIO;

        if (curl_easy_setopt(j->curl, CURLOPT_HEADERDATA, j) != CURLE_OK)
                return -EIO;

        if (curl_easy_setopt(j->curl, CURLOPT_XFERINFOFUNCTION, pull_job_progress_callback) != CURLE_OK)
                return -EIO;

        if (curl_easy_setopt(j->curl, CURLOPT_XFERINFODATA, j) != CURLE_OK)
                return -EIO;

        if (curl_easy_setopt(j->curl, CURLOPT_NOPROGRESS, 0) != CURLE_OK)
                return -EIO;

        r = curl_glue_add(j->glue, j->curl);
        if (r < 0)
                return r;

        j->state = PULL_JOB_ANALYZING;

        return 0;
}
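
/* End-to-end usage sketch (illustrative only; error handling trimmed, and the
 * CurlGlue event-loop integration is assumed to be set up elsewhere):
 *
 *         _cleanup_(pull_job_unrefp) PullJob *j = NULL;
 *         int r;
 *
 *         r = pull_job_new(&j, "https://example.com/image.raw.xz", glue, NULL);
 *         if (r < 0)
 *                 return r;
 *
 *         j->on_finished = my_job_finished;   <- e.g. the handler sketched earlier
 *         j->calc_checksum = true;            <- hash the compressed stream
 *
 *         r = pull_job_begin(j);
 *         if (r < 0)
 *                 return r;
 *
 * After this, running the event loop drives the transfer and
 * my_job_finished() fires on completion. */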