From 612ee16e1974b85881110c63381fdf3bfa892e8c Mon Sep 17 00:00:00 2001
From: Michael Tremer
Date: Mon, 23 Oct 2017 02:07:09 +0100
Subject: [PATCH] Redesign mastering repositories

The process has been split into two tasks. One task deletes old files,
which are kept on the server for a little while so that clients with
old metadata can still download them. The mastering process itself
could therefore be simplified.

Signed-off-by: Michael Tremer
---
 src/buildservice/__init__.py      |   4 +-
 src/buildservice/packages.py      |  20 ++++
 src/buildservice/repository.py    | 157 +++++++++++++-----------------
 src/crontab/pakfire-build-service |   3 +
 src/scripts/pakfire-build-service |   3 +
 5 files changed, 98 insertions(+), 89 deletions(-)

diff --git a/src/buildservice/__init__.py b/src/buildservice/__init__.py
index 30a0730..b40527d 100644
--- a/src/buildservice/__init__.py
+++ b/src/buildservice/__init__.py
@@ -140,7 +140,9 @@ class Backend(object):
 			if not row.path:
 				continue
 
-			path = os.path.join(PACKAGES_DIR, row.path)
+			path = row.path
+			if not path.startswith("/"):
+				path = os.path.join(PACKAGES_DIR, path)
 
 			try:
 				logging.debug("Removing %s..." % path)
diff --git a/src/buildservice/packages.py b/src/buildservice/packages.py
index 266383b..b130616 100644
--- a/src/buildservice/packages.py
+++ b/src/buildservice/packages.py
@@ -12,6 +12,9 @@ from . import base
 from . import database
 from . import misc
 
+log = logging.getLogger("packages")
+log.propagate = 1
+
 from .constants import *
 from .decorators import *
 
@@ -374,6 +377,10 @@ class Package(base.DataObject):
 	def path(self):
 		return self.data.path
 
+	@property
+	def filename(self):
+		return os.path.basename(self.path)
+
 	@property
 	def hash_sha512(self):
 		return self.data.hash_sha512
@@ -382,6 +389,19 @@
 	def filesize(self):
 		return self.data.filesize
 
+	def copy(self, dst):
+		if os.path.isdir(dst):
+			dst = os.path.join(dst, self.filename)
+
+		if os.path.exists(dst):
+			raise IOError("Destination file exists: %s" % dst)
+
+		src = os.path.join(PACKAGES_DIR, self.path)
+
+		log.debug("Copying %s to %s" % (src, dst))
+
+		shutil.copy2(src, dst)
+
 	def move(self, target_dir):
 		# Create directory if it does not exist, yet.
 		if not os.path.exists(target_dir):
diff --git a/src/buildservice/repository.py b/src/buildservice/repository.py
index d4501b2..db43580 100644
--- a/src/buildservice/repository.py
+++ b/src/buildservice/repository.py
@@ -3,6 +3,8 @@
 import logging
 import os.path
 
+import pakfire
+
 log = logging.getLogger("repositories")
 log.propagate = 1
 
@@ -73,6 +75,14 @@ class Repositories(base.Object):
 			with self.db.transaction():
 				repo.remaster()
 
+	def cleanup(self):
+		"""
+			Cleans up all repositories
+		"""
+		for repo in self:
+			with self.db.transaction():
+				repo.cleanup()
+
 
 class Repository(base.DataObject):
 	table = "repositories"
@@ -148,7 +158,7 @@ class Repository(base.DataObject):
 
 	@property
 	def path(self):
-		return os.path.join(REPOS_DIR, self.basepath, "%{arch}")
+		return os.path.join(REPOS_DIR, self.basepath)
 
 	@property
 	def url(self):
@@ -156,7 +166,6 @@
 			self.settings.get("baseurl", "https://pakfire.ipfire.org"),
 			"repositories",
 			self.basepath,
-			"%{arch}"
 		)
 
 	@property
@@ -173,7 +182,7 @@
 			"[repo:%s]" % self.identifier,
 			"description = %s - %s" % (self.distro.name, self.summary),
 			"enabled = 1",
-			"baseurl = %s" % (self.path if local else self.url),
+			"baseurl = %s/%%{arch}" % (self.path if local else self.url),
 		]
 
 		if self.mirrored and not local:
@@ -312,41 +321,22 @@ class Repository(base.DataObject):
 
 		return _builds
 
-	def _get_packages(self, arch):
-		if arch.name == "src":
-			pkgs = self.db.query("SELECT packages.id AS id, packages.path AS path FROM packages \
-				JOIN builds ON builds.pkg_id = packages.id \
-				JOIN repositories_builds ON builds.id = repositories_builds.build_id \
-				WHERE packages.arch = %s AND repositories_builds.repo_id = %s",
-				arch.name, self.id)
-
-		else:
-			pkgs = self.db.query("SELECT packages.id AS id, packages.path AS path FROM packages \
-				JOIN jobs_packages ON jobs_packages.pkg_id = packages.id \
-				JOIN jobs ON jobs_packages.job_id = jobs.id \
-				JOIN builds ON builds.id = jobs.build_id \
-				JOIN repositories_builds ON builds.id = repositories_builds.build_id \
-				WHERE (jobs.arch = %s OR jobs.arch = %s) AND \
-				repositories_builds.repo_id = %s",
-				arch.name, "noarch", self.id)
-
-		return pkgs
-
 	def get_packages(self, arch):
-		pkgs = [self.pakfire.packages.get_by_id(p.id) for p in self._get_packages(arch)]
-		pkgs.sort()
-
-		return pkgs
-
-	def get_paths(self, arch):
-		paths = [p.path for p in self._get_packages(arch)]
-		paths.sort()
-
-		return paths
-
-	@property
-	def packages(self):
-		return self.get_packages()
+		if arch == "src":
+			return self.backend.packages._get_packages("SELECT packages.* FROM repositories_builds \
+				LEFT JOIN builds ON repositories_builds.build_id = builds.id \
+				LEFT JOIN packages ON builds.pkg_id = packages.id \
+				WHERE repositories_builds.repo_id = %s", self.id)
+
+		return self.backend.packages._get_packages("SELECT packages.* FROM repositories_builds \
+			LEFT JOIN builds ON repositories_builds.build_id = builds.id \
+			LEFT JOIN jobs ON builds.id = jobs.build_id \
+			LEFT JOIN jobs_packages ON jobs.id = jobs_packages.job_id \
+			LEFT JOIN packages ON jobs_packages.pkg_id = packages.id \
+			WHERE repositories_builds.repo_id = %s \
+				AND (jobs.arch = %s OR jobs.arch = %s) \
+				AND (packages.arch = %s OR packages.arch = %s)",
+			self.id, arch, "noarch", arch, "noarch")
 
 	@property
 	def unpushed_builds(self):
@@ -373,84 +363,75 @@
 	def remaster(self):
 		log.info("Going to update repository %s..." % self.name)
 
-		# Update the timestamp when we started at last.
-		self.updated()
-
 		for arch in self.arches:
 			changed = False
 
-			# Get all package paths that are to be included in this repository.
-			paths = self.get_paths(arch)
-
-			repo_path = os.path.join(
-				REPOS_DIR,
-				self.distro.identifier,
-				self.identifier,
-				arch
-			)
+			repo_path = os.path.join(self.path, arch)
+			log.debug(" Path: %s" % repo_path)
 
 			if not os.path.exists(repo_path):
 				os.makedirs(repo_path)
 
-			source_files = []
-			remove_files = []
-
-			for filename in os.listdir(repo_path):
-				path = os.path.join(repo_path, filename)
-
-				if not os.path.isfile(path):
-					continue
-
-				remove_files.append(path)
-
-			for path in paths:
-				filename = os.path.basename(path)
-
-				source_file = os.path.join(PACKAGES_DIR, path)
-				target_file = os.path.join(repo_path, filename)
+			# Get all packages that are to be included in this repository
+			packages = []
+			for p in self.get_packages(arch):
+				path = os.path.join(repo_path, p.filename)
+				packages.append(path)
 
-				# Do not add duplicate files twice.
-				if source_file in source_files:
+				# Nothing to do if the package already exists
+				if os.path.exists(path):
 					continue
 
-				source_files.append(source_file)
+				# Copy the package into the repository
+				log.info("Adding %s..." % p)
+				p.copy(repo_path)
 
-				try:
-					remove_files.remove(target_file)
-				except ValueError:
-					changed = True
+				# XXX need to sign the new package here
 
-			if remove_files:
+				# The repository has been changed
 				changed = True
 
-			# If nothing in the repository data has changed, there
-			# is nothing to do.
-			if changed:
-				log.info("The repository has updates...")
-			else:
-				log.info("Nothing to update.")
+			# No need to regenerate the index if the repository hasn't changed
+			if not changed:
 				continue
 
+			# Update the timestamp when we started at last
+			self.updated()
+
 			# Find the key to sign the package.
 			key_id = None
-			if repo.key:
+			if self.key:
 				key_id = self.key.fingerprint
 
 			# Create package index.
 			p = pakfire.PakfireServer(arch=arch)
-
-			p.repo_create(repo_path, source_files,
+			p.repo_create(repo_path, packages,
 				name="%s - %s.%s" % (self.distro.name, self.name, arch),
 				key_id=key_id)
 
-			# Remove files afterwards.
-			for file in remove_files:
-				file = os.path.join(repo_path, file)
+	def cleanup(self):
+		log.info("Cleaning up repository %s..." % self.name)
+
+		for arch in self.arches:
+			repo_path = os.path.join(self.path, arch)
 
+			# Get a list of all files in the repository directory right now
+			filelist = [e for e in os.listdir(repo_path)
+				if os.path.isfile(os.path.join(repo_path, e))]
+
+			# Get a list of all packages that should be in the repository
+			# and remove them from the filelist
+			for p in self.get_packages(arch):
 				try:
-					os.remove(file)
-				except OSError:
-					log.warning("Could not remove %s." % file)
+					filelist.remove(p.filename)
+				except ValueError:
+					pass
+
+			# For any files that do not belong into the repository
+			# any more, we will just delete them
+			for filename in filelist:
+				path = os.path.join(repo_path, filename)
+				self.backend.delete_file(path)
 
 	def get_history(self, **kwargs):
 		kwargs.update({
diff --git a/src/crontab/pakfire-build-service b/src/crontab/pakfire-build-service
index b2c784a..0c17ced 100644
--- a/src/crontab/pakfire-build-service
+++ b/src/crontab/pakfire-build-service
@@ -7,6 +7,9 @@
 # Remaster repositories
 */5 * * * * pakfire pakfire-build-service remaster-repositories &>/dev/null
 
+# Cleanup repositories
+5 * * * * pakfire pakfire-build-service cleanup-repositories &>/dev/null
+
 # Pull sources
 */5 * * * * pakfire pakfire-build-service pull-sources &>/dev/null
 
diff --git a/src/scripts/pakfire-build-service b/src/scripts/pakfire-build-service
index 9c91243..83aceb6 100644
--- a/src/scripts/pakfire-build-service
+++ b/src/scripts/pakfire-build-service
@@ -23,6 +23,9 @@ class Cli(object):
 			# Cleanup files
 			"cleanup-files" : self.backend.cleanup_files,
 
+			# Cleanup repositories
+			"cleanup-repositories" : self.backend.repos.cleanup,
+
 			# Cleanup sessions
 			"cleanup-sessions" : self.backend.sessions.cleanup,
 
-- 
2.47.3
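
For readers who want the new cleanup step in isolation, here is a minimal, standalone sketch of the same idea: list what is currently on disk, keep whatever the repository still references, and delete the rest. It is not part of the patch; the names cleanup_repository and expected_filenames are made up for illustration, and where the real service hands stale paths to backend.delete_file() (which defers the actual removal), this sketch simply unlinks them right away.

import os

def cleanup_repository(repo_path, expected_filenames):
    """Delete files in repo_path that the repository no longer references."""
    # Regular files that are currently present in the repository directory
    present = {
        entry for entry in os.listdir(repo_path)
        if os.path.isfile(os.path.join(repo_path, entry))
    }

    # Anything that is not expected any more is stale and can be removed
    for filename in sorted(present - set(expected_filenames)):
        path = os.path.join(repo_path, filename)
        print("Removing stale file %s" % path)
        os.unlink(path)

if __name__ == "__main__":
    # Tiny usage example against a throwaway directory
    import tempfile

    repo = tempfile.mkdtemp()
    for name in ("keep.pkg", "stale.pkg"):
        open(os.path.join(repo, name), "w").close()

    cleanup_repository(repo, ["keep.pkg"])
    print(os.listdir(repo))  # only keep.pkg should remain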
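
The remaster side can be pictured the same way. The sketch below is likewise only an illustration, assuming a plain list of package file paths, with shutil.copy2() and a hypothetical rebuild_index() callback standing in for Package.copy() and pakfire's repo_create(): it copies only packages that are missing and regenerates the index only when something actually changed, which keeps the five-minute cron run cheap when nothing is new.

import os
import shutil

def remaster_repository(repo_path, package_files, rebuild_index):
    """Copy missing packages into repo_path and rebuild the index only if needed."""
    os.makedirs(repo_path, exist_ok=True)

    changed = False
    for src in package_files:
        dst = os.path.join(repo_path, os.path.basename(src))

        # Packages that are already in place are skipped
        if os.path.exists(dst):
            continue

        shutil.copy2(src, dst)
        changed = True

    # Skip the expensive index regeneration when nothing was added
    if changed:
        rebuild_index(repo_path)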