From 5a99898baf43312a29df7ffabefd9db954dfcbd3 Mon Sep 17 00:00:00 2001 From: Michael Tremer Date: Wed, 29 Feb 2012 10:52:10 +0100 Subject: [PATCH] Modify structure of the cache. Downloaded packages will be stored by their hash, so no re-download will be necessary when a package appears in another repository. --- python/pakfire/constants.py | 2 +- python/pakfire/downloader.py | 8 ++++-- python/pakfire/packages/solv.py | 26 +++++++++++------ python/pakfire/repository/cache.py | 18 ++++++------ python/pakfire/repository/index.py | 40 +++++++++++++++----------- python/pakfire/repository/remote.py | 44 ++++++++++++++--------------- 6 files changed, 77 insertions(+), 61 deletions(-) diff --git a/python/pakfire/constants.py b/python/pakfire/constants.py index 725c8aa05..2998ab7fd 100644 --- a/python/pakfire/constants.py +++ b/python/pakfire/constants.py @@ -37,7 +37,7 @@ CONFIG_DISTRO_DIR = os.path.join(CONFIG_DIR, "distros") CACHE_DIR = "/var/cache/pakfire" CCACHE_CACHE_DIR = os.path.join(CACHE_DIR, "ccache") CACHE_ENVIRON_DIR = os.path.join(CACHE_DIR, "environments") -REPO_CACHE_DIR = os.path.join(CACHE_DIR, "repos") +REPO_CACHE_DIR = os.path.join(CACHE_DIR, "downloads") LOCAL_BUILD_REPO_PATH = "/var/lib/pakfire/local" LOCAL_TMP_PATH = "/var/tmp" diff --git a/python/pakfire/downloader.py b/python/pakfire/downloader.py index 66431da25..3bfbff21b 100644 --- a/python/pakfire/downloader.py +++ b/python/pakfire/downloader.py @@ -192,6 +192,10 @@ class MirrorList(object): self.update(force=False) + @property + def distro(self): + return self.repo.distro + @property def cache(self): """ @@ -209,8 +213,8 @@ class MirrorList(object): return log.debug("Updating mirrorlist for repository '%s' (force=%s)" % (self.repo.name, force)) - - cache_filename = "mirrors/mirrorlist" + cache_filename = os.path.join("repodata", self.distro.sname, self.distro.release, + self.repo.name, self.distro.arch, "mirrors") # Force the update if no mirrorlist is available. 
if not self.cache.exists(cache_filename): diff --git a/python/pakfire/packages/solv.py b/python/pakfire/packages/solv.py index e73a04f5f..c120ce69f 100644 --- a/python/pakfire/packages/solv.py +++ b/python/pakfire/packages/solv.py @@ -190,13 +190,22 @@ class SolvPackage(base.Package): # XXX need to support filelist. return ["%s does not support filelists, yet." % self.__class__.__name__,] + @property + def cache_filename(self): + """ + The path to this file in the cache. + """ + h = self.hash1 + + return os.path.join(h[0:2], h[2:], os.path.basename(self.filename)) + @property def is_in_cache(self): # Local files are always kinda cached. if self.repo.local: return True - return self.repo.cache.exists("package/%s" % self.filename) + return self.repo.cache.exists(self.cache_filename) def get_from_cache(self): path = None @@ -206,17 +215,16 @@ class SolvPackage(base.Package): # the root directory of the repository or in a subdirectory that # is named by the architecture. for i in ("", self.arch,): - path = os.path.join(self.repo.path, i, self.filename) + p = os.path.join(self.repo.path, i, self.filename) - if os.path.exists(path): - return file.BinaryPackage(self.pakfire, self.repo, path) + if os.path.exists(p): + path = p + break else: - filename = "packages/%s" % self.filename - - if self.repo.cache.exists(filename): - path = self.repo.cache.abspath(filename) + if self.repo.cache.exists(self.cache_filename): + path = self.repo.cache.abspath(self.cache_filename) - if path: + if path and self.repo.cache.verify(path, self.hash1): return file.BinaryPackage(self.pakfire, self.repo, path) def download(self, text=""): diff --git a/python/pakfire/repository/cache.py b/python/pakfire/repository/cache.py index 3d6665d3f..ef9597d41 100644 --- a/python/pakfire/repository/cache.py +++ b/python/pakfire/repository/cache.py @@ -31,6 +31,7 @@ class RepositoryCache(object): An object that is able to cache all data that is loaded from a remote repository. 
""" + path = REPO_CACHE_DIR def __init__(self, pakfire, repo): self.pakfire = pakfire @@ -48,11 +49,6 @@ class RepositoryCache(object): return self.__created - @property - def path(self): - return os.path.join(REPO_CACHE_DIR, self.pakfire.distro.release, \ - self.repo.name, self.repo.arch) - def abspath(self, path, create=True): if create: self.create() @@ -67,11 +63,8 @@ class RepositoryCache(object): if self.created: return - for path in ("mirrors", "packages", "repodata"): - path = self.abspath(path, create=False) - - if not os.path.exists(path): - os.makedirs(path) + if not os.path.exists(self.path): + os.makedirs(self.path) self.__created = True @@ -99,6 +92,11 @@ class RepositoryCache(object): def open(self, filename, *args, **kwargs): filename = self.abspath(filename) + # Create directory if not existant. + dirname = os.path.dirname(filename) + if not os.path.exists(dirname): + os.makedirs(dirname) + return open(filename, *args, **kwargs) def verify(self, filename, hash1): diff --git a/python/pakfire/repository/index.py b/python/pakfire/repository/index.py index c319814da..ef0d9dfed 100644 --- a/python/pakfire/repository/index.py +++ b/python/pakfire/repository/index.py @@ -55,6 +55,10 @@ class Index(object): def __len(self): return len(self.repo) + @property + def distro(self): + return self.repo.distro + @property def cache(self): return self.repo.cache @@ -204,6 +208,8 @@ class IndexSolv(Index): def _update_metadata(self, force, offline=False): filename = os.path.join(METADATA_DOWNLOAD_PATH, METADATA_DOWNLOAD_FILE) + cache_filename = os.path.join("repodata", self.distro.sname, self.distro.release, + self.repo.name, self.distro.arch, os.path.basename(filename)) # Marker if we need to do the download. download = True @@ -213,15 +219,15 @@ class IndexSolv(Index): if not force: # Check if file does exists and is not too old. 
- if self.cache.exists(filename): - age = self.cache.age(filename) + if self.cache.exists(cache_filename): + age = self.cache.age(cache_filename) if age and age < TIME_10M: download = False log.debug("Metadata is recent enough. I don't download it again.") # Open old metadata for comparison. old_metadata = metadata.Metadata(self.pakfire, self, - self.cache.abspath(filename)) + self.cache.abspath(cache_filename)) # If no metadata was downloaded and we are in offline mode. elif offline: @@ -258,21 +264,23 @@ class IndexSolv(Index): else: # We explicitely rewrite the metadata if it is equal to have # a new timestamp and do not download it over and over again. - with self.cache.open(filename, "w") as o: + with self.cache.open(cache_filename, "w") as o: o.write(data) # Parse the metadata that we just downloaded or load it from cache. self.metadata = metadata.Metadata(self.pakfire, self, - self.cache.abspath(filename)) + self.cache.abspath(cache_filename)) def _update_database(self, force, offline=False): if not hasattr(self, "metadata"): return # Construct cache and download filename. + cache_filename = os.path.join("repodata", self.distro.sname, self.distro.release, + self.repo.name, self.distro.arch, "database", self.metadata.database) filename = os.path.join(METADATA_DOWNLOAD_PATH, self.metadata.database) - if not self.cache.exists(filename): + if not self.cache.exists(cache_filename): if offline: # If there is not database and we are in offline mode, we cannot # download anything so we just skip the rest of this function. @@ -297,7 +305,7 @@ class IndexSolv(Index): algo=self.metadata.database_compression) # Make a new file in the cache. 
- cacheobj = self.cache.open(filename, "w") + cacheobj = self.cache.open(cache_filename, "w") try: while True: @@ -322,17 +330,17 @@ class IndexSolv(Index): urlobj.close() # check the hashsum of the downloaded file - if not util.calc_hash1(self.cache.abspath(filename)) == self.metadata.database_hash1: - # XXX an exception is not a very good idea because this file could - # be downloaded from another mirror. need a better way to handle this. - - # Remove bad file from cache. - self.cache.remove(filename) - - raise Exception, "Downloaded file did not match the hashsum. Need to re-download it." + #if self.cache.verify(self.cache.abspath(cache_filename), self.metadata.database_hash1): + # # XXX an exception is not a very good idea because this file could + # # be downloaded from another mirror. need a better way to handle this. + # + # # Remove bad file from cache. + # self.cache.remove(cache_filename) + # + # raise Exception, "Downloaded file did not match the hashsum. Need to re-download it." # (Re-)open the database. - self.read(self.cache.abspath(filename)) + self.read(self.cache.abspath(cache_filename)) class IndexDir(Index): diff --git a/python/pakfire/repository/remote.py b/python/pakfire/repository/remote.py index 5118cc42b..e707c59d1 100644 --- a/python/pakfire/repository/remote.py +++ b/python/pakfire/repository/remote.py @@ -79,15 +79,7 @@ class RepositorySolv(base.RepositoryFactory): # Marker, if we need to download the package. download = True - cache_prefix = "" - if filename.endswith(PACKAGE_EXTENSION): - cache_prefix = "packages" - elif filename == METADATA_DOWNLOAD_FILE: - cache_prefix = "repodata" - elif filename.endswith(METADATA_DATABASE_FILE): - cache_prefix = "repodata" - - cache_filename = os.path.join(cache_prefix, os.path.basename(filename)) + cache_filename = pkg.cache_filename # Check if file already exists in cache. 
if self.cache.exists(cache_filename): @@ -101,7 +93,17 @@ class RepositorySolv(base.RepositoryFactory): # The file in cache has a wrong hash. Remove it and repeat download. cache.remove(cache_filename) - if download: + # Get a package grabber and add mirror download capabilities to it. + grabber = downloader.PackageDownloader( + self.pakfire, + text=text + os.path.basename(filename), + ) + grabber = self.mirrors.group(grabber) + + # Make sure filename is of type string (and not unicode) + filename = str(filename) + + while download: log.debug("Going to download %s" % filename) # If we are in offline mode, we cannot download any files. @@ -109,16 +111,6 @@ class RepositorySolv(base.RepositoryFactory): raise OfflineModeError, _("Cannot download this file in offline mode: %s") \ % filename - # Make sure filename is of type string (and not unicode) - filename = str(filename) - - # Get a package grabber and add mirror download capabilities to it. - grabber = downloader.PackageDownloader( - self.pakfire, - text=text + os.path.basename(filename), - ) - grabber = self.mirrors.group(grabber) - i = grabber.urlopen(filename) # Open input and output files and download the file. @@ -132,9 +124,15 @@ class RepositorySolv(base.RepositoryFactory): i.close() o.close() - # Verify if the download was okay. - if hash1 and not self.cache.verify(cache_filename, hash1): - raise Exception, "XXX this should never happen..." + if self.cache.verify(cache_filename, hash1): + log.debug("Successfully downloaded %s (%s)." % (filename, hash1)) + break + + log.warning(_("The checksum of the downloaded file did not match.")) + log.warning(_("Trying an other mirror.")) + + # Go to the next mirror. + grabber.increment_mirror() return os.path.join(self.cache.path, cache_filename) -- 2.39.5