From 5a99898baf43312a29df7ffabefd9db954dfcbd3 Mon Sep 17 00:00:00 2001 From: Michael Tremer Date: Wed, 29 Feb 2012 10:52:10 +0100 Subject: [PATCH] Modify structure of the cache. Downloaded packages will be stored by their hash, so no re-download will be necessary when a package appears in another repository. --- python/pakfire/constants.py | 2 +- python/pakfire/downloader.py | 8 ++++-- python/pakfire/packages/solv.py | 26 +++++++++++------ python/pakfire/repository/cache.py | 18 ++++++------ python/pakfire/repository/index.py | 40 +++++++++++++++----------- python/pakfire/repository/remote.py | 44 ++++++++++++++--------------- 6 files changed, 77 insertions(+), 61 deletions(-) diff --git a/python/pakfire/constants.py b/python/pakfire/constants.py index 725c8aa05..2998ab7fd 100644 --- a/python/pakfire/constants.py +++ b/python/pakfire/constants.py @@ -37,7 +37,7 @@ CONFIG_DISTRO_DIR = os.path.join(CONFIG_DIR, "distros") CACHE_DIR = "/var/cache/pakfire" CCACHE_CACHE_DIR = os.path.join(CACHE_DIR, "ccache") CACHE_ENVIRON_DIR = os.path.join(CACHE_DIR, "environments") -REPO_CACHE_DIR = os.path.join(CACHE_DIR, "repos") +REPO_CACHE_DIR = os.path.join(CACHE_DIR, "downloads") LOCAL_BUILD_REPO_PATH = "/var/lib/pakfire/local" LOCAL_TMP_PATH = "/var/tmp" diff --git a/python/pakfire/downloader.py b/python/pakfire/downloader.py index 66431da25..3bfbff21b 100644 --- a/python/pakfire/downloader.py +++ b/python/pakfire/downloader.py @@ -192,6 +192,10 @@ class MirrorList(object): self.update(force=False) + @property + def distro(self): + return self.repo.distro + @property def cache(self): """ @@ -209,8 +213,8 @@ class MirrorList(object): return log.debug("Updating mirrorlist for repository '%s' (force=%s)" % (self.repo.name, force)) - - cache_filename = "mirrors/mirrorlist" + cache_filename = os.path.join("repodata", self.distro.sname, self.distro.release, + self.repo.name, self.distro.arch, "mirrors") # Force the update if no mirrorlist is available. 
if not self.cache.exists(cache_filename): diff --git a/python/pakfire/packages/solv.py b/python/pakfire/packages/solv.py index e73a04f5f..c120ce69f 100644 --- a/python/pakfire/packages/solv.py +++ b/python/pakfire/packages/solv.py @@ -190,13 +190,22 @@ class SolvPackage(base.Package): # XXX need to support filelist. return ["%s does not support filelists, yet." % self.__class__.__name__,] + @property + def cache_filename(self): + """ + The path to this file in the cache. + """ + h = self.hash1 + + return os.path.join(h[0:2], h[2:], os.path.basename(self.filename)) + @property def is_in_cache(self): # Local files are always kinda cached. if self.repo.local: return True - return self.repo.cache.exists("package/%s" % self.filename) + return self.repo.cache.exists(self.cache_filename) def get_from_cache(self): path = None @@ -206,17 +215,16 @@ class SolvPackage(base.Package): # the root directory of the repository or in a subdirectory that # is named by the architecture. for i in ("", self.arch,): - path = os.path.join(self.repo.path, i, self.filename) + p = os.path.join(self.repo.path, i, self.filename) - if os.path.exists(path): - return file.BinaryPackage(self.pakfire, self.repo, path) + if os.path.exists(p): + path = p + break else: - filename = "packages/%s" % self.filename - - if self.repo.cache.exists(filename): - path = self.repo.cache.abspath(filename) + if self.repo.cache.exists(self.cache_filename): + path = self.repo.cache.abspath(self.cache_filename) - if path: + if path and self.repo.cache.verify(path, self.hash1): return file.BinaryPackage(self.pakfire, self.repo, path) def download(self, text=""): diff --git a/python/pakfire/repository/cache.py b/python/pakfire/repository/cache.py index 3d6665d3f..ef9597d41 100644 --- a/python/pakfire/repository/cache.py +++ b/python/pakfire/repository/cache.py @@ -31,6 +31,7 @@ class RepositoryCache(object): An object that is able to cache all data that is loaded from a remote repository. 
""" + path = REPO_CACHE_DIR def __init__(self, pakfire, repo): self.pakfire = pakfire @@ -48,11 +49,6 @@ class RepositoryCache(object): return self.__created - @property - def path(self): - return os.path.join(REPO_CACHE_DIR, self.pakfire.distro.release, \ - self.repo.name, self.repo.arch) - def abspath(self, path, create=True): if create: self.create() @@ -67,11 +63,8 @@ class RepositoryCache(object): if self.created: return - for path in ("mirrors", "packages", "repodata"): - path = self.abspath(path, create=False) - - if not os.path.exists(path): - os.makedirs(path) + if not os.path.exists(self.path): + os.makedirs(self.path) self.__created = True @@ -99,6 +92,11 @@ class RepositoryCache(object): def open(self, filename, *args, **kwargs): filename = self.abspath(filename) + # Create directory if not existant. + dirname = os.path.dirname(filename) + if not os.path.exists(dirname): + os.makedirs(dirname) + return open(filename, *args, **kwargs) def verify(self, filename, hash1): diff --git a/python/pakfire/repository/index.py b/python/pakfire/repository/index.py index c319814da..ef0d9dfed 100644 --- a/python/pakfire/repository/index.py +++ b/python/pakfire/repository/index.py @@ -55,6 +55,10 @@ class Index(object): def __len(self): return len(self.repo) + @property + def distro(self): + return self.repo.distro + @property def cache(self): return self.repo.cache @@ -204,6 +208,8 @@ class IndexSolv(Index): def _update_metadata(self, force, offline=False): filename = os.path.join(METADATA_DOWNLOAD_PATH, METADATA_DOWNLOAD_FILE) + cache_filename = os.path.join("repodata", self.distro.sname, self.distro.release, + self.repo.name, self.distro.arch, os.path.basename(filename)) # Marker if we need to do the download. download = True @@ -213,15 +219,15 @@ class IndexSolv(Index): if not force: # Check if file does exists and is not too old. 
- if self.cache.exists(filename): - age = self.cache.age(filename) + if self.cache.exists(cache_filename): + age = self.cache.age(cache_filename) if age and age < TIME_10M: download = False log.debug("Metadata is recent enough. I don't download it again.") # Open old metadata for comparison. old_metadata = metadata.Metadata(self.pakfire, self, - self.cache.abspath(filename)) + self.cache.abspath(cache_filename)) # If no metadata was downloaded and we are in offline mode. elif offline: @@ -258,21 +264,23 @@ class IndexSolv(Index): else: # We explicitely rewrite the metadata if it is equal to have # a new timestamp and do not download it over and over again. - with self.cache.open(filename, "w") as o: + with self.cache.open(cache_filename, "w") as o: o.write(data) # Parse the metadata that we just downloaded or load it from cache. self.metadata = metadata.Metadata(self.pakfire, self, - self.cache.abspath(filename)) + self.cache.abspath(cache_filename)) def _update_database(self, force, offline=False): if not hasattr(self, "metadata"): return # Construct cache and download filename. + cache_filename = os.path.join("repodata", self.distro.sname, self.distro.release, + self.repo.name, self.distro.arch, "database", self.metadata.database) filename = os.path.join(METADATA_DOWNLOAD_PATH, self.metadata.database) - if not self.cache.exists(filename): + if not self.cache.exists(cache_filename): if offline: # If there is not database and we are in offline mode, we cannot # download anything so we just skip the rest of this function. @@ -297,7 +305,7 @@ class IndexSolv(Index): algo=self.metadata.database_compression) # Make a new file in the cache. 
- cacheobj = self.cache.open(filename, "w") + cacheobj = self.cache.open(cache_filename, "w") try: while True: @@ -322,17 +330,17 @@ class IndexSolv(Index): urlobj.close() # check the hashsum of the downloaded file - if not util.calc_hash1(self.cache.abspath(filename)) == self.metadata.database_hash1: - # XXX an exception is not a very good idea because this file could - # be downloaded from another mirror. need a better way to handle this. - - # Remove bad file from cache. - self.cache.remove(filename) - - raise Exception, "Downloaded file did not match the hashsum. Need to re-download it." + #if self.cache.verify(self.cache.abspath(cache_filename), self.metadata.database_hash1): + # # XXX an exception is not a very good idea because this file could + # # be downloaded from another mirror. need a better way to handle this. + # + # # Remove bad file from cache. + # self.cache.remove(cache_filename) + # + # raise Exception, "Downloaded file did not match the hashsum. Need to re-download it." # (Re-)open the database. - self.read(self.cache.abspath(filename)) + self.read(self.cache.abspath(cache_filename)) class IndexDir(Index): diff --git a/python/pakfire/repository/remote.py b/python/pakfire/repository/remote.py index 5118cc42b..e707c59d1 100644 --- a/python/pakfire/repository/remote.py +++ b/python/pakfire/repository/remote.py @@ -79,15 +79,7 @@ class RepositorySolv(base.RepositoryFactory): # Marker, if we need to download the package. download = True - cache_prefix = "" - if filename.endswith(PACKAGE_EXTENSION): - cache_prefix = "packages" - elif filename == METADATA_DOWNLOAD_FILE: - cache_prefix = "repodata" - elif filename.endswith(METADATA_DATABASE_FILE): - cache_prefix = "repodata" - - cache_filename = os.path.join(cache_prefix, os.path.basename(filename)) + cache_filename = pkg.cache_filename # Check if file already exists in cache. 
if self.cache.exists(cache_filename): @@ -101,7 +93,17 @@ class RepositorySolv(base.RepositoryFactory): # The file in cache has a wrong hash. Remove it and repeat download. cache.remove(cache_filename) - if download: + # Get a package grabber and add mirror download capabilities to it. + grabber = downloader.PackageDownloader( + self.pakfire, + text=text + os.path.basename(filename), + ) + grabber = self.mirrors.group(grabber) + + # Make sure filename is of type string (and not unicode) + filename = str(filename) + + while download: log.debug("Going to download %s" % filename) # If we are in offline mode, we cannot download any files. @@ -109,16 +111,6 @@ class RepositorySolv(base.RepositoryFactory): raise OfflineModeError, _("Cannot download this file in offline mode: %s") \ % filename - # Make sure filename is of type string (and not unicode) - filename = str(filename) - - # Get a package grabber and add mirror download capabilities to it. - grabber = downloader.PackageDownloader( - self.pakfire, - text=text + os.path.basename(filename), - ) - grabber = self.mirrors.group(grabber) - i = grabber.urlopen(filename) # Open input and output files and download the file. @@ -132,9 +124,15 @@ class RepositorySolv(base.RepositoryFactory): i.close() o.close() - # Verify if the download was okay. - if hash1 and not self.cache.verify(cache_filename, hash1): - raise Exception, "XXX this should never happen..." + if self.cache.verify(cache_filename, hash1): + log.debug("Successfully downloaded %s (%s)." % (filename, hash1)) + break + + log.warning(_("The checksum of the downloaded file did not match.")) + log.warning(_("Trying an other mirror.")) + + # Go to the next mirror. + grabber.increment_mirror() return os.path.join(self.cache.path, cache_filename) -- 2.39.5