[pakfire.git] / pakfire / downloader.py

#!/usr/bin/python

import json
import logging
import random

from config import Config

from urlgrabber.grabber import URLGrabber, URLGrabError
from urlgrabber.mirror import MirrorGroup
from urlgrabber.progress import TextMeter

from pakfire.constants import *

class PakfireGrabber(URLGrabber):
	"""
		URLGrabber that is configured from the pakfire configuration.

		It sets the pakfire user agent, passes quote=0 and applies the
		"bandwidth_throttle" and "http_proxy" configuration values before
		all options are handed over to URLGrabber.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		"""
			pakfire is either a Config instance or an object that carries
			one in its "config" attribute. All other arguments are passed
			on to URLGrabber.

			Raises OfflineModeError if the "offline" option is set.
		"""
		kwargs.update({
			"quote" : 0,
			"user_agent" : "pakfire/%s" % PAKFIRE_VERSION,
		})

		if isinstance(pakfire, Config):
			config = pakfire
		else:
			config = pakfire.config

		# No grabber may be created in offline mode. (A stray bare "raise"
		# used to sit in front of this statement and made it unreachable.)
		if config.get("offline"):
			raise OfflineModeError("Cannot use %s in offline mode." % self.__class__.__name__)

		# Set throttle setting.
		bandwidth_throttle = config.get("bandwidth_throttle")
		if bandwidth_throttle:
			try:
				bandwidth_throttle = int(bandwidth_throttle)
			except ValueError:
				# The value is not a number: Complain and go on with 0.
				logging.error("Configuration value for bandwidth_throttle is invalid.")
				bandwidth_throttle = 0

			kwargs.update({ "throttle" : bandwidth_throttle })

		# Configure HTTP proxy.
		http_proxy = config.get("http_proxy")
		if http_proxy:
			kwargs.update({ "proxies" : { "http" : http_proxy }})

		URLGrabber.__init__(self, *args, **kwargs)


class PackageDownloader(PakfireGrabber):
	"""
		Grabber that shows the progress of a download with a TextMeter.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		# Every instance gets its own meter which replaces any
		# "progress_obj" that has been passed by the caller.
		kwargs["progress_obj"] = TextMeter()

		PakfireGrabber.__init__(self, pakfire, *args, **kwargs)


class MetadataDownloader(PakfireGrabber):
	"""
		Grabber that sends a "Pragma: no-cache" header with its requests.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		# Overwrites any "http_headers" that have been passed by the caller.
		no_cache = ('Pragma', 'no-cache')
		kwargs["http_headers"] = (no_cache,)

		PakfireGrabber.__init__(self, pakfire, *args, **kwargs)


class DatabaseDownloader(PackageDownloader):
	"""
		PackageDownloader that additionally sends a "Pragma: no-cache"
		header with its requests.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		# Overwrites any "http_headers" that have been passed by the caller.
		no_cache = ('Pragma', 'no-cache')
		kwargs["http_headers"] = (no_cache,)

		PackageDownloader.__init__(self, pakfire, *args, **kwargs)


class Mirror(object):
	"""
		A single mirror server.

		url is the URL of the mirror, location an optional location of
		the mirror and preferred tells if the mirror should be tried
		before the mirrors that are not preferred.
	"""
	def __init__(self, url, location=None, preferred=False):
		# Save URL of the mirror in full format
		self.url = url

		# Save the location (if given)
		self.location = location

		# Save preference (this has been hard-coded to False before,
		# so that the argument was ignored).
		self.preferred = preferred


class MirrorList(object):
	"""
		The list of mirrors of a repository.

		The list is downloaded from the "mirrorlist" URL of the repository,
		stored in the cache of the repository and parsed into Mirror objects.
	"""
	def __init__(self, pakfire, repo):
		self.pakfire = pakfire
		self.repo = repo

		self.__mirrors = []

		# Save URL to more mirrors.
		self.mirrorlist = repo.mirrorlist

		self.update(force=False)

	@property
	def cache(self):
		"""
			Shortcut to cache from repository.
		"""
		return self.repo.cache

	def update(self, force=False):
		"""
			Load the mirrorlist and download a fresh copy first if needed.

			A download is done if force is set, if there is no copy in the
			cache or if the cached copy is older than TIME_24H. If the
			download fails, the cached copy is used (if there is one).

			NOTE(review): Mirrors are appended to the ones that are already
			known, so calling this twice adds every mirror twice.
		"""
		# XXX should this be allowed?
		if not self.mirrorlist:
			return

		logging.debug("Updating mirrorlist for repository '%s' (force=%s)" % (self.repo.name, force))

		cache_filename = "mirrors/mirrorlist"

		cached = self.cache.exists(cache_filename)

		if not cached:
			# Force the update if no mirrorlist is available.
			force = True

		elif not force:
			age = self.cache.age(cache_filename)

			# If the age could be determined and is higher than 24h,
			# we force an update.
			if age and age > TIME_24H:
				force = True

		if force:
			g = MetadataDownloader(self.pakfire)

			try:
				mirrordata = g.urlread(self.mirrorlist, limit=MIRRORLIST_MAXSIZE)
			except URLGrabError as e:
				logging.warning("Could not update the mirrorlist for repo '%s': %s" % (self.repo.name, e))

				# Without a cached copy, there is nothing left to read.
				# Otherwise an outdated list is better than no list.
				if not cached:
					return
			else:
				# XXX check for empty files or damaged output

				# Save new mirror data to cache. The file is closed
				# even if writing fails.
				with self.cache.open(cache_filename, "w") as f:
					f.write(mirrordata)

		# Read mirrorlist from cache and parse it.
		with self.cache.open(cache_filename) as f:
			self.parse_mirrordata(f.read())

	def parse_mirrordata(self, data):
		"""
			Parse the JSON-formatted mirrorlist and add every mirror
			that is listed under the "mirrors" key.

			Data that is not valid JSON or does not have a "mirrors" key
			is reported with a warning and no mirror is added.
		"""
		try:
			mirrors = json.loads(data)["mirrors"]
		except (ValueError, KeyError, TypeError) as e:
			# ValueError: Not JSON at all (e.g. an empty file).
			# KeyError/TypeError: JSON, but not of the expected layout.
			logging.warning("Could not parse the mirrorlist for repo '%s': %s" % (self.repo.name, e))
			return

		for mirror in mirrors:
			self.add_mirror(**mirror)

	def add_mirror(self, *args, **kwargs):
		"""
			Create a Mirror from the given arguments and add it to the list.
		"""
		mirror = Mirror(*args, **kwargs)

		self.__mirrors.append(mirror)

	@property
	def preferred(self):
		"""
			Return a generator for all mirrors that are preferred.
		"""
		for mirror in self.__mirrors:
			if mirror.preferred:
				yield mirror

	@property
	def non_preferred(self):
		"""
			Return a generator for all mirrors that are not preferred.
		"""
		for mirror in self.__mirrors:
			if not mirror.preferred:
				yield mirror

	@property
	def all(self):
		"""
			Return a generator for all mirrors.
		"""
		for mirror in self.__mirrors:
			yield mirror

	def group(self, grabber):
		"""
			Return a MirrorGroup object for the given grabber.
		"""
		# Add all preferred mirrors at the first place and shuffle them
		# that we will start at a random place.
		mirrors = [mirror.url for mirror in self.preferred]
		random.shuffle(mirrors)

		# Remember which URLs have been added already.
		preferred_urls = set(mirrors)

		# All other mirrors are added as well and will only be used if all
		# preferred mirrors did not work.
		for mirror in self.all:
			if mirror.url in preferred_urls:
				continue

			mirrors.append({ "mirror" : mirror.url })

		return MirrorGroup(grabber, mirrors)


class Downloader(object):
	"""
		Bundles a grabber and the mirror group that belongs to it.
	"""
	def __init__(self, mirrors, files):
		"""
			mirrors is expected to be a MirrorList (its "pakfire" attribute
			and group() are used). files is accepted, but not used, yet.
		"""
		# PakfireGrabber cannot be created without a pakfire (or Config)
		# object, so use the one the mirror list has been created with.
		self.grabber = PakfireGrabber(mirrors.pakfire)

		self.mirrorgroup = mirrors.group(self.grabber)
Commit	Line	Data
1de8761d MT	1	#!/usr/bin/python
	2
	3	import json
	4	import logging
4f91860e	5	import random
1de8761d	6
e57c5475 MT	7	from config import Config
e57c5475 MT	8
1de8761d	9	from urlgrabber.grabber import URLGrabber, URLGrabError
4f91860e	10	from urlgrabber.mirror import MirrorGroup
14ea3228	11	from urlgrabber.progress import TextMeter
1de8761d	12
a2d1644c	13	from pakfire.constants import *
1de8761d MT	14
	15	class PakfireGrabber(URLGrabber):
	16	"""
	17	Class to make some modifications on the urlgrabber configuration.
	18	"""
80104a80	19	def __init__(self, pakfire, *args, **kwargs):
14ea3228 MT	20	kwargs.update({
	21	"quote" : 0,
	22	"user_agent" : "pakfire/%s" % PAKFIRE_VERSION,
	23	})
	24
e57c5475 MT	25	if isinstance(pakfire, Config):
	26	config = pakfire
	27	else:
	28	config = pakfire.config
	29
6a509182 MT	30	if config.get("offline"):
	31	raise
	32	raise OfflineModeError, "Cannot use %s in offline mode." % self.__class__.__name__
	33
cfc16a71	34	# Set throttle setting.
e57c5475	35	bandwidth_throttle = config.get("bandwidth_throttle")
80104a80 MT	36	if bandwidth_throttle:
	37	try:
	38	bandwidth_throttle = int(bandwidth_throttle)
	39	except ValueError:
	40	logging.error("Configuration value for bandwidth_throttle is invalid.")
	41	bandwidth_throttle = 0
	42
	43	kwargs.update({ "throttle" : bandwidth_throttle })
	44
cfc16a71	45	# Configure HTTP proxy.
e57c5475	46	http_proxy = config.get("http_proxy")
cfc16a71 MT	47	if http_proxy:
	48	kwargs.update({ "proxies" : { "http" : http_proxy }})
	49
14ea3228 MT	50	URLGrabber.__init__(self, *args, **kwargs)
	51
	52
	53	class PackageDownloader(PakfireGrabber):
80104a80	54	def __init__(self, pakfire, *args, **kwargs):
14ea3228 MT	55	kwargs.update({
	56	"progress_obj" : TextMeter(),
	57	})
	58
80104a80	59	PakfireGrabber.__init__(self, pakfire, *args, **kwargs)
14ea3228 MT	60
	61
	62	class MetadataDownloader(PakfireGrabber):
80104a80	63	def __init__(self, pakfire, *args, **kwargs):
14ea3228 MT	64	kwargs.update({
	65	"http_headers" : (('Pragma', 'no-cache'),),
	66	})
	67
80104a80	68	PakfireGrabber.__init__(self, pakfire, *args, **kwargs)
14ea3228 MT	69
	70
	71	class DatabaseDownloader(PackageDownloader):
80104a80	72	def __init__(self, pakfire, *args, **kwargs):
14ea3228 MT	73	kwargs.update({
	74	"http_headers" : (('Pragma', 'no-cache'),),
	75	})
	76
80104a80	77	PackageDownloader.__init__(self, pakfire, *args, **kwargs)
1de8761d	78
4f91860e	79
1de8761d	80	class Mirror(object):
4f91860e	81	def __init__(self, url, location=None, preferred=False):
1de8761d	82	# Save URL of the mirror in full format
4f91860e	83	self.url = url
1de8761d MT	84
	85	# Save the location (if given)
	86	self.location = location
	87
	88	# Save preference
	89	self.preferred = False
	90
	91
	92	class MirrorList(object):
	93	def __init__(self, pakfire, repo):
	94	self.pakfire = pakfire
	95	self.repo = repo
	96
	97	self.__mirrors = []
	98
	99	# Save URL to more mirrors.
	100	self.mirrorlist = repo.mirrorlist
	101
	102	self.update(force=False)
	103
	104	@property
	105	def cache(self):
	106	"""
	107	Shortcut to cache from repository.
	108	"""
	109	return self.repo.cache
	110
	111	def update(self, force=False):
	112	# XXX should this be allowed?
	113	if not self.mirrorlist:
	114	return
	115
	116	logging.debug("Updating mirrorlist for repository '%s' (force=%s)" % (self.repo.name, force))
	117
	118	cache_filename = "mirrors/mirrorlist"
	119
	120	# Force the update if no mirrorlist is available.
	121	if not self.cache.exists(cache_filename):
	122	force = True
	123
	124	if not force and self.cache.exists(cache_filename):
	125	age = self.cache.age(cache_filename)
	126
	127	# If the age could be determined and is higher than 24h,
	128	# we force an update.
	129	if age and age > TIME_24H:
	130	force = True
	131
	132	if force:
80104a80	133	g = MetadataDownloader(self.pakfire)
1de8761d MT	134
	135	try:
	136	mirrordata = g.urlread(self.mirrorlist, limit=MIRRORLIST_MAXSIZE)
	137	except URLGrabError, e:
	138	logging.warning("Could not update the mirrorlist for repo '%s': %s" % (self.repo.name, e))
	139	return
	140
	141	# XXX check for empty files or damaged output
	142
	143	# Save new mirror data to cache.
	144	f = self.cache.open(cache_filename, "w")
	145	f.write(mirrordata)
	146	f.close()
	147
	148	# Read mirrorlist from cache and parse it.
	149	with self.cache.open(cache_filename) as f:
	150	self.parse_mirrordata(f.read())
	151
	152	def parse_mirrordata(self, data):
	153	data = json.loads(data)
	154
	155	for mirror in data["mirrors"]:
	156	self.add_mirror(**mirror)
	157
	158	def add_mirror(self, *args, **kwargs):
	159	mirror = Mirror(*args, **kwargs)
	160
	161	self.__mirrors.append(mirror)
	162
	163	@property
	164	def preferred(self):
	165	"""
	166	Return a generator for all mirrors that are preferred.
	167	"""
	168	for mirror in self.__mirrors:
	169	if mirror.preferred:
	170	yield mirror
	171
4f91860e MT	172	@property
	173	def non_preferred(self):
	174	"""
	175	Return a generator for all mirrors that are not preferred.
	176	"""
	177	for mirror in self.__mirrors:
	178	if not mirror.preferred:
	179	yield mirror
	180
1de8761d MT	181	@property
	182	def all(self):
	183	"""
	184	Return a generator for all mirrors.
	185	"""
	186	for mirror in self.__mirrors:
	187	yield mirror
	188
4f91860e MT	189	def group(self, grabber):
	190	"""
	191	Return a MirrorGroup object for the given grabber.
	192	"""
	193	# A list of mirrors that is passed to MirrorGroup.
	194	mirrors = []
	195
	196	# Add all preferred mirrors at the first place and shuffle them
	197	# that we will start at a random place.
	198	for mirror in self.preferred:
	199	mirrors.append(mirror.url)
	200	random.shuffle(mirrors)
	201
	202	# All other mirrors are added as well and will only be used if all
	203	# preferred mirrors did not work.
	204	for mirror in self.all:
	205	if mirror.url in mirrors:
	206	continue
	207
60285ce1	208	mirrors.append({ "mirror" : mirror.url })
4f91860e MT	209
	210	return MirrorGroup(grabber, mirrors)
	211
	212
	213
	214	class Downloader(object):
	215	def __init__(self, mirrors, files):
	216	self.grabber = PakfireGrabber()
	217
	218	self.mirrorgroup = mirrors.group(self.grabber)
	219
	220