[pakfire.git] / python / pakfire / downloader.py

#!/usr/bin/python
###############################################################################
#                                                                             #
# Pakfire - The IPFire package management system                              #
# Copyright (C) 2011 Pakfire development team                                 #
#                                                                             #
# This program is free software: you can redistribute it and/or modify        #
# it under the terms of the GNU General Public License as published by        #
# the Free Software Foundation, either version 3 of the License, or           #
# (at your option) any later version.                                         #
#                                                                             #
# This program is distributed in the hope that it will be useful,             #
# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
# GNU General Public License for more details.                                #
#                                                                             #
# You should have received a copy of the GNU General Public License           #
# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
#                                                                             #
###############################################################################

import json
import os
import pycurl
import random

import logging
log = logging.getLogger("pakfire")

from config import _Config

import urlgrabber.grabber
from urlgrabber.grabber import URLGrabber, URLGrabError
from urlgrabber.mirror import MirrorGroup
from urlgrabber.progress import TextMeter

from pakfire.constants import *
from pakfire.i18n import _

class PakfireGrabber(URLGrabber):
	"""
		URLGrabber that is preconfigured with Pakfire's download settings.

		The constructor takes either a configuration object (_Config) or
		any object that carries one in its "config" attribute. All other
		arguments are handed on to URLGrabber.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		# These options are always enforced, even if the caller passed
		# different values for them.
		kwargs["quote"] = 0
		kwargs["user_agent"] = "pakfire/%s" % PAKFIRE_VERSION

		# NOTE(review): verification of SSL hosts and peers is switched off
		# for every download. Confirm that this is still intended.
		kwargs["ssl_verify_host"] = False
		kwargs["ssl_verify_peer"] = False

		# Find the configuration object we read our settings from.
		self.config = pakfire if isinstance(pakfire, _Config) else pakfire.config

		# Bandwidth throttle: only passed on if a value is configured.
		# A value that is not an integer is reported and replaced by 0.
		throttle = self.config.get("downloader", "bandwidth_throttle")
		if throttle:
			try:
				throttle = int(throttle)
			except ValueError:
				log.error("Configuration value for bandwidth_throttle is invalid.")
				throttle = 0

			kwargs["throttle"] = throttle

		# HTTP proxy: the same proxy is used for http and https.
		proxy = self.config.get("downloader", "http_proxy")
		if proxy:
			kwargs["proxies"] = { "http" : proxy, "https" : proxy }

		URLGrabber.__init__(self, *args, **kwargs)

	def fork(self):
		"""
			Install a fresh Curl object after a process has been forked.
		"""
		# XXX This is an ugly hack that pokes into the internals of
		# urlgrabber: the module-wide Curl handle is replaced by a new
		# one and the old handle is deliberately not closed.
		# This should be fixed in urlgrabber and/or pycurl.
		urlgrabber.grabber._curl_cache = pycurl.Curl()

	def check_offline_mode(self):
		"""
			Raise OfflineModeError if the "offline" option in the
			"downloader" section of the configuration is set.
		"""
		if self.config.get("downloader", "offline"):
			raise OfflineModeError

	def urlread(self, filename, *args, **kwargs):
		"""
			Like URLGrabber.urlread, but refuses to work in offline mode.
		"""
		self.check_offline_mode()

		# The name is passed on UTF-8 encoded. This is a workaround for
		# older versions of urlgrabber which are packaged in Debian and
		# Ubuntu.
		encoded = filename.encode("utf-8")
		return URLGrabber.urlread(self, encoded, *args, **kwargs)

	def urlopen(self, filename, *args, **kwargs):
		"""
			Like URLGrabber.urlopen, but refuses to work in offline mode.
		"""
		self.check_offline_mode()

		# The name is passed on UTF-8 encoded. This is a workaround for
		# older versions of urlgrabber which are packaged in Debian and
		# Ubuntu.
		encoded = filename.encode("utf-8")
		return URLGrabber.urlopen(self, encoded, *args, **kwargs)

	def urlgrab(self, url, *args, **kwargs):
		"""
			Like URLGrabber.urlgrab, but refuses to work in offline mode.
		"""
		self.check_offline_mode()

		# The URL is passed on UTF-8 encoded. This is a workaround for
		# older versions of urlgrabber which are packaged in Debian and
		# Ubuntu.
		encoded = url.encode("utf-8")
		return URLGrabber.urlgrab(self, encoded, *args, **kwargs)


class PackageDownloader(PakfireGrabber):
	"""
		Grabber that reports its progress through a TextMeter.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		# Always install a new text progress meter; a progress object
		# passed in by the caller is replaced.
		kwargs["progress_obj"] = TextMeter()

		PakfireGrabber.__init__(self, pakfire, *args, **kwargs)


class MetadataDownloader(PakfireGrabber):
	"""
		Grabber that sends a "Pragma: no-cache" header with its requests.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		# This replaces any http_headers passed in by the caller.
		kwargs["http_headers"] = (('Pragma', 'no-cache'),)

		PakfireGrabber.__init__(self, pakfire, *args, **kwargs)


class DatabaseDownloader(PackageDownloader):
	"""
		PackageDownloader that additionally sends a "Pragma: no-cache"
		header with its requests.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		# This replaces any http_headers passed in by the caller.
		kwargs["http_headers"] = (('Pragma', 'no-cache'),)

		PackageDownloader.__init__(self, pakfire, *args, **kwargs)


class SourceDownloader(object):
	"""
		Downloads source files into SOURCE_CACHE_DIR.

		If a list of mirror URLs is given, the downloads are spread
		over these mirrors by a MirrorGroup.
	"""
	def __init__(self, pakfire, mirrors=None):
		self.pakfire = pakfire

		self.grabber = PakfireGrabber(
			self.pakfire,
			progress_obj = TextMeter(),
		)

		if mirrors:
			self.grabber = MirrorGroup(self.grabber,
				[{ "mirror" : m.encode("utf-8") } for m in mirrors])

	def download(self, files):
		"""
			Make sure that all given files exist in SOURCE_CACHE_DIR.

			files is an iterable of file names relative to
			SOURCE_CACHE_DIR. Files that are missing or empty are
			downloaded.

			Returns the list of full paths: first the files that were
			already there, then the files that have been downloaded.

			Raises OfflineModeError if something needs to be downloaded
			while pakfire is offline, and DownloadError if a download
			fails or delivers an empty file.
		"""
		existant_files = []
		download_files = []

		for name in files:
			filename = os.path.join(SOURCE_CACHE_DIR, name)
			log.debug("Checking existance of %s..." % filename)

			# Empty files are handled like missing ones.
			if os.path.exists(filename) and os.path.getsize(filename):
				log.debug("...exists!")
				existant_files.append(filename)
			else:
				log.debug("...does not exist!")
				download_files.append(filename)

		if download_files:
			log.info(_("Downloading source files:"))

			if self.pakfire.offline:
				raise OfflineModeError(_("Cannot download source code in offline mode."))

			# Create source download directory.
			if not os.path.exists(SOURCE_CACHE_DIR):
				os.makedirs(SOURCE_CACHE_DIR)

			for filename in download_files:
				basename = os.path.basename(filename)

				try:
					self.grabber.urlgrab(basename, filename=filename)
				except URLGrabError as e:
					# Remove partly downloaded file (if there is any).
					try:
						os.unlink(filename)
					except OSError:
						pass

					raise DownloadError("%s %s" % (basename, e))

				# Check if the downloaded file was empty.
				if os.path.getsize(filename) == 0:
					# Remove the file and raise an error.
					os.unlink(filename)

					raise DownloadError(_("Downloaded empty file: %s") % basename)

			log.info("")

		return existant_files + download_files


class Mirror(object):
	"""
		A single mirror server.

		url       -- URL of the mirror in full format
		location  -- location of the mirror (optional)
		preferred -- whether this mirror should be tried before others
	"""
	def __init__(self, url, location=None, preferred=False):
		# Save URL of the mirror in full format
		self.url = url

		# Save the location (if given)
		self.location = location

		# Save preference. This used to be hard-coded to False, which
		# silently dropped the value passed in by the caller.
		self.preferred = preferred


class MirrorList(object):
	"""
		The list of mirrors of one repository.

		The mirrors are read from a mirror list which is downloaded from
		the URL in "mirrorlist" and kept in the cache of the repository.
	"""
	def __init__(self, pakfire, repo, mirrorlist):
		self.pakfire = pakfire
		self.repo = repo

		self.__mirrors = []

		# Save URL to more mirrors.
		self.mirrorlist = mirrorlist

	@property
	def base_mirror(self):
		"""
			Return a Mirror for the base URL of the repository
			or None if the repository has no base URL.
		"""
		if not self.repo.baseurl:
			return

		return Mirror(self.repo.baseurl, preferred=False)

	@property
	def distro(self):
		"""
			Shortcut to distro from repository.
		"""
		return self.repo.distro

	@property
	def cache(self):
		"""
			Shortcut to cache from repository.
		"""
		return self.repo.cache

	def update(self, force=False):
		"""
			Refresh the cached mirror list if needed and load the
			mirrors from it.

			The list is downloaded if force is set, if there is no
			cached copy or if the cached copy is older than TIME_24H.
			Nothing is done if no mirrorlist URL is set or if pakfire
			is offline. If the download fails, a warning is logged and
			the currently loaded mirrors are left untouched.
		"""
		# XXX should this be allowed?
		if not self.mirrorlist:
			return

		# If the system is not online, we cannot download anything.
		if self.pakfire.offline:
			return

		log.debug("Updating mirrorlist for repository '%s' (force=%s)" % (self.repo.name, force))
		cache_filename = os.path.join("repodata", self.distro.sname, self.distro.release,
			self.repo.name, self.distro.arch, "mirrors")

		if not self.cache.exists(cache_filename):
			# Force the update if no mirrorlist is available.
			force = True

		elif not force:
			age = self.cache.age(cache_filename)

			# If the age could be determined and is higher than 24h,
			# we force an update.
			if age and age > TIME_24H:
				force = True

		if force:
			g = MetadataDownloader(self.pakfire)

			try:
				mirrordata = g.urlread(self.mirrorlist, limit=MIRRORLIST_MAXSIZE)
			except URLGrabError as e:
				log.warning("Could not update the mirrorlist for repo '%s': %s" % (self.repo.name, e))
				return

			# XXX check for empty files or damaged output

			# Save new mirror data to cache. The file is closed even
			# if writing fails.
			with self.cache.open(cache_filename, "w") as f:
				f.write(mirrordata)

		# Read mirrorlist from cache and parse it.
		self.forget_mirrors()
		with self.cache.open(cache_filename) as f:
			self.parse_mirrordata(f.read())

	def parse_mirrordata(self, data):
		"""
			Parse a mirror list in JSON format and add all its mirrors.

			Every item of the "mirrors" list is passed as keyword
			arguments to add_mirror().
		"""
		data = json.loads(data)

		for mirror in data["mirrors"]:
			self.add_mirror(**mirror)

	def add_mirror(self, *args, **kwargs):
		"""
			Create a Mirror from the given arguments and add it.
		"""
		mirror = Mirror(*args, **kwargs)

		self.__mirrors.append(mirror)

	def forget_mirrors(self):
		"""
			Drop all mirrors that have been added so far.
		"""
		self.__mirrors = []

	@property
	def preferred(self):
		"""
			Return a generator for all mirrors that are preferred.
		"""
		for mirror in self.__mirrors:
			if mirror.preferred:
				yield mirror

	@property
	def non_preferred(self):
		"""
			Return a generator for all mirrors that are not preferred.
		"""
		for mirror in self.__mirrors:
			if not mirror.preferred:
				yield mirror

	@property
	def all(self):
		"""
			Return a generator for all mirrors.
		"""
		for mirror in self.__mirrors:
			yield mirror

	def group(self, grabber):
		"""
			Return a MirrorGroup object for the given grabber.
		"""
		# Make sure the mirrorlist is up to date.
		self.update()

		# A list of mirrors that is passed to MirrorGroup.
		# Add all preferred mirrors at the first place and shuffle them
		# that we will start at a random place.
		mirrors = [{ "mirror" : m.url.encode("utf-8") } for m in self.preferred]
		random.shuffle(mirrors)

		# All other mirrors are added as well and will only be used if all
		# preferred mirrors did not work.
		for m in self.all:
			mirror = { "mirror" : m.url.encode("utf-8") }
			if mirror not in mirrors:
				mirrors.append(mirror)

		# Always add the base mirror if any.
		base_mirror = self.base_mirror
		if base_mirror:
			mirror = { "mirror" : base_mirror.url.encode("utf-8") }
			if mirror not in mirrors:
				mirrors.append(mirror)

		return MirrorGroup(grabber, mirrors)


class Downloader(object):
	"""
		Bundles a grabber with the mirror group of a mirror list.

		mirrors -- expected to be a MirrorList (needs "pakfire" and group())
		files   -- currently not used
	"""
	def __init__(self, mirrors, files):
		# PakfireGrabber cannot be created without a pakfire (or config)
		# object, so we take the one the mirror list has been created with.
		self.grabber = PakfireGrabber(mirrors.pakfire)

		self.mirrorgroup = mirrors.group(self.grabber)
Commit	Line	Data
1de8761d	1	#!/usr/bin/python
b792d887 MT	2	###############################################################################
	3	# #
	4	# Pakfire - The IPFire package management system #
	5	# Copyright (C) 2011 Pakfire development team #
	6	# #
	7	# This program is free software: you can redistribute it and/or modify #
	8	# it under the terms of the GNU General Public License as published by #
	9	# the Free Software Foundation, either version 3 of the License, or #
	10	# (at your option) any later version. #
	11	# #
	12	# This program is distributed in the hope that it will be useful, #
	13	# but WITHOUT ANY WARRANTY; without even the implied warranty of #
	14	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
	15	# GNU General Public License for more details. #
	16	# #
	17	# You should have received a copy of the GNU General Public License #
	18	# along with this program. If not, see <http://www.gnu.org/licenses/>. #
	19	# #
	20	###############################################################################
1de8761d MT	21
1de8761d MT	22	import json
062699ee	23	import os
aa14071d	24	import pycurl
4f91860e	25	import random
1de8761d	26
8b6bc023 MT	27	import logging
	28	log = logging.getLogger("pakfire")
	29
a6bd96bc	30	from config import _Config
e57c5475	31
aa14071d	32	import urlgrabber.grabber
1de8761d	33	from urlgrabber.grabber import URLGrabber, URLGrabError
4f91860e	34	from urlgrabber.mirror import MirrorGroup
14ea3228	35	from urlgrabber.progress import TextMeter
1de8761d	36
a2d1644c	37	from pakfire.constants import *
062699ee	38	from pakfire.i18n import _
1de8761d MT	39
	40	class PakfireGrabber(URLGrabber):
	41	"""
	42	Class to make some modifications on the urlgrabber configuration.
	43	"""
80104a80	44	def __init__(self, pakfire, *args, **kwargs):
14ea3228 MT	45	kwargs.update({
	46	"quote" : 0,
	47	"user_agent" : "pakfire/%s" % PAKFIRE_VERSION,
a6bd96bc MT	48
	49	"ssl_verify_host" : False,
	50	"ssl_verify_peer" : False,
14ea3228 MT	51	})
14ea3228 MT	52
a6bd96bc	53	if isinstance(pakfire, _Config):
e57c5475 MT	54	config = pakfire
	55	else:
	56	config = pakfire.config
98733451	57	self.config = config
6a509182	58
cfc16a71	59	# Set throttle setting.
a6bd96bc	60	bandwidth_throttle = config.get("downloader", "bandwidth_throttle")
80104a80 MT	61	if bandwidth_throttle:
	62	try:
	63	bandwidth_throttle = int(bandwidth_throttle)
	64	except ValueError:
8b6bc023	65	log.error("Configuration value for bandwidth_throttle is invalid.")
80104a80 MT	66	bandwidth_throttle = 0
	67
	68	kwargs.update({ "throttle" : bandwidth_throttle })
	69
cfc16a71	70	# Configure HTTP proxy.
a6bd96bc	71	http_proxy = config.get("downloader", "http_proxy")
cfc16a71	72	if http_proxy:
c611f46b	73	kwargs.update({ "proxies" : { "http" : http_proxy, "https" : http_proxy }})
cfc16a71	74
14ea3228 MT	75	URLGrabber.__init__(self, *args, **kwargs)
14ea3228 MT	76
aa14071d MT	77	def fork(self):
	78	"""
	79	Reset Curl object after forking a process.
	80	"""
	81	# XXX this is a very ugly hack and fiddles around with the internals
	82	# or urlgrabber. We should not touch these, but apparently nobody
	83	# else uses multiple threads or processes to talk to their servers.
	84	# So we simply replace Curl with a new instance without closing
	85	# the old one. This should be fixed in urlgrabber and/or pycurl.
	86	urlgrabber.grabber._curl_cache = pycurl.Curl()
	87
98733451 MT	88	def check_offline_mode(self):
	89	offline = self.config.get("downloader", "offline")
	90	if not offline:
	91	return
	92
	93	raise OfflineModeError
	94
4efe0da7	95	def urlread(self, filename, *args, **kwargs):
98733451 MT	96	self.check_offline_mode()
98733451 MT	97
4efe0da7 MT	98	# This is for older versions of urlgrabber which are packaged in Debian
	99	# and Ubuntu and cannot handle filenames as a normal Python string but need
	100	# a unicode string.
	101	return URLGrabber.urlread(self, filename.encode("utf-8"), *args, **kwargs)
	102
0f8d6745	103	def urlopen(self, filename, *args, **kwargs):
98733451 MT	104	self.check_offline_mode()
98733451 MT	105
bbd51f58 MT	106	# This is for older versions of urlgrabber which are packaged in Debian
	107	# and Ubuntu and cannot handle filenames as a normal Python string but need
	108	# a unicode string.
	109	return URLGrabber.urlopen(self, filename.encode("utf-8"), *args, **kwargs)
	110
	111	def urlgrab(self, url, *args, **kwargs):
	112	self.check_offline_mode()
0f8d6745	113
bbd51f58 MT	114	# This is for older versions of urlgrabber which are packaged in Debian
	115	# and Ubuntu and cannot handle filenames as a normal Python string but need
	116	# a unicode string.
	117	return URLGrabber.urlgrab(self, url.encode("utf-8"), *args, **kwargs)
0f8d6745	118
14ea3228 MT	119
14ea3228 MT	120	class PackageDownloader(PakfireGrabber):
80104a80	121	def __init__(self, pakfire, *args, **kwargs):
14ea3228	122	kwargs.update({
ca38a577	123	"progress_obj" : TextMeter(),
14ea3228 MT	124	})
14ea3228 MT	125
80104a80	126	PakfireGrabber.__init__(self, pakfire, *args, **kwargs)
14ea3228 MT	127
	128
	129	class MetadataDownloader(PakfireGrabber):
80104a80	130	def __init__(self, pakfire, *args, **kwargs):
14ea3228 MT	131	kwargs.update({
	132	"http_headers" : (('Pragma', 'no-cache'),),
	133	})
	134
80104a80	135	PakfireGrabber.__init__(self, pakfire, *args, **kwargs)
14ea3228 MT	136
	137
	138	class DatabaseDownloader(PackageDownloader):
80104a80	139	def __init__(self, pakfire, *args, **kwargs):
14ea3228 MT	140	kwargs.update({
	141	"http_headers" : (('Pragma', 'no-cache'),),
	142	})
	143
80104a80	144	PackageDownloader.__init__(self, pakfire, *args, **kwargs)
1de8761d	145
4f91860e	146
062699ee MT	147	class SourceDownloader(object):
	148	def __init__(self, pakfire, mirrors=None):
	149	self.pakfire = pakfire
	150
	151	self.grabber = PakfireGrabber(
	152	self.pakfire,
	153	progress_obj = TextMeter(),
	154	)
	155
	156	if mirrors:
	157	self.grabber = MirrorGroup(self.grabber,
4efe0da7	158	[{ "mirror" : m.encode("utf-8") } for m in mirrors])
062699ee MT	159
	160	def download(self, files):
	161	existant_files = []
	162	download_files = []
	163
	164	for file in files:
	165	filename = os.path.join(SOURCE_CACHE_DIR, file)
b76f5f47	166	log.debug("Checking existance of %s..." % filename)
062699ee	167
9ddb19b9	168	if os.path.exists(filename) and os.path.getsize(filename):
b76f5f47	169	log.debug("...exists!")
062699ee MT	170	existant_files.append(filename)
062699ee MT	171	else:
b76f5f47	172	log.debug("...does not exist!")
062699ee MT	173	download_files.append(filename)
	174
	175	if download_files:
8b6bc023	176	log.info(_("Downloading source files:"))
062699ee	177
98733451 MT	178	if self.pakfire.offline:
	179	raise OfflineModeError, _("Cannot download source code in offline mode.")
	180
062699ee MT	181	# Create source download directory.
	182	if not os.path.exists(SOURCE_CACHE_DIR):
	183	os.makedirs(SOURCE_CACHE_DIR)
	184
	185	for filename in download_files:
	186	try:
	187	self.grabber.urlgrab(os.path.basename(filename), filename=filename)
	188	except URLGrabError, e:
08de9306 MT	189	# Remove partly downloaded file.
	190	try:
	191	os.unlink(filename)
	192	except OSError:
	193	pass
	194
062699ee MT	195	raise DownloadError, "%s %s" % (os.path.basename(filename), e)
062699ee MT	196
9ddb19b9 MT	197	# Check if the downloaded file was empty.
	198	if os.path.getsize(filename) == 0:
	199	# Remove the file and raise an error.
	200	os.unlink(filename)
	201
	202	raise DownloadError, _("Downloaded empty file: %s") \
	203	% os.path.basename(filename)
	204
8b6bc023	205	log.info("")
062699ee MT	206
	207	return existant_files + download_files
	208
	209
1de8761d	210	class Mirror(object):
4f91860e	211	def __init__(self, url, location=None, preferred=False):
1de8761d	212	# Save URL of the mirror in full format
4f91860e	213	self.url = url
1de8761d MT	214
	215	# Save the location (if given)
	216	self.location = location
	217
	218	# Save preference
	219	self.preferred = False
	220
	221
	222	class MirrorList(object):
0f8d6745	223	def __init__(self, pakfire, repo, mirrorlist):
1de8761d MT	224	self.pakfire = pakfire
	225	self.repo = repo
	226
	227	self.__mirrors = []
	228
	229	# Save URL to more mirrors.
0f8d6745 MT	230	self.mirrorlist = mirrorlist
	231
	232	@property
	233	def base_mirror(self):
	234	if not self.repo.baseurl:
	235	return
	236
	237	return Mirror(self.repo.baseurl, preferred=False)
1de8761d	238
5a99898b MT	239	@property
	240	def distro(self):
	241	return self.repo.distro
	242
1de8761d MT	243	@property
	244	def cache(self):
	245	"""
	246	Shortcut to cache from repository.
	247	"""
	248	return self.repo.cache
	249
	250	def update(self, force=False):
	251	# XXX should this be allowed?
	252	if not self.mirrorlist:
	253	return
	254
c07a3ca7 MT	255	# If the system is not online, we cannot download anything.
	256	if self.pakfire.offline:
	257	return
	258
8b6bc023	259	log.debug("Updating mirrorlist for repository '%s' (force=%s)" % (self.repo.name, force))
5a99898b MT	260	cache_filename = os.path.join("repodata", self.distro.sname, self.distro.release,
5a99898b MT	261	self.repo.name, self.distro.arch, "mirrors")
1de8761d MT	262
	263	# Force the update if no mirrorlist is available.
	264	if not self.cache.exists(cache_filename):
	265	force = True
	266
	267	if not force and self.cache.exists(cache_filename):
	268	age = self.cache.age(cache_filename)
	269
	270	# If the age could be determined and is higher than 24h,
	271	# we force an update.
	272	if age and age > TIME_24H:
	273	force = True
	274
	275	if force:
80104a80	276	g = MetadataDownloader(self.pakfire)
1de8761d MT	277
	278	try:
	279	mirrordata = g.urlread(self.mirrorlist, limit=MIRRORLIST_MAXSIZE)
	280	except URLGrabError, e:
8b6bc023	281	log.warning("Could not update the mirrorlist for repo '%s': %s" % (self.repo.name, e))
1de8761d MT	282	return
	283
	284	# XXX check for empty files or damaged output
	285
	286	# Save new mirror data to cache.
	287	f = self.cache.open(cache_filename, "w")
	288	f.write(mirrordata)
	289	f.close()
	290
	291	# Read mirrorlist from cache and parse it.
90919c62	292	self.forget_mirrors()
1de8761d MT	293	with self.cache.open(cache_filename) as f:
	294	self.parse_mirrordata(f.read())
	295
	296	def parse_mirrordata(self, data):
	297	data = json.loads(data)
	298
	299	for mirror in data["mirrors"]:
	300	self.add_mirror(**mirror)
	301
	302	def add_mirror(self, *args, **kwargs):
	303	mirror = Mirror(*args, **kwargs)
	304
	305	self.__mirrors.append(mirror)
	306
90919c62 MT	307	def forget_mirrors(self):
	308	self.__mirrors = []
	309
1de8761d MT	310	@property
	311	def preferred(self):
	312	"""
	313	Return a generator for all mirrors that are preferred.
	314	"""
	315	for mirror in self.__mirrors:
	316	if mirror.preferred:
	317	yield mirror
	318
4f91860e MT	319	@property
	320	def non_preferred(self):
	321	"""
	322	Return a generator for all mirrors that are not preferred.
	323	"""
	324	for mirror in self.__mirrors:
	325	if not mirror.preferred:
	326	yield mirror
	327
1de8761d MT	328	@property
	329	def all(self):
	330	"""
	331	Return a generator for all mirrors.
	332	"""
	333	for mirror in self.__mirrors:
	334	yield mirror
	335
4f91860e MT	336	def group(self, grabber):
	337	"""
	338	Return a MirrorGroup object for the given grabber.
	339	"""
90919c62 MT	340	# Make sure the mirrorlist is up to date.
	341	self.update()
	342
4f91860e MT	343	# A list of mirrors that is passed to MirrorGroup.
	344	mirrors = []
	345
	346	# Add all preferred mirrors at the first place and shuffle them
	347	# that we will start at a random place.
	348	for mirror in self.preferred:
9a1da36a	349	mirrors.append({ "mirror" : mirror.url.encode("utf-8") })
4f91860e MT	350	random.shuffle(mirrors)
	351
	352	# All other mirrors are added as well and will only be used if all
	353	# preferred mirrors did not work.
	354	for mirror in self.all:
9a1da36a MT	355	mirror = { "mirror" : mirror.url.encode("utf-8") }
9a1da36a MT	356	if mirror in mirrors:
4f91860e MT	357	continue
4f91860e MT	358
9a1da36a MT	359	mirrors.append(mirror)
	360
	361	# Always add the base mirror if any.
	362	base_mirror = self.base_mirror
	363	if base_mirror:
	364	mirror = { "mirror" : base_mirror.url.encode("utf-8") }
	365	if not mirror in mirrors:
	366	mirrors.append(mirror)
4f91860e MT	367
	368	return MirrorGroup(grabber, mirrors)
	369
	370
	371
	372	class Downloader(object):
	373	def __init__(self, mirrors, files):
	374	self.grabber = PakfireGrabber()
	375
	376	self.mirrorgroup = mirrors.group(self.grabber)
	377
	378