From: Michael Tremer
Date: Mon, 4 Mar 2024 12:20:10 +0000 (+0000)
Subject: importer: Merge the downloader into our main downloader
X-Git-Tag: 0.9.18~105
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=335946c062fd680735b3f7b303130097b09f2c55;p=location%2Flibloc.git

importer: Merge the downloader into our main downloader

I don't know why we ended up with duplicated code here, but there
seems to be no reason whatsoever for this.

Signed-off-by: Michael Tremer
---

diff --git a/Makefile.am b/Makefile.am
index 6986afe..a7d1c4f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -194,7 +194,6 @@ dist_pkgpython_PYTHON = \
 	src/python/location/downloader.py \
 	src/python/location/export.py \
 	src/python/location/i18n.py \
-	src/python/location/importer.py \
 	src/python/location/logger.py
 
 pyexec_LTLIBRARIES = \
diff --git a/src/python/location/downloader.py b/src/python/location/downloader.py
index 3618968..4e9e184 100644
--- a/src/python/location/downloader.py
+++ b/src/python/location/downloader.py
@@ -16,6 +16,7 @@
 #                                                                             #
 ###############################################################################
 
+import gzip
 import logging
 import lzma
 import os
@@ -207,3 +208,56 @@ class Downloader(object):
 			return False
 
 		return True
+
+	def retrieve(self, url, **kwargs):
+		"""
+			This method will fetch the content at the given URL
+			and will return a file-object to a temporary file.
+
+			If the content was compressed, it will be decompressed on the fly.
+		"""
+		# Open a temporary file to buffer the downloaded content
+		t = tempfile.SpooledTemporaryFile(max_size=100 * 1024 * 1024)
+
+		# Create a new request
+		req = self._make_request(url, **kwargs)
+
+		# Send request
+		res = self._send_request(req)
+
+		# Write the payload to the temporary file
+		with res as f:
+			while True:
+				buf = f.read(65536)
+				if not buf:
+					break
+
+				t.write(buf)
+
+		# Rewind the temporary file
+		t.seek(0)
+
+		gzip_compressed = False
+
+		# Fetch the content type
+		content_type = res.headers.get("Content-Type")
+
+		# Decompress any gzipped response on the fly
+		if content_type in ("application/x-gzip", "application/gzip"):
+			gzip_compressed = True
+
+		# Check for the gzip magic in case web servers send a different MIME type
+		elif t.read(2) == b"\x1f\x8b":
+			gzip_compressed = True
+
+		# Reset again
+		t.seek(0)
+
+		# Decompress the temporary file
+		if gzip_compressed:
+			log.debug("Gzip compression detected")
+
+			t = gzip.GzipFile(fileobj=t, mode="rb")
+
+		# Return the temporary file handle
+		return t
diff --git a/src/python/location/importer.py b/src/python/location/importer.py
deleted file mode 100644
index 58ec368..0000000
--- a/src/python/location/importer.py
+++ /dev/null
@@ -1,101 +0,0 @@
-###############################################################################
-#                                                                             #
-# libloc - A library to determine the location of someone on the Internet    #
-#                                                                             #
-# Copyright (C) 2020 IPFire Development Team                                  #
-#                                                                             #
-# This library is free software; you can redistribute it and/or              #
-# modify it under the terms of the GNU Lesser General Public                 #
-# License as published by the Free Software Foundation; either               #
-# version 2.1 of the License, or (at your option) any later version.         #
-#                                                                             #
-# This library is distributed in the hope that it will be useful,            #
-# but WITHOUT ANY WARRANTY; without even the implied warranty of             #
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU           #
-# Lesser General Public License for more details.                            #
-#                                                                             #
-###############################################################################
-
-import gzip
-import logging
-import tempfile
-import urllib.request
-
-# Initialise logging
-log = logging.getLogger("location.importer")
-log.propagate = 1
-
-class Downloader(object):
-	def __init__(self):
-		self.proxy = None
-
-	def set_proxy(self, url):
-		"""
-			Sets a HTTP proxy that is used to perform all requests
-		"""
-		log.info("Using proxy %s" % url)
-		self.proxy = url
-
-	def retrieve(self, url, **kwargs):
-		"""
-			This method will fetch the content at the given URL
-			and will return a file-object to a temporary file.
-
-			If the content was compressed, it will be decompressed on the fly.
-		"""
-		# Open a temporary file to buffer the downloaded content
-		t = tempfile.SpooledTemporaryFile(max_size=100 * 1024 * 1024)
-
-		# Create a new request
-		req = urllib.request.Request(url, **kwargs)
-
-		# Configure proxy
-		if self.proxy:
-			req.set_proxy(self.proxy, "http")
-
-		log.info("Retrieving %s..." % req.full_url)
-
-		# Send request
-		res = urllib.request.urlopen(req)
-
-		# Log the response headers
-		log.debug("Response Headers:")
-		for header in res.headers:
-			log.debug("	%s: %s" % (header, res.headers[header]))
-
-		# Write the payload to the temporary file
-		with res as f:
-			while True:
-				buf = f.read(65536)
-				if not buf:
-					break
-
-				t.write(buf)
-
-		# Rewind the temporary file
-		t.seek(0)
-
-		gzip_compressed = False
-
-		# Fetch the content type
-		content_type = res.headers.get("Content-Type")
-
-		# Decompress any gzipped response on the fly
-		if content_type in ("application/x-gzip", "application/gzip"):
-			gzip_compressed = True
-
-		# Check for the gzip magic in case web servers send a different MIME type
-		elif t.read(2) == b"\x1f\x8b":
-			gzip_compressed = True
-
-		# Reset again
-		t.seek(0)
-
-		# Decompress the temporary file
-		if gzip_compressed:
-			log.debug("Gzip compression detected")
-
-			t = gzip.GzipFile(fileobj=t, mode="rb")
-
-		# Return the temporary file handle
-		return t
diff --git a/src/scripts/location-importer.in b/src/scripts/location-importer.in
index eb14246..5b6ffad 100644
--- a/src/scripts/location-importer.in
+++ b/src/scripts/location-importer.in
@@ -33,7 +33,7 @@ import urllib.error
 # Load our location module
 import location
 import location.database
-import location.importer
+from location.downloader import Downloader
 from location.i18n import _
 
 # Initialise logging
@@ -162,6 +162,9 @@ class CLI(object):
 		# Parse command line arguments
 		args = self.parse_cli()
 
+		# Initialize the downloader
+		self.downloader = Downloader()
+
 		# Initialise database
 		self.db = self._setup_database(args)
 
@@ -689,8 +692,6 @@ class CLI(object):
 			writer.write(file)
 
 	def handle_update_whois(self, ns):
-		downloader = location.importer.Downloader()
-
 		# Did we run successfully?
 		success = True
 
@@ -749,7 +750,7 @@
 				continue
 
 			try:
-				self._process_source(downloader, name, feeds, countries)
+				self._process_source(name, feeds, countries)
 
 			# Log an error but continue if an exception occurs
 			except Exception as e:
@@ -759,7 +760,7 @@
 		# Return a non-zero exit code for errors
 		return 0 if success else 1
 
-	def _process_source(self, downloader, source, feeds, countries):
+	def _process_source(self, source, feeds, countries):
 		"""
 			This function processes one source
 		"""
@@ -791,7 +792,7 @@
 		# Parse all feeds
 		for callback, url, *args in feeds:
 			# Retrieve the feed
-			f = downloader.retrieve(url)
+			f = self.downloader.retrieve(url)
 
 			# Call the callback
 			callback(source, countries, f, *args)
@@ -1599,9 +1600,6 @@ class CLI(object):
 			yield line
 
 	def handle_update_geofeeds(self, ns):
-		# Create a downloader
-		downloader = location.importer.Downloader()
-
 		# Sync geofeeds
 		with self.db.transaction():
 			# Delete all geofeeds which are no longer linked
@@ -1652,7 +1650,7 @@ class CLI(object):
 		# Update all geofeeds
 		for geofeed in geofeeds:
 			with self.db.transaction():
-				self._fetch_geofeed(downloader, geofeed)
+				self._fetch_geofeed(geofeed)
 
 		# Delete data from any feeds that did not update in the last two weeks
 		with self.db.transaction():
@@ -1672,14 +1670,14 @@
 			)
 		""")
 
-	def _fetch_geofeed(self, downloader, geofeed):
+	def _fetch_geofeed(self, geofeed):
 		log.debug("Fetching Geofeed %s" % geofeed.url)
 
 		with self.db.transaction():
 			# Open the URL
 			try:
 				# Send the request
-				f = downloader.retrieve(geofeed.url, headers={
+				f = self.downloader.retrieve(geofeed.url, headers={
 					"User-Agent" : "location/%s" % location.__version__,
 
 					# We expect some plain text file in CSV format
@@ -1897,9 +1895,6 @@ class CLI(object):
 		"""
 		success = True
 
-		# Create a downloader
-		downloader = location.importer.Downloader()
-
 		feeds = (
 			# AWS IP Ranges
 			("AWS-IP-RANGES", self._import_aws_ip_ranges, "https://ip-ranges.amazonaws.com/ip-ranges.json"),
@@ -1928,7 +1923,7 @@
 				continue
 
 			try:
-				self._process_feed(downloader, name, callback, url, *args)
+				self._process_feed(name, callback, url, *args)
 
 			# Log an error but continue if an exception occurs
 			except Exception as e:
@@ -1938,12 +1933,12 @@
 		# Return status
 		return 0 if success else 1
 
-	def _process_feed(self, downloader, name, callback, url, *args):
+	def _process_feed(self, name, callback, url, *args):
 		"""
 			Processes one feed
 		"""
 		# Open the URL
-		f = downloader.retrieve(url)
+		f = self.downloader.retrieve(url)
 
 		with self.db.transaction():
 			# Drop any previous content
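--
Usage note (not part of the patch): after this merge, callers construct the
shared Downloader instead of location.importer.Downloader, and retrieve()
keeps the same semantics the importer relied on. A minimal sketch of how a
caller exercises it, assuming the feed URL below is hypothetical and merely
stands in for any plain or gzip-compressed source:

	from location.downloader import Downloader

	downloader = Downloader()

	# retrieve() buffers the response in a SpooledTemporaryFile and
	# transparently unwraps gzip, detected via either the Content-Type
	# header or the b"\x1f\x8b" magic bytes at the start of the payload.
	f = downloader.retrieve("https://example.com/feed.txt.gz")  # hypothetical URL

	# The returned file object yields the decompressed payload as bytes
	for line in f:
		print(line.decode())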