Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
log.info("Using proxy %s" % url)
self.proxy = url
- def retrieve(self, url, data=None):
+ def retrieve(self, url, **kwargs):
"""
This method will fetch the content at the given URL
and will return a file-object to a temporary file.
"""
t = tempfile.SpooledTemporaryFile(max_size=100 * 1024 * 1024)
# Create a new request
- req = urllib.request.Request(url, data=data)
+ req = urllib.request.Request(url, **kwargs)
# Configure proxy
if self.proxy:
import argparse
import concurrent.futures
import csv
import http.client
import ipaddress
import json
+ # Create a downloader
+ downloader = location.importer.Downloader()
+
+ # Pass the downloader to the fetch_geofeed function
+ fetch_geofeed = functools.partial(self._fetch_geofeed, downloader)
+
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
- results = executor.map(self._fetch_geofeed, geofeeds)
+ results = executor.map(fetch_geofeed, geofeeds)
# Fetch all results to raise any exceptions
for result in results:
- def _fetch_geofeed(self, geofeed):
+ def _fetch_geofeed(self, downloader, geofeed):
log.debug("Fetching Geofeed %s" % geofeed.url)
with self.db.transaction():
# Open the URL
try:
- req = urllib.request.Request(geofeed.url, headers={
+ # Send the request
+ f = downloader.retrieve(geofeed.url, headers={
"User-Agent" : "location/%s" % location.__version__,
# We expect some plain text file in CSV format
- "Accept" : "text/csv, text/plain",
+ "Accept" : "text/csv, text/plain",
- # Send the request
- with urllib.request.urlopen(req, timeout=10) as f:
- # Remove any previous data
- self.db.execute("DELETE FROM geofeed_networks \
- WHERE geofeed_id = %s", geofeed.id)
+ # Remove any previous data
+ self.db.execute("DELETE FROM geofeed_networks \
+ WHERE geofeed_id = %s", geofeed.id)
- # Read the output line by line
- for line in f:
- lineno += 1
-
+ # Read the output line by line
+ for line in f:
+ lineno += 1
- try:
- line = line.decode()
+ try:
+ line = line.decode()
- # Ignore any lines we cannot decode
- except UnicodeDecodeError:
- log.debug("Could not decode line %s in %s" \
- % (lineno, geofeed.url))
- continue
+ # Ignore any lines we cannot decode
+ except UnicodeDecodeError:
+ log.debug("Could not decode line %s in %s" \
+ % (lineno, geofeed.url))
+ continue
- # Strip any newline
- line = line.rstrip()
+ # Strip any newline
+ line = line.rstrip()
- # Skip empty lines
- if not line:
- continue
+ # Skip empty lines
+ if not line:
+ continue
- # Try to parse the line
- try:
- fields = line.split(",", 5)
- except ValueError:
- log.debug("Could not parse line: %s" % line)
- continue
+ # Try to parse the line
+ try:
+ fields = line.split(",", 5)
+ except ValueError:
+ log.debug("Could not parse line: %s" % line)
+ continue
- # Check if we have enough fields
- if len(fields) < 4:
- log.debug("Not enough fields in line: %s" % line)
- continue
+ # Check if we have enough fields
+ if len(fields) < 4:
+ log.debug("Not enough fields in line: %s" % line)
+ continue
- # Fetch all fields
- network, country, region, city, = fields[:4]
+ # Fetch all fields
+ network, country, region, city, = fields[:4]
- # Try to parse the network
- try:
- network = ipaddress.ip_network(network, strict=False)
- except ValueError:
- log.debug("Could not parse network: %s" % network)
- continue
+ # Try to parse the network
+ try:
+ network = ipaddress.ip_network(network, strict=False)
+ except ValueError:
+ log.debug("Could not parse network: %s" % network)
+ continue
- # Strip any excess whitespace from country codes
- country = country.strip()
+ # Strip any excess whitespace from country codes
+ country = country.strip()
- # Make the country code uppercase
- country = country.upper()
+ # Make the country code uppercase
+ country = country.upper()
- # Check the country code
- if not country:
- log.debug("Empty country code in Geofeed %s line %s" \
- % (geofeed.url, lineno))
- continue
+ # Check the country code
+ if not country:
+ log.debug("Empty country code in Geofeed %s line %s" \
+ % (geofeed.url, lineno))
+ continue
- elif not location.country_code_is_valid(country):
- log.debug("Invalid country code in Geofeed %s:%s: %s" \
- % (geofeed.url, lineno, country))
- continue
+ elif not location.country_code_is_valid(country):
+ log.debug("Invalid country code in Geofeed %s:%s: %s" \
+ % (geofeed.url, lineno, country))
+ continue
- # Write this into the database
- self.db.execute("""
- INSERT INTO
- geofeed_networks (
- geofeed_id,
- network,
- country,
- region,
- city
- )
- VALUES (%s, %s, %s, %s, %s)""",
- geofeed.id,
- "%s" % network,
- country,
- region,
- city,
- )
+ # Write this into the database
+ self.db.execute("""
+ INSERT INTO
+ geofeed_networks (
+ geofeed_id,
+ network,
+ country,
+ region,
+ city
+ )
+ VALUES (%s, %s, %s, %s, %s)""",
+ geofeed.id,
+ "%s" % network,
+ country,
+ region,
+ city,
+ )
# Catch any HTTP errors
except urllib.request.HTTPError as e: