]> git.ipfire.org Git - people/ms/libloc.git/commitdiff
importer: Add command to import geofeeds into the database
authorMichael Tremer <michael.tremer@ipfire.org>
Mon, 26 Sep 2022 23:11:16 +0000 (23:11 +0000)
committerMichael Tremer <michael.tremer@ipfire.org>
Tue, 27 Sep 2022 16:47:17 +0000 (16:47 +0000)
Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/scripts/location-importer.in

index 5bd5da3612551423c6c5aeebf3c9f0a274568024..ddec3762f6b7aba5397fab8e0bb807aed255bd55 100644 (file)
@@ -18,6 +18,7 @@
 ###############################################################################
 
 import argparse
+import concurrent.futures
 import ipaddress
 import json
 import logging
@@ -95,6 +96,11 @@ class CLI(object):
                update_announcements.add_argument("server", nargs=1,
                        help=_("Route Server to connect to"), metavar=_("SERVER"))
 
+               # Update geofeeds
+               update_geofeeds = subparsers.add_parser("update-geofeeds",
+                       help=_("Update Geofeeds"))
+               update_geofeeds.set_defaults(func=self.handle_update_geofeeds)
+
                # Update overrides
                update_overrides = subparsers.add_parser("update-overrides",
                        help=_("Update overrides"),
@@ -183,6 +189,25 @@ class CLI(object):
                                CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
 
                                -- geofeeds
+                               CREATE TABLE IF NOT EXISTS geofeeds(
+                                       id serial primary key,
+                                       url text,
+                                       status integer default null,
+                                       updated_at timestamp without time zone default null
+                               );
+                               CREATE UNIQUE INDEX IF NOT EXISTS geofeeds_unique
+                                       ON geofeeds(url);
+                               CREATE TABLE IF NOT EXISTS geofeed_networks(
+                                       geofeed_id integer references geofeeds(id) on delete cascade,
+                                       network inet,
+                                       country text,
+                                       region text,
+                                       city text
+                               );
+                               CREATE INDEX IF NOT EXISTS geofeed_networks_geofeed_id
+                                       ON geofeed_networks(geofeed_id);
+                               CREATE INDEX IF NOT EXISTS geofeed_networks_search
+                                       ON geofeed_networks(network);
                                CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
                                CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique
                                        ON network_geofeeds(network);
@@ -1253,6 +1278,124 @@ class CLI(object):
                                # Otherwise return the line
                                yield line
 
+       def handle_update_geofeeds(self, ns):
+               # Fetch all Geofeeds that require an update
+               geofeeds = self.db.query("""
+                       SELECT
+                               id,
+                               url
+                       FROM
+                               geofeeds
+                       WHERE
+                               updated_at IS NULL
+                       OR
+                               updated_at <= CURRENT_TIMESTAMP - INTERVAL '1 week'
+                       ORDER BY
+                               id
+               """)
+
+               with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+                       results = executor.map(self._fetch_geofeed, geofeeds)
+
+                       for result in results:
+                               print(result)
+
+       def _fetch_geofeed(self, geofeed):
+               log.debug("Fetching Geofeed %s" % geofeed.url)
+
+               with self.db.transaction():
+                       # Open the URL
+                       try:
+                               req = urllib.request.Request(geofeed.url, headers={
+                                       "User-Agent" : "location/%s" % location.__version__,
+
+                                       # We expect some plain text file in CSV format
+                                       "Accept" : "text/csv, text/plain",
+                               })
+
+                               # XXX set proxy
+
+                               # Send the request
+                               with urllib.request.urlopen(req, timeout=10) as f:
+                                       # Remove any previous data
+                                       self.db.execute("DELETE FROM geofeed_networks \
+                                               WHERE geofeed_id = %s", geofeed.id)
+
+                                       # Read the output line by line
+                                       for line in f:
+                                               line = line.decode()
+
+                                               # Strip any newline
+                                               line = line.rstrip()
+
+                                               # Skip empty lines
+                                               if not line:
+                                                       continue
+
+                                               # Try to parse the line
+                                               try:
+                                                       fields = line.split(",", 5)
+                                               except ValueError:
+                                                       log.debug("Could not parse line: %s" % line)
+                                                       continue
+
+                                               # Check if we have enough fields
+                                               if len(fields) < 4:
+                                                       log.debug("Not enough fields in line: %s" % line)
+                                                       continue
+
+                                               # Fetch all fields
+                                               network, country, region, city, = fields[:4]
+
+                                               # Try to parse the network
+                                               try:
+                                                       network = ipaddress.ip_network(network, strict=False)
+                                               except ValueError:
+                                                       log.debug("Could not parse network: %s" % network)
+                                                       continue
+
+                                               # XXX Check the country code
+
+                                               # Write this into the database
+                                               self.db.execute("""
+                                                       INSERT INTO
+                                                               geofeed_networks (
+                                                                       geofeed_id,
+                                                                       network,
+                                                                       country,
+                                                                       region,
+                                                                       city
+                                                               )
+                                                       VALUES (%s, %s, %s, %s, %s)""",
+                                                       geofeed.id,
+                                                       "%s" % network,
+                                                       country,
+                                                       region,
+                                                       city,
+                                               )
+
+                       # Catch any HTTP errors
+                       except urllib.request.HTTPError as e:
+                               self.db.execute("UPDATE geofeeds SET status = %s \
+                                       WHERE id = %s", e.code, geofeed.id)
+
+                       # Catch any other errors
+                       except urllib.request.URLError as e:
+                               log.error("Could not fetch URL %s: %s" % (geofeed.url, e))
+
+                       # Mark the geofeed as updated
+                       else:
+                               self.db.execute("""
+                                       UPDATE
+                                               geofeeds
+                                       SET
+                                               updated_at = CURRENT_TIMESTAMP,
+                                               status = NULL
+                                       WHERE
+                                               id = %s""",
+                                       geofeed.id,
+                               )
+
        def handle_update_overrides(self, ns):
                with self.db.transaction():
                        # Only drop manually created overrides, as we can be reasonably sure to have them,