]> git.ipfire.org Git - people/ms/libloc.git/commitdiff
geofeed: Parse and normalize any URLs
authorMichael Tremer <michael.tremer@ipfire.org>
Wed, 5 Jul 2023 09:39:35 +0000 (09:39 +0000)
committerMichael Tremer <michael.tremer@ipfire.org>
Wed, 5 Jul 2023 09:39:35 +0000 (09:39 +0000)
It would be nice if we had an easy way to check if the URL is valid, but
Python does not seem to have a library function for this.

Therefore we might store invalid URLs in the database, but when making a
request to them, urllib will throw an InvalidURL error.

Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/scripts/location-importer.in

index 55e06ce1166f81b3349847705da418d429174f74..28e80702ad29e408eb4d5b469b6e8a6cec287058 100644 (file)
@@ -897,33 +897,41 @@ class CLI(object):
 
                                # Update any geofeed information
                                geofeed = inetnum.get("geofeed", None)
-
-                               # Make sure that this is a HTTPS URL
-                               if geofeed and not geofeed.startswith("https://"):
-                                       log.warning("Geofeed URL is not using HTTPS: %s" % geofeed)
-                                       geofeed = None
-
-                               # Store/update any geofeeds
                                if geofeed:
-                                       self.db.execute("""
-                                               INSERT INTO
-                                                       network_geofeeds(
-                                                               network,
-                                                               url
-                                                       )
-                                               VALUES(
-                                                       %s, %s
-                                               )
-                                               ON CONFLICT (network) DO
-                                                       UPDATE SET url = excluded.url""",
-                                               "%s" % single_network, geofeed,
-                                       )
+                                       self._parse_geofeed(geofeed, single_network)
 
                                # Delete any previous geofeeds
                                else:
                                        self.db.execute("DELETE FROM network_geofeeds WHERE network = %s",
                                                "%s" % single_network)
 
+       def _parse_geofeed(self, url, single_network):
+               # Store the geofeed URL of single_network; non-HTTPS URLs are only logged
+               parsed = urllib.parse.urlparse(url)
+
+               # Make sure that this is a HTTPS URL (log the string as it was passed in)
+               if parsed.scheme != "https":
+                       log.debug("Geofeed URL is not using HTTPS: %s" % url)
+                       return
+
+               # Put the URL back together normalized
+               url = parsed.geturl()
+
+               # Store/update any geofeeds
+               self.db.execute("""
+                       INSERT INTO
+                               network_geofeeds(
+                                       network,
+                                       url
+                               )
+                       VALUES(
+                               %s, %s
+                       )
+                       ON CONFLICT (network) DO
+                               UPDATE SET url = excluded.url""",
+                       "%s" % single_network, url,
+               )
+
        def _parse_org_block(self, block, source_key):
                org = {}
                for line in block: