From: Michael Tremer Date: Wed, 5 Jul 2023 09:39:35 +0000 (+0000) Subject: geofeed: Parse and normalize any URLs X-Git-Tag: 0.9.17~16 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=da832d449181e19c83371c1b3740f84fc989995e;p=location%2Flibloc.git geofeed: Parse and normalize any URLs It would be nice if we had an easy way to check if the URL is valid, but Python does not seem to have a library function for this. Therefore we might store invalid URLs in the database, but when making a request to them, urllib will throw an InvalidURL error. Signed-off-by: Michael Tremer --- diff --git a/src/scripts/location-importer.in b/src/scripts/location-importer.in index 55e06ce..28e8070 100644 --- a/src/scripts/location-importer.in +++ b/src/scripts/location-importer.in @@ -897,33 +897,41 @@ class CLI(object): # Update any geofeed information geofeed = inetnum.get("geofeed", None) - - # Make sure that this is a HTTPS URL - if geofeed and not geofeed.startswith("https://"): - log.warning("Geofeed URL is not using HTTPS: %s" % geofeed) - geofeed = None - - # Store/update any geofeeds if geofeed: - self.db.execute(""" - INSERT INTO - network_geofeeds( - network, - url - ) - VALUES( - %s, %s - ) - ON CONFLICT (network) DO - UPDATE SET url = excluded.url""", - "%s" % single_network, geofeed, - ) + self._parse_geofeed(geofeed, single_network) # Delete any previous geofeeds else: self.db.execute("DELETE FROM network_geofeeds WHERE network = %s", "%s" % single_network) + def _parse_geofeed(self, url, single_network): + # Parse the URL + url = urllib.parse.urlparse(url) + + # Make sure that this is a HTTPS URL + if not url.scheme == "https": + log.debug("Geofeed URL is not using HTTPS: %s" % geofeed) + return + + # Put the URL back together normalized + url = url.geturl() + + # Store/update any geofeeds + self.db.execute(""" + INSERT INTO + network_geofeeds( + network, + url + ) + VALUES( + %s, %s + ) + ON CONFLICT (network) DO + UPDATE SET url = 
excluded.url""", + "%s" % single_network, url, + ) + def _parse_org_block(self, block, source_key): org = {} for line in block: