From: Michael Tremer Date: Tue, 7 Jan 2020 17:10:31 +0000 (+0000) Subject: tools: Migrate to use extended format X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0876640488a15f96636fc9494f824e48727e3ef5;p=location%2Flocation-database.git tools: Migrate to use extended format This patch drops parsing the "whois" information and uses the "new" "RIR Statistics Exchange Format": ftp://ftp.ripe.net/ripe/stats/RIR-Statistics-Exchange-Format.txt Signed-off-by: Michael Tremer --- diff --git a/tools/afrinic.py b/tools/afrinic.py index ad416f7..26e526f 100644 --- a/tools/afrinic.py +++ b/tools/afrinic.py @@ -26,5 +26,5 @@ class AFRINIC(base.RIR): name = "African Network Information Centre" database_urls = ( - "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz", + "https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest", ) diff --git a/tools/apnic.py b/tools/apnic.py index a38246a..d34d6c9 100644 --- a/tools/apnic.py +++ b/tools/apnic.py @@ -26,10 +26,5 @@ class APNIC(base.RIR): name = "Asia Pacific Network Information Centre" database_urls = ( - "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz", - "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz", - "https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz", - "https://ftp.apnic.net/apnic/whois/apnic.db.route.gz", - "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz", - "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz", + "http://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest", ) diff --git a/tools/arin.py b/tools/arin.py index 6ed669e..8271062 100644 --- a/tools/arin.py +++ b/tools/arin.py @@ -26,5 +26,5 @@ class ARIN(base.RIR): name = "American Registry for Internet Numbers" database_urls = ( - "https://ftp.arin.net/pub/rr/arin.db", + "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest", ) diff --git a/tools/base.py b/tools/base.py index 4d3da0d..f469bc8 100644 --- a/tools/base.py +++ b/tools/base.py @@ -98,21 +98,17 @@ 
class RIRParser(object): def _make_database(self, filename): db = sqlite3.connect(filename) + db.set_trace_callback(logging.debug) # Create database layout with db as cursor: cursor.executescript(""" - CREATE TABLE IF NOT EXISTS autnums(asn INTEGER, name TEXT, org TEXT); + CREATE TABLE IF NOT EXISTS autnums(asn INTEGER, country TEXT, org_id INTEGER); + CREATE INDEX autnums_org_id ON autnums(org_id); - CREATE TABLE IF NOT EXISTS inetnums(network TEXT, netname TEXT, country TEXT, description TEXT, - family INTEGER, address_start BLOB, address_end BLOB, prefix INTEGER); - CREATE INDEX inetnums_search ON inetnums(family, prefix, address_start); - - CREATE TABLE IF NOT EXISTS organisations(handle TEXT, name TEXT, country TEXT); - CREATE INDEX IF NOT EXISTS organisations_handle ON organisations(handle); - - CREATE TABLE IF NOT EXISTS routes(route TEXT, asn INTEGER, + CREATE TABLE IF NOT EXISTS inetnums(network TEXT, country TEXT, org_id INTEGER, family INTEGER, address_start BLOB, address_end BLOB, prefix INTEGER); + CREATE INDEX inetnums_sort ON inetnums(address_start); """) return db @@ -220,251 +216,135 @@ class RIRParser(object): def parse_url(self, url): with self.downloader.request(url) as r: - for block in r: - self.parse_block(block) - - def parse_block(self, block): - # Get first line to find out what type of block this is - line = block[0] - - # inetnum - if line.startswith("inet6num:") or line.startswith("inetnum:"): - return self._parse_inetnum_block(block) - - # route - elif line.startswith("route6:") or line.startswith("route:"): - return self._parse_route_block(block) - - # aut-num - elif line.startswith("aut-num:"): - return self._parse_autnum_block(block) - - # organisation - elif line.startswith("organisation:"): - return self._parse_org_block(block) - - # person (ignored) - elif line.startswith("person:"): - return - - # domain (ignored) - elif line.startswith("domain:"): + for line in r: + self.parse_line(line) + + def parse_line(self, line): + try: + 
lacnic, country_code, type, line = line.split("|", 3) + except: + logging.warning("Could not parse line: %s" % line) return - # mntner (ignored) - elif line.startswith("mntner:"): + # Skip any lines with addresses that are not allocated + if not "|allocated|" in line and not "|assigned|" in line: return - # as-block (ignored) - elif line.startswith("as-block:"): - return - - # as-set (ignored) - elif line.startswith("as-set:"): - return + if type in ("ipv6", "ipv4"): + return self._parse_ip_line(country_code, type, line) - # route-set (ignored) - elif line.startswith("route-set:"): - return + elif type == "asn": + return self._parse_asn_line(country_code, line) - # role (ignored) - elif line.startswith("role:"): + else: + logging.warning("Unknown line type: %s" % type) return - # key-cert (ignored) - elif line.startswith("key-cert:"): + def _parse_ip_line(self, country_code, type, line): + try: + address, prefix, date, status, org_id = line.split("|") + except: + logging.warning("Unhandled line format: %s" % line) return - # irt (ignored) - elif line.startswith("irt:"): - return - - # Log any unknown blocks - else: - logging.warning("Unknown block:") - for line in block: - logging.warning(line) - - def _parse_inetnum_block(self, block): - logging.debug("Parsing inetnum block:") - - inetnum = {} - for line in block: - logging.debug(line) - - # Split line - key, val = util.split_line(line) - - if key == "inetnum": - start_address, delim, end_address = val.partition("-") - - # Strip any excess space - start_address, end_address = start_address.rstrip(), end_address.strip() - - # Skip invalid blocks - if start_address in INVALID_ADDRESSES: - return - - # Convert to IP address - try: - start_address = ipaddress.ip_address(start_address) - end_address = ipaddress.ip_address(end_address) - except ValueError: - logging.warning("Could not parse line: %s" % line) - return - - # Set prefix to default - prefix = 32 - - # Count number of addresses in this subnet - num_addresses 
= int(end_address) - int(start_address) - if num_addresses: - prefix -= math.log(num_addresses, 2) - - inetnum["inetnum"] = "%s/%.0f" % (start_address, prefix) - - elif key == "inet6num": - # Skip invalid blocks - if val in INVALID_ADDRESSES: - return - - inetnum[key] = val - - elif key == "netname": - inetnum[key] = val - - elif key == "country": - if val == "UNITED STATES": - val = "US" - - inetnum[key] = val.upper() - - elif key == "descr": - if key in inetnum: - inetnum[key] += "\n%s" % val - else: - inetnum[key] = val - - # Skip empty objects - if not inetnum: + # Cast prefix into an integer + try: + prefix = int(prefix) + except: + logging.warning("Invalid prefix: %s" % prefix) + + # Fix prefix length for IPv4 + if type == "ipv4": + prefix = 32 - int(math.log(prefix, 2)) + + # Try to parse the address + try: + network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False) + except ValueError: + raise + logging.warning("Invalid IP address: %s" % address) return with self.db as c: - network = ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("inetnum"), strict=False) - # Get the first and last address of this network address_start, address_end = int(network.network_address), int(network.broadcast_address) args = ( "%s" % network, - inetnum.get("netname"), - inetnum.get("country"), - inetnum.get("descr"), + country_code, + org_id, network.version, struct.pack(">QQ", address_start >> 64, address_start % (2 ** 64)), struct.pack(">QQ", address_end >> 64, address_end % (2 ** 64)), network.prefixlen, ) - c.execute("INSERT INTO inetnums(network, netname, country, description, family, \ - address_start, address_end, prefix) VALUES(?, ?, ?, ?, ?, ?, ?, ?)", args) - - def _parse_route_block(self, block): - logging.debug("Parsing route block:") - - route = {} - for line in block: - logging.debug(line) + c.execute("INSERT INTO inetnums(network, country, org_id, \ + family, address_start, address_end, prefix) VALUES(?, ?, ?, ?, ?, ?, ?)", args) - # 
Split line - key, val = util.split_line(line) - - # Keep any significant data - if key in ("route6", "route"): - route[key] = val - - elif key == "origin": - m = RE_AS.match(val) - if m: - route["asn"] = m.group(2) - - # Skip empty objects - if not route: + def _parse_asn_line(self, country_code, line): + try: + asn, dunno, date, status, org_id = line.split("|") + except: + logging.warning("Could not parse line: %s" % line) return with self.db as c: - network = ipaddress.ip_network(route.get("route6") or route.get("route"), strict=False) - - # Get the first and last address of this network - address_start, address_end = int(network.network_address), int(network.broadcast_address) - args = ( - "%s" % network, - route.get("asn"), - network.version, - struct.pack(">QQ", address_start >> 64, address_start % (2 ** 64)), - struct.pack(">QQ", address_end >> 64, address_end % (2 ** 64)), - network.prefixlen, + asn, + country_code, + org_id, ) - c.execute("INSERT INTO routes(route, asn, family, \ - address_start, address_end, prefix) VALUES(?, ?, ?, ?, ?, ?)", args) - - def _parse_autnum_block(self, block): - logging.debug("Parsing autnum block:") + c.execute("INSERT INTO autnums(asn, country, org_id) \ + VALUES(?, ?, ?)", args) - autnum = {} - for line in block: - logging.debug(line) + def _export_networks(self, f): + # Write header + self._write_header(f) - # Split line - key, val = util.split_line(line) + with self.db as c: + # Write all networks + res = c.execute(""" + SELECT inetnums.network, + autnums.asn, + inetnums.address_start, + inetnums.country + FROM inetnums + LEFT JOIN autnums + WHERE inetnums.org_id = autnums.org_id + ORDER BY inetnums.address_start + """) - if key == "aut-num": - m = RE_AS.match(val) - if m: - autnum["asn"] = m.group(2) + for row in res: + net, asn, address_start, country = row - elif key in ("as-name", "org"): - autnum[key] = val + f.write(FMT % ("net:", net)) - # Skip empty objects - if not autnum: - return + if asn: + f.write(FMT % 
("asnum:", "AS%s" % asn)) - with self.db as c: - args = ( - autnum.get("asn"), - autnum.get("as-name"), - autnum.get("org"), - ) + if country: + f.write(FMT % ("country:", country)) - c.execute("INSERT INTO autnums(asn, name, org) \ - VALUES(?, ?, ?)", args) - - def _parse_org_block(self, block): - logging.debug("Parsing org block:") + # End the block + f.write("\n") - org = {} - for line in block: - logging.debug(line) + def _export_asnums(self, f): + # Write header + self._write_header(f) - # Split line - key, val = util.split_line(line) + with self.db as c: + res = c.execute("SELECT DISTINCT autnums.asn, autnums.country \ + FROM autnums ORDER BY autnums.asn") - if key in ("organisation", "org-name", "country"): - org[key] = val + for row in res: + asn, country = row - # Skip empty objects - if not org: - return + f.write(FMT % ("asnum:", "AS%s" % asn)) - with self.db as c: - args = ( - org.get("organisation"), - org.get("org-name"), - org.get("country"), - ) + if country: + f.write(FMT % ("country:", country)) - c.execute("INSERT INTO organisations(handle, name, country) \ - VALUES(?, ?, ?)", args) + # End block + f.write("\n") diff --git a/tools/downloader.py b/tools/downloader.py index 6cc738e..02f91a9 100644 --- a/tools/downloader.py +++ b/tools/downloader.py @@ -43,7 +43,7 @@ class Downloader(object): log.info("Using proxy %s" % url) self.proxy = url - def request(self, url, data=None, return_blocks=True): + def request(self, url, data=None): req = urllib.request.Request(url, data=data) # Configure proxy @@ -54,17 +54,14 @@ class Downloader(object): if self.USER_AGENT: req.add_header("User-Agent", self.USER_AGENT) - return DownloaderContext(self, req, return_blocks=return_blocks) + return DownloaderContext(self, req) class DownloaderContext(object): - def __init__(self, downloader, request, return_blocks=True): + def __init__(self, downloader, request): self.downloader = downloader self.request = request - # Should we return one block or a single line? 
- self.return_blocks = return_blocks - # Save the response object self.response = None @@ -88,12 +85,6 @@ class DownloaderContext(object): """ Makes the object iterable by going through each block """ - if self.return_blocks: - for b in util.iterate_over_blocks(self.body): - yield b - - return - # Store body body = self.body @@ -123,12 +114,6 @@ class DownloaderContext(object): Returns a file-like object with the decoded content of the response. """ - content_type = self.get_header("Content-Type") - - # Decompress any gzipped response on the fly - if content_type in ("application/x-gzip", "application/gzip"): - return gzip.GzipFile(fileobj=self.response, mode="rb") - # Return the response by default return self.response @@ -143,9 +128,7 @@ if __name__ == "__main__": for url in sys.argv[1:]: print("Downloading %s..." % url) - + with d.request(url) as r: - for block in r: - for line in block: - print(line) - print() + for line in r: + print(line) diff --git a/tools/lacnic.py b/tools/lacnic.py index 13f0b8d..767e766 100644 --- a/tools/lacnic.py +++ b/tools/lacnic.py @@ -34,161 +34,3 @@ class LACNIC(base.RIR): database_urls = ( "http://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest", ) - - @property - def parser(self): - return LACNICParser - - -class LACNICParser(base.RIRParser): - def _make_database(self, filename): - db = sqlite3.connect(filename) - - # Create database layout - with db as cursor: - cursor.executescript(""" - CREATE TABLE IF NOT EXISTS autnums(asn INTEGER, country TEXT, org_id INTEGER); - CREATE INDEX autnums_org_id ON autnums(org_id); - - CREATE TABLE IF NOT EXISTS inetnums(network TEXT, country TEXT, org_id INTEGER, - family INTEGER, address_start BLOB, address_end BLOB, prefix INTEGER); - CREATE INDEX inetnums_sort ON inetnums(address_start); - """) - - return db - - def parse_url(self, url): - with self.downloader.request(url, return_blocks=False) as r: - for line in r: - self.parse_line(line) - - def parse_line(self, line): - 
try: - lacnic, country_code, type, line = line.split("|", 3) - except: - logging.warning("Could not parse line: %s" % line) - return - - # Skip any lines with addresses that are not allocated - if not "|allocated|" in line: - return - - if type in ("ipv6", "ipv4"): - return self._parse_ip_line(country_code, type, line) - - elif type == "asn": - return self._parse_asn_line(country_code, line) - - else: - logging.warning("Unknown line type: %s" % type) - return - - def _parse_ip_line(self, country_code, type, line): - try: - address, prefix, date, status, org_id = line.split("|") - except: - logging.warning("Unhandled line format: %s" % line) - return - - # Cast prefix into an integer - try: - prefix = int(prefix) - except: - logging.warning("Invalid prefix: %s" % prefix) - - # Fix prefix length for IPv4 - if type == "ipv4": - prefix = 32 - int(math.log(prefix, 2)) - - # Try to parse the address - try: - network = ipaddress.ip_network("%s/%s" % (address, prefix)) - except ValueError: - raise - logging.warning("Invalid IP address: %s" % address) - return - - with self.db as c: - # Get the first and last address of this network - address_start, address_end = int(network.network_address), int(network.broadcast_address) - - args = ( - "%s" % network, - country_code, - org_id, - network.version, - struct.pack(">QQ", address_start >> 64, address_start % (2 ** 64)), - struct.pack(">QQ", address_end >> 64, address_end % (2 ** 64)), - network.prefixlen, - ) - - c.execute("INSERT INTO inetnums(network, country, org_id, \ - family, address_start, address_end, prefix) VALUES(?, ?, ?, ?, ?, ?, ?)", args) - - def _parse_asn_line(self, country_code, line): - try: - asn, dunno, date, status, org_id = line.split("|") - except: - logging.warning("Could not parse line: %s" % line) - return - - with self.db as c: - args = ( - asn, - country_code, - org_id, - ) - - c.execute("INSERT INTO autnums(asn, country, org_id) VALUES(?, ?, ?)", args) - - def _export_networks(self, f): - # Write 
header - self._write_header(f) - - with self.db as c: - # Write all networks - res = c.execute(""" - SELECT inetnums.network, - autnums.asn, - inetnums.address_start, - inetnums.country - FROM inetnums - LEFT JOIN autnums - WHERE inetnums.org_id = autnums.org_id - ORDER BY inetnums.address_start - """) - - for row in res: - net, asn, address_start, country = row - - f.write(base.FMT % ("net:", net)) - - if asn: - f.write(base.FMT % ("asnum:", "AS%s" % asn)) - - if country: - f.write(base.FMT % ("country:", country)) - - # End the block - f.write("\n") - - def _export_asnums(self, f): - # Write header - self._write_header(f) - - with self.db as c: - res = c.execute("SELECT DISTINCT autnums.asn, autnums.country \ - FROM autnums ORDER BY autnums.asn") - - for row in res: - asn, country = row - - f.write(base.FMT % ("asnum:", "AS%s" % asn)) - - if country: - f.write(base.FMT % ("country:", country)) - - # End block - f.write("\n") - - diff --git a/tools/ripe.py b/tools/ripe.py index 99f4a8b..f2f6a42 100644 --- a/tools/ripe.py +++ b/tools/ripe.py @@ -26,10 +26,5 @@ class RIPE(base.RIR): name = "Réseaux IP Européens" database_urls = ( - "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz", - "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz", - "https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz", - "https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz", - "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz", - "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz", + "https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest", )