log = logging.getLogger("location.importer")
log.propagate = 1
+WHOIS_SOURCES = (
+ # African Network Information Centre
+ "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz",
+
+ # Asia Pacific Network Information Centre
+ "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
+ "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
+ "https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
+ "https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
+ "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
+ "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz",
+
+ # American Registry for Internet Numbers
+ "https://ftp.arin.net/pub/rr/arin.db",
+
+ # Latin America and Caribbean Network Information Centre
+ # XXX TODO: no bulk WHOIS source is listed for this registry yet
+
+ # Réseaux IP Européens
+ "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
+ "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
+ "https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
+ "https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
+ "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
+ "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz",
+)
+
class Downloader(object):
def __init__(self):
self.proxy = None
log.info("Using proxy %s" % url)
self.proxy = url
def request(self, url, data=None, return_blocks=False):
    """
    Prepare a request for ``url`` and wrap it in a DownloaderContext.

    ``data`` is handed to urllib.request.Request unchanged and
    ``return_blocks`` is forwarded to the context object.
    """
    outgoing = urllib.request.Request(url, data=data)

    # Route the request through the configured proxy, if there is one
    proxy = self.proxy
    if proxy:
        outgoing.set_proxy(proxy, "http")

    return DownloaderContext(self, outgoing, return_blocks=return_blocks)
class DownloaderContext(object):
- def __init__(self, downloader, request):
+ def __init__(self, downloader, request, return_blocks=False):
self.downloader = downloader
self.request = request
+ # Whether iterating yields whole blocks (lists of lines) instead of single lines
+ self.return_blocks = return_blocks
+
# Save the response object
self.response = None
"""
Makes the object iterable by going through each block
"""
+ if self.return_blocks:
+ return iterate_over_blocks(self.body)
+
# Store body
- body = self.body
+ #body = self.body
- while True:
- line = body.readline()
- if not line:
- break
+ #while True:
+ # line = body.readline()
+ # if not line:
+ # break
- # Decode the line
- line = line.decode()
+ # # Decode the line
+ # print(line)
+ # line = line.decode()
- # Strip the ending
- yield line.rstrip()
+ # # Strip the ending
+ # yield line.rstrip()
@property
def headers(self):
Returns a file-like object with the decoded content
of the response.
"""
+ content_type = self.get_header("Content-Type")
+
+ # Decompress any gzipped response on the fly
+ if content_type in ("application/x-gzip", "application/gzip"):
+ return gzip.GzipFile(fileobj=self.response, mode="rb")
+
# Return the response by default
return self.response
+
+
def iterate_over_blocks(f, charsets=("utf-8", "latin1")):
    """
    Split an iterable of raw (bytes) lines into blocks of non-empty lines.

    Every item of ``f`` is decoded with the first entry of ``charsets``
    that decodes it without error. Lines starting with "#" or "%" are
    dropped, anything after a "#" is removed, and an empty line ends the
    current block.

    Yields each block as a list of strings. The final block is yielded
    even when the input does not end with an empty line.
    """
    block = []

    for raw in f:
        # Convert to string using the first charset that works
        for charset in charsets:
            try:
                line = raw.decode(charset)
            except UnicodeDecodeError:
                continue
            else:
                break
        else:
            # No charset matched: substitute the undecodable bytes instead
            # of carrying a bytes object into the string handling below
            line = raw.decode(charsets[0] if charsets else "utf-8", errors="replace")

        # Skip commented lines
        if line.startswith(("#", "%")):
            continue

        # Strip line-endings and remove any comment at the end of the line
        line, marker, _comment = line.rstrip().partition("#")

        if marker:
            # Strip any whitespace before the comment
            line = line.rstrip()

            # A line that held nothing but a comment does not end the block
            if not line:
                continue

        if line:
            block.append(line)
            continue

        # End the block on an empty line
        if block:
            yield block
            block = []

    # Return the last block if the input did not end with an empty line
    if block:
        yield block
###############################################################################
import argparse
+import ipaddress
import logging
+import math
+import re
import sys
# Load our location module
log = logging.getLogger("location.importer")
log.propagate = 1
# inetnum start addresses and inet6num values for which
# _parse_inetnum_block discards the whole block
INVALID_ADDRESSES = ("0.0.0.0", "::/0", "0::/0")
+
class CLI(object):
def parse_cli(self):
parser = argparse.ArgumentParser(
description=_("Location Importer Command Line Interface"),
)
+ subparsers = parser.add_subparsers()
# Global configuration flags
parser.add_argument("--debug", action="store_true",
parser.add_argument("--database-password", required=True,
help=_("Database Password"), metavar=_("PASSWORD"))
+ # Update WHOIS
+ update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
+ update_whois.set_defaults(func=self.handle_update_whois)
+
args = parser.parse_args()
# Enable debug logging
if args.debug:
log.setLevel(logging.DEBUG)
+ # Print usage if no action was given
+ if not "func" in args:
+ parser.print_usage()
+ sys.exit(2)
+
return args
def run(self):
args = self.parse_cli()
# Initialise database
- db = self._setup_database(args)
+ self.db = self._setup_database(args)
# Call function
- ret = self.handle_import(db, args)
+ ret = args.func(args)
# Return with exit code
if ret:
with db.transaction():
db.execute("""
- CREATE TABLE IF NOT EXISTS asnums(number integer, name text);
- CREATE UNIQUE INDEX IF NOT EXISTS asnums_number ON asnums(number);
+ -- autnums
+ CREATE TABLE IF NOT EXISTS autnums(number integer, name text, organization text);
+ CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
+
+ -- inetnums
+ CREATE TABLE IF NOT EXISTS inetnums(network inet, name text, country text, description text);
+ CREATE UNIQUE INDEX IF NOT EXISTS inetnums_networks ON inetnums(network);
+ CREATE INDEX IF NOT EXISTS inetnums_family ON inetnums(family(network));
+
+ -- organizations
+ CREATE TABLE IF NOT EXISTS organizations(handle text, name text, country text);
+ CREATE UNIQUE INDEX IF NOT EXISTS organizations_handle ON organizations(handle);
+
+ -- routes
+ CREATE TABLE IF NOT EXISTS routes(network inet, asn integer);
+ CREATE UNIQUE INDEX IF NOT EXISTS routes_network ON routes(network);
+ CREATE INDEX IF NOT EXISTS routes_family ON routes(family(network));
""")
return db
- def handle_import(self, db, ns):
- pass
def handle_update_whois(self, ns):
    """
    Fetch every entry of WHOIS_SOURCES and hand its blocks to the parser.

    Each source is processed inside its own ``self.db.transaction()``.
    ``ns`` is accepted but not used.
    """
    fetcher = location.importer.Downloader()

    for url in location.importer.WHOIS_SOURCES:
        # The transaction is entered first, so it spans the whole download
        with self.db.transaction(), fetcher.request(url, return_blocks=True) as blocks:
            for entry in blocks:
                self._parse_block(entry)
+
def _parse_block(self, block):
    """
    Hand ``block`` to the parser that matches the key of its first line.

    inetnum/inet6num, route/route6, aut-num and organisation blocks are
    forwarded to their parser and its result is returned. A fixed set of
    other object types is ignored; anything else is logged as a warning.
    """
    # The first line tells us what type of block this is
    head = block[0]

    if head.startswith(("inet6num:", "inetnum:")):
        return self._parse_inetnum_block(block)

    if head.startswith(("route6:", "route:")):
        return self._parse_route_block(block)

    if head.startswith("aut-num:"):
        return self._parse_autnum_block(block)

    if head.startswith("organisation:"):
        return self._parse_org_block(block)

    # Object types that are ignored
    ignored = (
        "person:",
        "domain:",
        "mntner:",
        "as-block:",
        "as-set:",
        "route-set:",
        "role:",
        "key-cert:",
        "irt:",
    )
    if head.startswith(ignored):
        return

    # Log any unknown blocks
    log.warning("Unknown block:")
    for entry in block:
        log.warning(entry)
+
def _parse_autnum_block(self, block):
    """
    Store the aut-num object described by ``block`` in the autnums table.

    The AS number is taken from the "aut-num" attribute; "as-name" and
    "org" are stored alongside it. Blocks without a parseable AS number
    are skipped: a row with a NULL number can never be matched by the
    ON CONFLICT (number) clause, so it would be inserted again on every
    run.
    """
    autnum = {}
    for line in block:
        # Split line
        key, val = split_line(line)

        if key == "aut-num":
            m = re.match(r"^(AS|as)(\d+)", val)
            if m:
                autnum["asn"] = m.group(2)

        elif key in ("as-name", "org"):
            autnum[key] = val

    # Skip objects without an AS number
    if "asn" not in autnum:
        return

    # Insert into database
    self.db.execute("INSERT INTO autnums(number, name, organization) \
        VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
        name = excluded.name, organization = excluded.organization",
        autnum.get("asn"), autnum.get("as-name"), autnum.get("org"),
    )
+
def _parse_inetnum_block(self, block):
    """
    Store the inetnum/inet6num object described by ``block`` in the
    inetnums table.

    An "inetnum" value is a "start - end" address range. It is converted
    with ipaddress.summarize_address_range() into the exact list of
    networks covering it, so a range that is not a single aligned CIDR
    block results in several rows instead of one approximated prefix.
    An "inet6num" value is parsed as a network directly.

    Blocks are skipped when the start address or inet6num value is
    listed in INVALID_ADDRESSES, when the address information cannot be
    parsed (logged as a warning), or when no network was found at all.
    """
    inetnum = {}
    networks = []

    for line in block:
        # Split line
        key, val = split_line(line)

        if key == "inetnum":
            start_address, delim, end_address = val.partition("-")

            # Strip any excess space
            start_address, end_address = start_address.strip(), end_address.strip()

            # Skip invalid blocks
            if start_address in INVALID_ADDRESSES:
                return

            # Convert the range into the networks that cover it exactly.
            # ValueError: unparsable address or end before start;
            # TypeError: start and end of different IP versions.
            try:
                networks = list(ipaddress.summarize_address_range(
                    ipaddress.ip_address(start_address),
                    ipaddress.ip_address(end_address),
                ))
            except (TypeError, ValueError):
                log.warning("Could not parse line: %s" % line)
                return

        elif key == "inet6num":
            # Skip invalid blocks
            if val in INVALID_ADDRESSES:
                return

            try:
                networks = [ipaddress.ip_network(val, strict=False)]
            except ValueError:
                log.warning("Could not parse line: %s" % line)
                return

        elif key == "netname":
            inetnum[key] = val

        elif key == "country":
            # Normalise case first so the spelled-out name is matched
            # regardless of how it was capitalised
            val = val.upper()
            if val == "UNITED STATES":
                val = "US"

            inetnum[key] = val

        elif key == "descr":
            # Multiple descr lines are joined, one per line
            if key in inetnum:
                inetnum[key] += "\n%s" % val
            else:
                inetnum[key] = val

    # Skip objects without any network
    if not networks:
        return

    for network in networks:
        self.db.execute("INSERT INTO inetnums(network, name, country, description) \
            VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
            UPDATE SET name = excluded.name, country = excluded.country, description = excluded.description",
            "%s" % network, inetnum.get("netname"), inetnum.get("country"), inetnum.get("descr"),
        )
+
def _parse_org_block(self, block):
    """
    Store the organisation object described by ``block`` in the
    organizations table.

    Only the "organisation", "org-name" and "country" attributes are
    kept; if none of them is present nothing is written.
    """
    wanted = ("organisation", "org-name", "country")

    # A later occurrence of an attribute replaces an earlier one
    fields = {}
    for name, value in map(split_line, block):
        if name in wanted:
            fields[name] = value

    if fields:
        self.db.execute("INSERT INTO organizations(handle, name, country) \
            VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
            UPDATE SET name = excluded.name, country = excluded.country",
            fields.get("organisation"), fields.get("org-name"), fields.get("country"),
        )
+
def _parse_route_block(self, block):
    """
    Store the route/route6 object described by ``block`` in the routes
    table.

    The prefix comes from the "route6" or "route" attribute and the AS
    number from "origin". Blocks that lack a prefix or a parseable
    origin are skipped, because the upsert would otherwise replace an
    already stored AS number with NULL. A prefix that cannot be parsed
    is logged as a warning and skipped instead of aborting the import.
    """
    route = {}
    for line in block:
        # Split line
        key, val = split_line(line)

        # Keep any significant data
        if key in ("route6", "route"):
            route[key] = val

        elif key == "origin":
            m = re.match(r"^(AS|as)(\d+)", val)
            if m:
                route["asn"] = m.group(2)

    prefix = route.get("route6") or route.get("route")

    # Skip incomplete objects
    if not prefix or "asn" not in route:
        return

    try:
        network = ipaddress.ip_network(prefix, strict=False)
    except ValueError:
        log.warning("Could not parse route: %s" % prefix)
        return

    self.db.execute("INSERT INTO routes(network, asn) \
        VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET asn = excluded.asn",
        "%s" % network, route.get("asn"),
    )
+
+
def split_line(line):
    """
    Split ``line`` at its first colon and return the part before and the
    part after it, each with surrounding whitespace removed.
    """
    name, _, value = line.partition(":")
    return name.strip(), value.strip()
def main():
# Run the command line interface