(131072, 4199999999),
)
+# Country codes that are rewritten to another code before validation
+TRANSLATED_COUNTRIES = {
+ # "UK" is commonly written where the code "GB" is meant
+ "UK": "GB",
+}
+
+# Country codes that are dropped without further checks
+IGNORED_COUNTRIES = {
+ # Code of the former Yugoslavia
+ "YU",
+
+ # "ZZ" is used to express "no country" or to hide the country
+ "ZZ",
+}
+
+
# Configure the CSV parser for ARIN
csv.register_dialect("arin", delimiter=",", quoting=csv.QUOTE_ALL, quotechar="\"")
# Fetch all valid country codes to check parsed networks aganist...
countries = self.db.query("SELECT country_code FROM countries ORDER BY country_code")
- return [country.country_code for country in countries]
+ return set((country.country_code for country in countries))
def handle_write(self, ns):
"""
error = False
# Fetch all valid country codes to check parsed networks against
- validcountries = self.fetch_countries()
+ countries = self.fetch_countries()
+
+ # Check if we have countries
+ if not countries:
+ log.error("Please import countries before importing any WHOIS data")
+ return 1
# Iterate over all potential sources
for source in sorted(location.importer.SOURCES):
name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
- CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL,
+ CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text,
original_countries text[] NOT NULL, source text NOT NULL)
ON COMMIT DROP;
CREATE INDEX _rirdata_search ON _rirdata
# Fetch WHOIS sources
for url in location.importer.WHOIS_SOURCES.get(source, []):
for block in downloader.request_blocks(url):
- self._parse_block(block, source, validcountries)
+ self._parse_block(block, source, countries)
# Fetch extended sources
for url in location.importer.EXTENDED_SOURCES.get(source, []):
for line in downloader.request_lines(url):
- self._parse_line(line, source, validcountries)
+ self._parse_line(line, source, countries)
except urllib.error.URLError as e:
log.error("Could not retrieve data from %s: %s" % (source, e))
error = True
We will return False in case a network is not suitable for adding
it to our database, and True otherwise.
"""
+ # Check input
+ if isinstance(network, ipaddress.IPv6Network):
+ pass
+ elif isinstance(network, ipaddress.IPv4Network):
+ pass
+ else:
+ raise ValueError("Invalid network: %s (type %s)" % (network, type(network)))
- if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
- return False
-
+ # Ignore anything that isn't globally routable
if not network.is_global:
log.debug("Skipping non-globally routable network: %s" % network)
return False
+ # Ignore anything that is unspecified IP range (See RFC 5735 for IPv4 or RFC 2373 for IPv6)
+ elif network.is_unspecified:
+ log.debug("Skipping unspecified network: %s" % network)
+ return False
+
if network.version == 4:
if network.prefixlen < 7:
log.debug("Skipping too big IP chunk: %s" % network)
log.debug("Skipping network too small to be publicly announced: %s" % network)
return False
- if str(network.network_address) == "0.0.0.0":
- log.debug("Skipping network based on 0.0.0.0: %s" % network)
- return False
-
elif network.version == 6:
if network.prefixlen < 10:
log.debug("Skipping too big IP chunk: %s" % network)
log.debug("Skipping network too small to be publicly announced: %s" % network)
return False
- if str(network.network_address) == "::":
- log.debug("Skipping network based on '::': %s" % network)
- return False
-
- else:
- # This should not happen...
- log.warning("Skipping network of unknown family, this should not happen: %s" % network)
- return False
-
# In case we have made it here, the network is considered to
# be suitable for libloc consumption...
return True
log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
return False
- def _parse_block(self, block, source_key, validcountries = None):
+ def _parse_block(self, block, source_key, countries):
# Get first line to find out what type of block this is
line = block[0]
# inetnum
if line.startswith("inet6num:") or line.startswith("inetnum:"):
- return self._parse_inetnum_block(block, source_key, validcountries)
+ return self._parse_inetnum_block(block, source_key, countries)
# organisation
elif line.startswith("organisation:"):
autnum.get("asn"), autnum.get("org"), source_key,
)
- def _parse_inetnum_block(self, block, source_key, validcountries = None):
+ def _parse_inetnum_block(self, block, source_key, countries):
log.debug("Parsing inetnum block:")
inetnum = {}
inetnum[key] = [ipaddress.ip_network(val, strict=False)]
elif key == "country":
- val = val.upper()
+ cc = val.upper()
- # Catch RIR data objects with more than one country code...
- if not key in inetnum:
- inetnum[key] = []
- else:
- if val in inetnum.get("country"):
- # ... but keep this list distinct...
- continue
+ # Ignore certain country codes
+ if cc in IGNORED_COUNTRIES:
+ log.debug("Ignoring country code '%s'" % cc)
+ continue
- # When people set country codes to "UK", they actually mean "GB"
- if val == "UK":
- val = "GB"
+ # Translate country codes
+ try:
+ cc = TRANSLATED_COUNTRIES[cc]
+ except KeyError:
+ pass
+
+ # Do we know this country?
+ if not cc in countries:
+ log.warning("Skipping invalid country code '%s'" % cc)
+ continue
- inetnum[key].append(val)
+ try:
+ inetnum[key].append(cc)
+ except KeyError:
+ inetnum[key] = [cc]
# Parse the geofeed attribute
elif key == "geofeed":
inetnum["geofeed"] = m.group(1)
# Skip empty objects
- if not inetnum or not "country" in inetnum:
+ if not inetnum:
return
- # Prepare skipping objects with unknown country codes...
- invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
-
# Iterate through all networks enumerated from above, check them for plausibility and insert
# them into the database, if _check_parsed_network() succeeded
for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
- if self._check_parsed_network(single_network):
- # Skip objects with unknown country codes if they are valid to avoid log spam...
- if validcountries and invalidcountries:
- log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
- (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
- break
+ if not self._check_parsed_network(single_network):
+ continue
- # Everything is fine here, run INSERT statement...
- self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
- VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
- "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
+ # Fetch the countries or use a list with an empty country
+ countries = inetnum.get("country", [None])
+
+ # Insert the network into the database but only use the first country code
+ for cc in countries:
+ self.db.execute("""
+ INSERT INTO
+ _rirdata
+ (
+ network,
+ country,
+ original_countries,
+ source
+ )
+ VALUES
+ (
+ %s, %s, %s, %s
+ )
+ ON CONFLICT (network)
+ DO UPDATE SET country = excluded.country
+ """, "%s" % single_network, cc, [cc for cc in countries if cc], source_key,
)
- # Update any geofeed information
- geofeed = inetnum.get("geofeed", None)
- if geofeed:
- self._parse_geofeed(geofeed, single_network)
+ # If there are more than one country, we will only use the first one
+ break
- # Delete any previous geofeeds
- else:
- self.db.execute("DELETE FROM network_geofeeds WHERE network = %s",
- "%s" % single_network)
+ # Update any geofeed information
+ geofeed = inetnum.get("geofeed", None)
+ if geofeed:
+ self._parse_geofeed(geofeed, single_network)
+
+ # Delete any previous geofeeds
+ else:
+ self.db.execute(
+ "DELETE FROM network_geofeeds WHERE network = %s", "%s" % single_network,
+ )
def _parse_geofeed(self, url, single_network):
# Parse the URL
org.get("organisation"), org.get("org-name"), source_key,
)
- def _parse_line(self, line, source_key, validcountries = None):
+ def _parse_line(self, line, source_key, validcountries=None):
# Skip version line
if line.startswith("2"):
return
log.warning("Could not parse line: %s" % line)
return
+ # Skip any unknown protocols
+ if not type in ("ipv6", "ipv4"):
+ log.warning("Unknown IP protocol '%s'" % type)
+ return
+
# Skip any lines that are for stats only or do not have a country
# code at all (avoids log spam below)
if not country_code or country_code == '*':
(country_code, line))
return
- if type in ("ipv6", "ipv4"):
- return self._parse_ip_line(country_code, type, line, source_key)
-
- def _parse_ip_line(self, country, type, line, source_key):
try:
address, prefix, date, status, organization = line.split("|")
except ValueError:
if not self._check_parsed_network(network):
return
- self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
- VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
- UPDATE SET country = excluded.country",
- "%s" % network, country, [country], source_key,
+ # Insert the network; a line carries exactly one country code, so
+ # that code is also stored as the only entry of original_countries.
+ # (The former "country" parameter of _parse_ip_line no longer exists
+ # in this function, hence country_code is used in both places.)
+ self.db.execute("""
+ INSERT INTO
+ networks
+ (
+ network,
+ country,
+ original_countries,
+ source
+ )
+ VALUES
+ (
+ %s, %s, %s, %s
+ )
+ ON CONFLICT (network)
+ DO UPDATE SET country = excluded.country
+ """, "%s" % network, country_code, [country_code], source_key,
+ )
def _import_as_names_from_arin(self, downloader):