###############################################################################
#                                                                             #
# libloc - A library to determine the location of someone on the Internet    #
#                                                                             #
# Copyright (C) 2020-2024 IPFire Development Team <info@ipfire.org>           #
#                                                                             #
# This library is free software; you can redistribute it and/or              #
# modify it under the terms of the GNU Lesser General Public                 #
# License as published by the Free Software Foundation; either               #
# version 2.1 of the License, or (at your option) any later version.         #
#                                                                             #
# This library is distributed in the hope that it will be useful,            #
# but WITHOUT ANY WARRANTY; without even the implied warranty of             #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU           #
# Lesser General Public License for more details.                            #
#                                                                             #
###############################################################################

import argparse
import concurrent.futures
import csv
import http.client
import ipaddress
import json
import logging
import math
import re
import socket
import sys
import urllib.error
import urllib.parse
import urllib.request

# Load our location module
import location
import location.database
import location.importer
from location.i18n import _

log = logging.getLogger("location.importer")
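
# Publicly routable ASN ranges; these values mirror the ASN filters applied
# to the announcements table in handle_update_announcements() below.
VALID_ASN_RANGES = (
	(1, 23455),
	(23457, 64495),
	(131072, 4199999999),
)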

# Configure the CSV parser for ARIN
csv.register_dialect("arin", delimiter=",", quoting=csv.QUOTE_ALL, quotechar="\"")

class CLI(object):
	def parse_cli(self):
		parser = argparse.ArgumentParser(
			description=_("Location Importer Command Line Interface"),
		)
		subparsers = parser.add_subparsers()

		# Global configuration flags
		parser.add_argument("--debug", action="store_true",
			help=_("Enable debug output"))
		parser.add_argument("--quiet", action="store_true",
			help=_("Enable quiet mode"))

		# version
		parser.add_argument("--version", action="version",
			version="%(prog)s @VERSION@")

		# Database
		parser.add_argument("--database-host", required=True,
			help=_("Database Hostname"), metavar=_("HOST"))
		parser.add_argument("--database-name", required=True,
			help=_("Database Name"), metavar=_("NAME"))
		parser.add_argument("--database-username", required=True,
			help=_("Database Username"), metavar=_("USERNAME"))
		parser.add_argument("--database-password", required=True,
			help=_("Database Password"), metavar=_("PASSWORD"))

		# Write Database
		write = subparsers.add_parser("write", help=_("Write database to file"))
		write.set_defaults(func=self.handle_write)
		write.add_argument("file", nargs=1, help=_("Database File"))
		write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
		write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
		write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
		write.add_argument("--description", nargs="?", help=_("Sets a description"))
		write.add_argument("--license", nargs="?", help=_("Sets the license"))
		write.add_argument("--version", type=int, help=_("Database Format Version"))

		# Update WHOIS
		update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
		update_whois.set_defaults(func=self.handle_update_whois)

		# Update announcements
		update_announcements = subparsers.add_parser("update-announcements",
			help=_("Update BGP Announcements"))
		update_announcements.set_defaults(func=self.handle_update_announcements)
		update_announcements.add_argument("server", nargs=1,
			help=_("Route Server to connect to"), metavar=_("SERVER"))

		# Update geofeeds
		update_geofeeds = subparsers.add_parser("update-geofeeds",
			help=_("Update Geofeeds"))
		update_geofeeds.set_defaults(func=self.handle_update_geofeeds)

		# Update feeds
		update_feeds = subparsers.add_parser("update-feeds",
			help=_("Update Feeds"))
		update_feeds.set_defaults(func=self.handle_update_feeds)

		# Update overrides
		update_overrides = subparsers.add_parser("update-overrides",
			help=_("Update overrides"),
		)
		update_overrides.add_argument(
			"files", nargs="+", help=_("Files to import"),
		)
		update_overrides.set_defaults(func=self.handle_update_overrides)

		# Import countries
		import_countries = subparsers.add_parser("import-countries",
			help=_("Import countries"),
		)
		import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
			help=_("File to import"))
		import_countries.set_defaults(func=self.handle_import_countries)

		args = parser.parse_args()

		# Configure logging
		if args.debug:
			location.logger.set_level(logging.DEBUG)
		elif args.quiet:
			location.logger.set_level(logging.WARNING)

		# Print usage if no action was given
		if not "func" in args:
			parser.print_help()
			sys.exit(2)

		return args

	def run(self):
		# Parse command line arguments
		args = self.parse_cli()

		# Initialise database
		self.db = self._setup_database(args)

		# Call the handler function
		ret = args.func(args)

		# Return with exit code
		if ret:
			sys.exit(ret)

		# Otherwise just exit
		sys.exit(0)

	def _setup_database(self, ns):
		"""
			Initialise the database
		"""
		# Connect to database
		db = location.database.Connection(
			host=ns.database_host, database=ns.database_name,
			user=ns.database_username, password=ns.database_password,
		)

		with db.transaction():
			db.execute("""
				-- announcements
				CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
					first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
					last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
				CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
				CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
				CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);

				-- autnums
				CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
				ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
				CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

				-- countries
				CREATE TABLE IF NOT EXISTS countries(
					country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
				CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);

				-- networks
				CREATE TABLE IF NOT EXISTS networks(network inet, country text);
				ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
				ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
				CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
				CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
				CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);

				-- geofeeds
				CREATE TABLE IF NOT EXISTS geofeeds(
					id serial primary key,
					url text,
					status integer default null,
					updated_at timestamp without time zone default null
				);
				ALTER TABLE geofeeds ADD COLUMN IF NOT EXISTS error text;
				CREATE UNIQUE INDEX IF NOT EXISTS geofeeds_unique
					ON geofeeds(url);
				CREATE TABLE IF NOT EXISTS geofeed_networks(
					geofeed_id integer references geofeeds(id) on delete cascade,
					network inet,
					country text,
					region text,
					city text
				);
				CREATE INDEX IF NOT EXISTS geofeed_networks_geofeed_id
					ON geofeed_networks(geofeed_id);
				CREATE INDEX IF NOT EXISTS geofeed_networks_search
					ON geofeed_networks USING GIST(network inet_ops);
				CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
				CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique
					ON network_geofeeds(network);
				CREATE INDEX IF NOT EXISTS network_geofeeds_search
					ON network_geofeeds USING GIST(network inet_ops);
				CREATE INDEX IF NOT EXISTS network_geofeeds_url
					ON network_geofeeds(url);

				-- feeds
				CREATE TABLE IF NOT EXISTS autnum_feeds(
					number bigint NOT NULL,
					source text NOT NULL,
					name text,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean,
					is_drop boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS autnum_feeds_unique
					ON autnum_feeds(number, source);

				CREATE TABLE IF NOT EXISTS network_feeds(
					network inet NOT NULL,
					source text NOT NULL,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean,
					is_drop boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS network_feeds_unique
					ON network_feeds(network, source);
				CREATE INDEX IF NOT EXISTS network_feeds_search
					ON network_feeds USING GIST(network inet_ops);

				-- overrides
				CREATE TABLE IF NOT EXISTS autnum_overrides(
					number bigint NOT NULL,
					name text,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
					ON autnum_overrides(number);
				ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
				ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;

				CREATE TABLE IF NOT EXISTS network_overrides(
					network inet NOT NULL,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
					ON network_overrides(network);
				CREATE INDEX IF NOT EXISTS network_overrides_search
					ON network_overrides USING GIST(network inet_ops);
				ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
				ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
				""")

		return db

	def handle_write(self, ns):
		"""
			Compiles a database in libloc format out of what is in the database
		"""
		# Allocate a writer
		writer = location.Writer(ns.signing_key, ns.backup_signing_key)

		# Set all metadata
		if ns.vendor:
			writer.vendor = ns.vendor

		if ns.description:
			writer.description = ns.description

		if ns.license:
			writer.license = ns.license

		# Add all Autonomous Systems
		log.info("Writing Autonomous Systems...")

		# Select all ASes with a name
		rows = self.db.query("""
			SELECT
				autnums.number AS number,
				COALESCE(overrides.name, autnums.name) AS name
			FROM
				autnums
			LEFT JOIN
				autnum_overrides overrides ON autnums.number = overrides.number
			ORDER BY
				autnums.number
			""")

		for row in rows:
			# Skip AS without names
			if not row.name:
				continue

			a = writer.add_as(row.number)
			a.name = row.name

		# Add all networks
		log.info("Writing networks...")

		# Select all known networks
		rows = self.db.query("""
			WITH known_networks AS (
				SELECT network FROM announcements
				UNION
				SELECT network FROM networks
				UNION
				SELECT network FROM network_feeds
				UNION
				SELECT network FROM network_overrides
				UNION
				SELECT network FROM geofeed_networks
			),

			ordered_networks AS (
				SELECT
					known_networks.network AS network,
					announcements.autnum AS autnum,
					networks.country AS country,

					-- Must be part of returned values for ORDER BY clause
					masklen(announcements.network) AS sort_a,
					masklen(networks.network) AS sort_b
				FROM
					known_networks
				LEFT JOIN
					announcements ON known_networks.network <<= announcements.network
				LEFT JOIN
					networks ON known_networks.network <<= networks.network
				ORDER BY
					known_networks.network,
					sort_a DESC,
					sort_b DESC
			)

			-- Return a list of those networks enriched with all
			-- other information that we store in the database
			SELECT
				DISTINCT ON (network)
				network,
				autnum,

				-- Country
				COALESCE(
					(
						SELECT country FROM network_overrides overrides
						WHERE networks.network <<= overrides.network
						ORDER BY masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT country FROM autnum_overrides overrides
						WHERE networks.autnum = overrides.number
						LIMIT 1
					),
					(
						SELECT country FROM network_feeds feeds
						WHERE networks.network <<= feeds.network
						ORDER BY masklen(feeds.network) DESC
						LIMIT 1
					),
					(
						SELECT country FROM autnum_feeds feeds
						WHERE networks.autnum = feeds.number
						LIMIT 1
					),
					(
						SELECT
							geofeed_networks.country AS country
						FROM
							network_geofeeds

						-- Join the data from the geofeeds
						LEFT JOIN
							geofeeds ON network_geofeeds.url = geofeeds.url
						LEFT JOIN
							geofeed_networks ON geofeeds.id = geofeed_networks.geofeed_id

						-- Check whether we have a geofeed for this network
						WHERE
							networks.network <<= network_geofeeds.network
						AND
							networks.network <<= geofeed_networks.network

						-- Filter for the best result
						ORDER BY
							masklen(geofeed_networks.network) DESC
						LIMIT 1
					),
					networks.country
				) AS country,

				-- Anonymous proxy?
				COALESCE(
					(
						SELECT is_anonymous_proxy FROM network_overrides overrides
						WHERE networks.network <<= overrides.network
						ORDER BY masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT is_anonymous_proxy FROM network_feeds feeds
						WHERE networks.network <<= feeds.network
						ORDER BY masklen(feeds.network) DESC
						LIMIT 1
					),
					(
						SELECT is_anonymous_proxy FROM autnum_feeds feeds
						WHERE networks.autnum = feeds.number
						LIMIT 1
					),
					(
						SELECT is_anonymous_proxy FROM autnum_overrides overrides
						WHERE networks.autnum = overrides.number
						LIMIT 1
					),
					FALSE
				) AS is_anonymous_proxy,

				-- Satellite provider?
				COALESCE(
					(
						SELECT is_satellite_provider FROM network_overrides overrides
						WHERE networks.network <<= overrides.network
						ORDER BY masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT is_satellite_provider FROM network_feeds feeds
						WHERE networks.network <<= feeds.network
						ORDER BY masklen(feeds.network) DESC
						LIMIT 1
					),
					(
						SELECT is_satellite_provider FROM autnum_feeds feeds
						WHERE networks.autnum = feeds.number
						LIMIT 1
					),
					(
						SELECT is_satellite_provider FROM autnum_overrides overrides
						WHERE networks.autnum = overrides.number
						LIMIT 1
					),
					FALSE
				) AS is_satellite_provider,

				-- Anycast?
				COALESCE(
					(
						SELECT is_anycast FROM network_overrides overrides
						WHERE networks.network <<= overrides.network
						ORDER BY masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT is_anycast FROM network_feeds feeds
						WHERE networks.network <<= feeds.network
						ORDER BY masklen(feeds.network) DESC
						LIMIT 1
					),
					(
						SELECT is_anycast FROM autnum_feeds feeds
						WHERE networks.autnum = feeds.number
						LIMIT 1
					),
					(
						SELECT is_anycast FROM autnum_overrides overrides
						WHERE networks.autnum = overrides.number
						LIMIT 1
					),
					FALSE
				) AS is_anycast,

				-- Drop?
				COALESCE(
					(
						SELECT is_drop FROM network_overrides overrides
						WHERE networks.network <<= overrides.network
						ORDER BY masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT is_drop FROM network_feeds feeds
						WHERE networks.network <<= feeds.network
						ORDER BY masklen(feeds.network) DESC
						LIMIT 1
					),
					(
						SELECT is_drop FROM autnum_feeds feeds
						WHERE networks.autnum = feeds.number
						LIMIT 1
					),
					(
						SELECT is_drop FROM autnum_overrides overrides
						WHERE networks.autnum = overrides.number
						LIMIT 1
					),
					FALSE
				) AS is_drop
			FROM
				ordered_networks networks
			""")

		for row in rows:
			network = writer.add_network(row.network)

			# Save country
			if row.country:
				network.country_code = row.country

			# Save ASN
			if row.autnum:
				network.asn = row.autnum

			# Set flags
			if row.is_anonymous_proxy:
				network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)

			if row.is_satellite_provider:
				network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)

			if row.is_anycast:
				network.set_flag(location.NETWORK_FLAG_ANYCAST)

			if row.is_drop:
				network.set_flag(location.NETWORK_FLAG_DROP)

		# Add all countries
		log.info("Writing countries...")
		rows = self.db.query("SELECT * FROM countries ORDER BY country_code")

		for row in rows:
			c = writer.add_country(row.country_code)
			c.continent_code = row.continent_code
			c.name = row.name

		# Write everything to file
		log.info("Writing database to file...")
		for file in ns.file:
			writer.write(file)

	def handle_update_whois(self, ns):
		downloader = location.importer.Downloader()

		# Did we run successfully?
		error = False

		# Fetch all valid country codes to check parsed networks against
		validcountries = self.countries

		# Iterate over all potential sources
		for source in sorted(location.importer.SOURCES):
			with self.db.transaction():
				# Create some temporary tables to store parsed data
				self.db.execute("""
					CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
						organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
					CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

					CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
						name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
					CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);

					CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL,
						original_countries text[] NOT NULL, source text NOT NULL)
						ON COMMIT DROP;
					CREATE INDEX _rirdata_search ON _rirdata
						USING BTREE(family(network), masklen(network));
					CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
					""")

				# Remove all previously imported content
				self.db.execute("DELETE FROM autnums WHERE source = %s", source)
				self.db.execute("DELETE FROM networks WHERE source = %s", source)

				try:
					# Fetch WHOIS sources
					for url in location.importer.WHOIS_SOURCES.get(source, []):
						for block in downloader.request_blocks(url):
							self._parse_block(block, source, validcountries)

					# Fetch extended sources
					for url in location.importer.EXTENDED_SOURCES.get(source, []):
						for line in downloader.request_lines(url):
							self._parse_line(line, source, validcountries)
				except urllib.error.URLError as e:
					log.error("Could not retrieve data from %s: %s" % (source, e))
					error = True

					# Continue with the next source
					continue

				# Process all parsed networks from every RIR we happen to have access to,
				# insert the largest network chunks into the networks table immediately...
				families = self.db.query("""
					SELECT DISTINCT
						family(network) AS family
					FROM
						_rirdata
					ORDER BY
						family(network)
					""")

				for family in (row.family for row in families):
					# Fetch the smallest mask length in our data set
					smallest = self.db.get("""
						SELECT
							MIN(masklen(network)) AS prefix
						FROM
							_rirdata
						WHERE
							family(network) = %s
						""", family,
					)

					# ... and copy those largest network chunks right away, ...
					self.db.execute("""
						INSERT INTO
							networks(network, country, original_countries, source)
						SELECT
							network, country, original_countries, source
						FROM
							_rirdata
						WHERE
							masklen(network) = %s
						AND
							family(network) = %s
						ON CONFLICT (network) DO UPDATE
							SET country = excluded.country
						""", smallest.prefix, family,
					)

					# ... determine any other prefixes for this network family, ...
					prefixes = self.db.query("""
						SELECT
							DISTINCT masklen(network) AS prefix
						FROM
							_rirdata
						WHERE
							family(network) = %s
						ORDER BY
							masklen(network) ASC
						OFFSET 1
						""", family,
					)

					# ... and insert networks with this prefix in case they provide additional
					# information (i. e. subnet of a larger chunk with a different country)
					for prefix in (row.prefix for row in prefixes):
						self.db.execute("""
							WITH candidates AS (
								SELECT
									_rirdata.network,
									_rirdata.country,
									_rirdata.original_countries,
									_rirdata.source
								FROM
									_rirdata
								WHERE
									family(_rirdata.network) = %s
								AND
									masklen(_rirdata.network) = %s
							),
							filtered AS (
								SELECT
									DISTINCT ON (c.network)
									c.network,
									c.country,
									c.original_countries,
									c.source,
									masklen(networks.network),
									networks.country AS parent_country
								FROM
									candidates c
								LEFT JOIN
									networks
								ON
									c.network << networks.network
								ORDER BY
									c.network,
									masklen(networks.network) DESC NULLS LAST
							)
							INSERT INTO
								networks(network, country, original_countries, source)
							SELECT
								network,
								country,
								original_countries,
								source
							FROM
								filtered
							WHERE
								parent_country IS NULL
							OR
								country <> parent_country
							ON CONFLICT DO NOTHING""",
							family, prefix,
						)

				# Aggregate the parsed organisations and AS numbers
				self.db.execute("""
					INSERT INTO
						autnums(number, name, source)
					SELECT
						_autnums.number,
						_organizations.name,
						_organizations.source
					FROM
						_autnums
					JOIN
						_organizations ON _autnums.organization = _organizations.handle
					ON CONFLICT (number) DO UPDATE
						SET name = excluded.name
					""")

		# Download and import (technical) AS names from ARIN
		with self.db.transaction():
			self._import_as_names_from_arin(downloader)

		# Return a non-zero exit code for errors
		return 1 if error else 0

	def _check_parsed_network(self, network):
		"""
			Assistive function to detect and subsequently sort out parsed
			networks from RIR data (both WHOIS and so-called "extended sources"),
			which are:

			(a) not globally routable (RFC 1918 space, et al.)
			(b) covering too large a chunk of the IP address space (prefix length
				is < 7 for IPv4 networks, and < 10 for IPv6)
			(c) "0.0.0.0" or "::" as a network address
			(d) too small for being publicly announced (we have decided not to
				process them at the moment, as they significantly enlarge our
				database without providing very helpful additional information)

			This unfortunately is necessary due to brain-dead clutter across
			various RIR databases, causing mismatches and eventually disruptions.

			We will return False in case a network is not suitable for adding
			it to our database, and True otherwise.
		"""
		if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
			return False

		if not network.is_global:
			log.debug("Skipping non-globally routable network: %s" % network)
			return False

		if network.version == 4:
			if network.prefixlen < 7:
				log.debug("Skipping too big IP chunk: %s" % network)
				return False

			if network.prefixlen > 24:
				log.debug("Skipping network too small to be publicly announced: %s" % network)
				return False

			if str(network.network_address) == "0.0.0.0":
				log.debug("Skipping network based on 0.0.0.0: %s" % network)
				return False

		elif network.version == 6:
			if network.prefixlen < 10:
				log.debug("Skipping too big IP chunk: %s" % network)
				return False

			if network.prefixlen > 48:
				log.debug("Skipping network too small to be publicly announced: %s" % network)
				return False

			if str(network.network_address) == "::":
				log.debug("Skipping network based on '::': %s" % network)
				return False

		else:
			# This should not happen...
			log.warning("Skipping network of unknown family, this should not happen: %s" % network)
			return False

		# In case we have made it here, the network is considered to
		# be suitable for libloc consumption...
		return True
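
	# A quick sanity sketch of the checks above (illustrative values only):
	#   _check_parsed_network(ipaddress.ip_network("10.0.0.0/8"))   -> False  (not globally routable)
	#   _check_parsed_network(ipaddress.ip_network("12.0.0.0/6"))   -> False  (chunk too big, /6 < /7)
	#   _check_parsed_network(ipaddress.ip_network("81.3.27.0/25")) -> False  (too small to be announced)
	#   _check_parsed_network(ipaddress.ip_network("81.3.27.0/24")) -> True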

	def _check_parsed_asn(self, asn):
		"""
			Assistive function to filter Autonomous System Numbers not being suitable
			for adding to our database. Returns False in such cases, and True otherwise.
		"""
		for start, end in VALID_ASN_RANGES:
			if start <= asn and end >= asn:
				return True

		log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
		return False
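
	# Illustrative: _check_parsed_asn(64495) -> True, while 64496 (RFC 5398
	# documentation range) and 23456 (AS_TRANS, RFC 6793) both yield False.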

	def _parse_block(self, block, source_key, validcountries = None):
		# Get first line to find out what type of block this is
		line = block[0]

		# aut-num
		if line.startswith("aut-num:"):
			return self._parse_autnum_block(block, source_key)

		# inetnum
		if line.startswith("inet6num:") or line.startswith("inetnum:"):
			return self._parse_inetnum_block(block, source_key, validcountries)

		# organisation
		elif line.startswith("organisation:"):
			return self._parse_org_block(block, source_key)

	def _parse_autnum_block(self, block, source_key):
		autnum = {}
		for line in block:
			# Split line
			key, val = split_line(line)

			if key == "aut-num":
				m = re.match(r"^(AS|as)(\d+)", val)
				if m:
					autnum["asn"] = m.group(2)

			elif key == "org":
				autnum[key] = val.upper()

			elif key == "descr":
				# Save the first description line as well...
				if not key in autnum:
					autnum[key] = val

		# Skip empty objects
		if not autnum or not "asn" in autnum:
			return

		# Insert a dummy organisation handle into our temporary organisations
		# table in case the AS does not have an organisation handle set, but
		# has a description (a quirk often observed in APNIC area), so we can
		# later display at least some string for this AS.
		if not "org" in autnum:
			if "descr" in autnum:
				autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn")

				self.db.execute("INSERT INTO _organizations(handle, name, source) \
					VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
					autnum.get("org"), autnum.get("descr"), source_key,
				)
			else:
				log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
					autnum.get("asn"))
				return

		# Insert into database
		self.db.execute("INSERT INTO _autnums(number, organization, source) \
			VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
			organization = excluded.organization",
			autnum.get("asn"), autnum.get("org"), source_key,
		)

	def _parse_inetnum_block(self, block, source_key, validcountries = None):
		log.debug("Parsing inetnum block:")

		inetnum = {}
		for line in block:
			log.debug(line)

			# Split line
			key, val = split_line(line)

			# Filter any inetnum records which are only referring to IP space
			# not managed by that specific RIR...
			if key == "netname":
				if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()):
					log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
					return

			if key == "inetnum":
				start_address, delim, end_address = val.partition("-")

				# Strip any excess space
				start_address, end_address = start_address.rstrip(), end_address.strip()

				# Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
				if start_address and not (delim or end_address):
					try:
						start_address = ipaddress.ip_network(start_address, strict=False)
					except ValueError:
						start_address = start_address.split("/")
						ldigits = start_address[0].count(".")

						# How many octets do we need to add?
						# (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
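						# Illustrative: "163.0/12" (one dot) needs two octets appended
						# to become "163.0.0.0/12", while "24.152.8/22" (two dots)
						# needs just one to become "24.152.8.0/22".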
						if ldigits == 1:
							start_address = start_address[0] + ".0.0/" + start_address[1]
						elif ldigits == 2:
							start_address = start_address[0] + ".0/" + start_address[1]
						else:
							log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line)
							return

						try:
							start_address = ipaddress.ip_network(start_address, strict=False)
						except ValueError:
							log.warning("Could not parse line in LACNIC DB format: %s" % line)
							return

					# Enumerate first and last IP address of this network
					end_address = start_address[-1]
					start_address = start_address[0]

				else:
					# Convert to IP address
					try:
						start_address = ipaddress.ip_address(start_address)
						end_address = ipaddress.ip_address(end_address)
					except ValueError:
						log.warning("Could not parse line: %s" % line)
						return

				inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))

			elif key == "inet6num":
				inetnum[key] = [ipaddress.ip_network(val, strict=False)]

			elif key == "country":
				val = val.upper()

				# Catch RIR data objects with more than one country code...
				if not key in inetnum:
					inetnum[key] = []
				else:
					if val in inetnum.get("country"):
						# ... but keep this list distinct...
						continue

				# When people set country codes to "UK", they actually mean "GB"
				if val == "UK":
					val = "GB"

				inetnum[key].append(val)

			# Parse the geofeed attribute
			elif key == "geofeed":
				inetnum["geofeed"] = val

			# Parse geofeed when used as a remark
			elif key == "remarks":
				m = re.match(r"^(?:Geofeed)\s+(https://.*)", val)
				if m:
					inetnum["geofeed"] = m.group(1)

		# Skip empty objects
		if not inetnum or not "country" in inetnum:
			return

		# Prepare skipping objects with unknown country codes...
		invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]

		# Iterate through all networks enumerated from above, check them for plausibility and insert
		# them into the database, if _check_parsed_network() succeeded
		for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
			if self._check_parsed_network(single_network):
				# Skip objects with unknown country codes to avoid log spam...
				if validcountries and invalidcountries:
					log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
						(invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
					break

				# Everything is fine here, run INSERT statement...
				self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
					VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
					"%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
				)

				# Update any geofeed information
				geofeed = inetnum.get("geofeed", None)
				if geofeed:
					self._parse_geofeed(geofeed, single_network)

				# Delete any previous geofeeds
				else:
					self.db.execute("DELETE FROM network_geofeeds WHERE network = %s",
						"%s" % single_network)

	def _parse_geofeed(self, url, single_network):
		# Parse the URL
		url = urllib.parse.urlparse(url)

		# Make sure that this is a HTTPS URL
		if not url.scheme == "https":
			log.debug("Geofeed URL is not using HTTPS: %s" % url.geturl())
			return

		# Put the URL back together normalized
		url = url.geturl()

		# Store/update any geofeeds
		self.db.execute("""
			INSERT INTO
				network_geofeeds(network, url)
			VALUES(%s, %s)
			ON CONFLICT (network) DO
				UPDATE SET url = excluded.url""",
			"%s" % single_network, url,
		)

	def _parse_org_block(self, block, source_key):
		org = {}
		for line in block:
			# Split line
			key, val = split_line(line)

			if key == "organisation":
				org[key] = val.upper()
			elif key == "org-name":
				org[key] = val

		# Skip empty objects
		if not org:
			return

		self.db.execute("INSERT INTO _organizations(handle, name, source) \
			VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
			UPDATE SET name = excluded.name",
			org.get("organisation"), org.get("org-name"), source_key,
		)

	def _parse_line(self, line, source_key, validcountries = None):
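		# Lines follow the RIR "extended" delegation format, e.g. (illustrative):
		#   ripencc|DE|ipv4|193.0.0.0|1024|19930901|allocated|<opaque-id>
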
		# Skip version line
		if line.startswith("2"):
			return

		# Skip comments
		if line.startswith("#"):
			return

		try:
			registry, country_code, type, line = line.split("|", 3)
		except ValueError:
			log.warning("Could not parse line: %s" % line)
			return

		# Skip any lines that are for stats only or do not have a country
		# code at all (avoids log spam below)
		if not country_code or country_code == '*':
			return

		# Skip objects with unknown country codes
		if validcountries and country_code not in validcountries:
			log.warning("Skipping line with bogus country '%s': %s" % \
				(country_code, line))
			return

		if type in ("ipv6", "ipv4"):
			return self._parse_ip_line(country_code, type, line, source_key)

	def _parse_ip_line(self, country, type, line, source_key):
		try:
			address, prefix, date, status, organization = line.split("|")
		except ValueError:
			organization = None

			# Try parsing the line without organization
			try:
				address, prefix, date, status = line.split("|")
			except ValueError:
				log.warning("Unhandled line format: %s" % line)
				return

		# Skip anything that isn't properly assigned
		if not status in ("assigned", "allocated"):
			return

		# Cast prefix into an integer
		try:
			prefix = int(prefix)
		except ValueError:
			log.warning("Invalid prefix: %s" % prefix)
			return

		# Fix prefix length for IPv4
		if type == "ipv4":
			prefix = 32 - int(math.log(prefix, 2))
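
			# The field carries the number of addresses, not a prefix length,
			# e.g. 1024 addresses -> log2(1024) = 10 -> prefix length 32 - 10 = /22.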

		# Try to parse the address
		try:
			network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
		except ValueError:
			log.warning("Invalid IP address: %s" % address)
			return

		if not self._check_parsed_network(network):
			return

		self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
			VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
			UPDATE SET country = excluded.country",
			"%s" % network, country, [country], source_key,
		)

	def _import_as_names_from_arin(self, downloader):
		# Delete all previously imported content
		self.db.execute("DELETE FROM autnums WHERE source = %s", "ARIN")

		# Try to retrieve the feed from ftp.arin.net
		feed = downloader.request_lines("https://ftp.arin.net/pub/resource_registry_service/asns.csv")
1260 for line
in csv
.DictReader(feed
, dialect
="arin"):
1261 log
.debug("Processing object: %s" % line
)
1264 status
= line
.get("Status")
1266 # We are only interested in anything managed by ARIN
1267 if not status
== "Full Registry Services":
1270 # Fetch organization name
1271 name
= line
.get("Org Name")
1274 first_asn
= line
.get("Start AS Number")
1275 last_asn
= line
.get("End AS Number")
1279 first_asn
= int(first_asn
)
1280 except TypeError as e
:
1281 log
.warning("Could not parse ASN '%s'" % first_asn
)
1285 last_asn
= int(last_asn
)
1286 except TypeError as e
:
1287 log
.warning("Could not parse ASN '%s'" % last_asn
)
1290 # Check if the range is valid
1291 if last_asn
< first_asn
:
1292 log
.warning("Invalid ASN range %s-%s" % (first_asn
, last_asn
))
1294 # Insert everything into the database
1295 for asn
in range(first_asn
, last_asn
+ 1):
1296 if not self
._check
_parsed
_asn
(asn
):
1297 log
.warning("Skipping invalid ASN %s" % asn
)
1317 """, asn
, name
, "ARIN",

	def handle_update_announcements(self, ns):
		server = ns.server[0]

		with self.db.transaction():
			if server.startswith("/"):
				self._handle_update_announcements_from_bird(server)

			# Purge anything we never want here
			self.db.execute("""
				-- Delete default routes
				DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

				-- Delete anything that is not global unicast address space
				DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

				-- DELETE "current network" address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';

				-- DELETE local loopback address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';

				-- DELETE RFC 1918 address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

				-- DELETE test, benchmark and documentation address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';

				-- DELETE CGNAT address space (RFC 6598)
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';

				-- DELETE link local address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';

				-- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
				DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';

				-- DELETE multicast and reserved address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';

				-- Delete networks that are too small to be in the global routing table
				DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
				DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

				-- Delete any non-public or reserved ASNs
				DELETE FROM announcements WHERE NOT (
					(autnum >= 1 AND autnum <= 23455)
					OR
					(autnum >= 23457 AND autnum <= 64495)
					OR
					(autnum >= 131072 AND autnum <= 4199999999)
				);

				-- Delete everything that we have not seen for 14 days
				DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
				""")

	def _handle_update_announcements_from_bird(self, server):
		# Pre-compile the regular expression for faster searching
		route = re.compile(rb"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")

		log.info("Requesting routing table from Bird (%s)" % server)

		aggregated_networks = []

		# Send command to list all routes
		for line in self._bird_cmd(server, "show route"):
			m = route.match(line)
			if not m:
				# Skip empty lines
				if not line:
					pass

				# Ignore any header lines with the name of the routing table
				elif line.startswith(b"Table"):
					pass

				# Log anything else
				else:
					log.debug("Could not parse line: %s" % line.decode())

				continue

			# Fetch the extracted network and ASN
			network, autnum = m.groups()

			# Decode into strings
			if network:
				network = network.decode()
			if autnum:
				autnum = autnum.decode()

			# Collect all aggregated networks
			if not autnum:
				log.debug("%s is an aggregated network" % network)
				aggregated_networks.append(network)
				continue

			# Insert it into the database
			self.db.execute("INSERT INTO announcements(network, autnum) \
				VALUES(%s, %s) ON CONFLICT (network) DO \
				UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
				network, autnum,
			)

		# Process any aggregated networks
		for network in aggregated_networks:
			log.debug("Processing aggregated network %s" % network)

			# Run "show route all" for each network
			for line in self._bird_cmd(server, "show route %s all" % network):
				# Try finding the path
				m = re.match(rb"\s+BGP\.as_path:.* (\d+) {\d+}$", line)
				if m:
					# Select the last AS number in the path
					autnum = m.group(1).decode()

					# Insert it into the database
					self.db.execute("INSERT INTO announcements(network, autnum) \
						VALUES(%s, %s) ON CONFLICT (network) DO \
						UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
						network, autnum,
					)

					# We don't need to process any more
					break

	def _bird_cmd(self, socket_path, command):
		# Connect to the socket
		s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
		s.connect(socket_path)

		# Allocate some buffer
		buffer = b""

		log.debug("Sending Bird command: %s" % command)

		# Send the command
		s.send(b"%s\n" % command.encode())

		while True:
			# Fill up the buffer
			buffer += s.recv(4096)

			while True:
				# Search for the next newline
				pos = buffer.find(b"\n")

				# If we cannot find one, we go back and read more data
				if pos <= 0:
					break

				# Cut after the newline character
				pos += 1

				# Split the line we want and keep the rest in buffer
				line, buffer = buffer[:pos], buffer[pos:]

				# Try parsing any status lines
				# (byte 32 is " " for a final reply line, byte 45 is "-" for a continuation)
				if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
					code, delim, line = int(line[:4]), line[4], line[5:]

					log.debug("Received response code %s from bird" % code)

					# End of output
					if code == 0:
						return

					# Ignore hello line
					if code == 1:
						continue

				# Otherwise return the line
				yield line

	def handle_update_geofeeds(self, ns):
		# Sync geofeeds
		with self.db.transaction():
			# Delete all geofeeds which are no longer linked
			self.db.execute("""
				DELETE FROM
					geofeeds
				WHERE NOT EXISTS (
					SELECT
						1
					FROM
						network_geofeeds
					WHERE
						geofeeds.url = network_geofeeds.url
				)""",
			)

			# Copy all geofeeds
			self.db.execute("""
				INSERT INTO
					geofeeds(url)
				SELECT
					url
				FROM
					network_geofeeds
				ON CONFLICT (url) DO NOTHING
				""",
			)

		# Fetch all Geofeeds that require an update
		geofeeds = self.db.query("""
			SELECT
				id, url
			FROM
				geofeeds
			WHERE
				updated_at IS NULL
			OR
				updated_at <= CURRENT_TIMESTAMP - INTERVAL '1 week'
			ORDER BY
				id
			""")

		with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
			results = executor.map(self._fetch_geofeed, geofeeds)

			# Fetch all results to raise any exceptions
			for result in results:
				pass

		# Delete data from any feeds that did not update in the last two weeks
		with self.db.transaction():
			self.db.execute("""
				DELETE FROM
					geofeed_networks
				WHERE
					geofeed_networks.geofeed_id IN (
						SELECT
							id
						FROM
							geofeeds
						WHERE
							updated_at IS NULL
						OR
							updated_at <= CURRENT_TIMESTAMP - INTERVAL '2 weeks'
					)
				""")

	def _fetch_geofeed(self, geofeed):
		log.debug("Fetching Geofeed %s" % geofeed.url)

		with self.db.transaction():
			# Open the URL
			try:
				req = urllib.request.Request(geofeed.url, headers={
					"User-Agent" : "location/%s" % location.__version__,

					# We expect some plain text file in CSV format
					"Accept" : "text/csv, text/plain",
				})

				with urllib.request.urlopen(req, timeout=10) as f:
					# Remove any previous data
					self.db.execute("DELETE FROM geofeed_networks \
						WHERE geofeed_id = %s", geofeed.id)

					# Read the output line by line
					for lineno, line in enumerate(f, 1):
						try:
							line = line.decode()

						# Ignore any lines we cannot decode
						except UnicodeDecodeError:
							log.debug("Could not decode line %s in %s" \
								% (lineno, geofeed.url))
							continue

						# Strip any newline
						line = line.rstrip()

						# Skip empty lines
						if not line:
							continue

						# Try to parse the line
						try:
							fields = line.split(",", 5)
						except ValueError:
							log.debug("Could not parse line: %s" % line)
							continue

						# Check if we have enough fields
						if len(fields) < 4:
							log.debug("Not enough fields in line: %s" % line)
							continue

						# Fetch all fields
						network, country, region, city = fields[:4]
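
						# A Geofeed line per RFC 8805 looks like (illustrative):
						#   192.0.2.0/24,US,US-CA,Los Angeles,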

						# Try to parse the network
						try:
							network = ipaddress.ip_network(network, strict=False)
						except ValueError:
							log.debug("Could not parse network: %s" % network)
							continue

						# Strip any excess whitespace from country codes
						country = country.strip()

						# Make the country code uppercase
						country = country.upper()

						# Check the country code
						if not country:
							log.debug("Empty country code in Geofeed %s line %s" \
								% (geofeed.url, lineno))
							continue

						elif not location.country_code_is_valid(country):
							log.debug("Invalid country code in Geofeed %s:%s: %s" \
								% (geofeed.url, lineno, country))
							continue

						# Write this into the database
						self.db.execute("""
							INSERT INTO
								geofeed_networks(geofeed_id, network, country, region, city)
							VALUES (%s, %s, %s, %s, %s)""",
							geofeed.id,
							"%s" % network,
							country,
							region,
							city,
						)

			# Catch any HTTP errors
			except urllib.request.HTTPError as e:
				self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
					WHERE id = %s", e.code, "%s" % e, geofeed.id)

				# Remove any previous data when the feed has been deleted
				if e.code == 404:
					self.db.execute("DELETE FROM geofeed_networks \
						WHERE geofeed_id = %s", geofeed.id)

			# Catch any other errors and connection timeouts
			except (http.client.InvalidURL, urllib.request.URLError, TimeoutError) as e:
				log.debug("Could not fetch URL %s: %s" % (geofeed.url, e))

				self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
					WHERE id = %s", 599, "%s" % e, geofeed.id)

			# Mark the geofeed as updated
			else:
				self.db.execute("""
					UPDATE
						geofeeds
					SET
						updated_at = CURRENT_TIMESTAMP,
						status = NULL,
						error = NULL
					WHERE
						id = %s""",
					geofeed.id,
				)
1698 updated_at = CURRENT_TIMESTAMP,
1706 def handle_update_overrides(self
, ns
):
1707 with self
.db
.transaction():
1708 # Only drop manually created overrides, as we can be reasonably sure to have them,
1709 # and preserve the rest. If appropriate, it is deleted by correspondent functions.
1711 DELETE FROM autnum_overrides WHERE source = 'manual';
1712 DELETE FROM network_overrides WHERE source = 'manual';
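
			# Override files consist of RPSL-like blocks using the keys read
			# below, e.g. (illustrative):
			#   net: 203.0.113.0/24
			#   country: DE
			#   is-anycast: yes
			#
			#   aut-num: AS64496
			#   country: DE
			#   drop: yes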

			for file in ns.files:
				log.info("Reading %s..." % file)

				with open(file, "rb") as f:
					for type, block in location.importer.read_blocks(f):
						if type == "net":
							network = block.get("net")

							# Try to parse and normalise the network
							try:
								network = ipaddress.ip_network(network, strict=False)
							except ValueError as e:
								log.warning("Invalid IP network: %s: %s" % (network, e))
								continue

							# Prevent that we overwrite all networks
							if network.prefixlen == 0:
								log.warning("Skipping %s: You cannot overwrite default" % network)
								continue

							self.db.execute("""
								INSERT INTO network_overrides(
									network,
									country,
									source,
									is_anonymous_proxy,
									is_satellite_provider,
									is_anycast,
									is_drop
								) VALUES (%s, %s, %s, %s, %s, %s, %s)
								ON CONFLICT (network) DO NOTHING""",
								"%s" % network,
								block.get("country"),
								"manual",
								self._parse_bool(block, "is-anonymous-proxy"),
								self._parse_bool(block, "is-satellite-provider"),
								self._parse_bool(block, "is-anycast"),
								self._parse_bool(block, "drop"),
							)

						elif type == "aut-num":
							autnum = block.get("aut-num")

							# Check if AS number begins with "AS"
							if not autnum.startswith("AS"):
								log.warning("Invalid AS number: %s" % autnum)
								continue

							# Strip "AS"
							autnum = autnum[2:]

							self.db.execute("""
								INSERT INTO autnum_overrides(
									number,
									name,
									country,
									source,
									is_anonymous_proxy,
									is_satellite_provider,
									is_anycast,
									is_drop
								) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
								ON CONFLICT DO NOTHING""",
								autnum,
								block.get("name"),
								block.get("country"),
								"manual",
								self._parse_bool(block, "is-anonymous-proxy"),
								self._parse_bool(block, "is-satellite-provider"),
								self._parse_bool(block, "is-anycast"),
								self._parse_bool(block, "drop"),
							)

						else:
							log.warning("Unsupported type: %s" % type)

	def handle_update_feeds(self, ns):
		"""
			Update any third-party feeds
		"""
		success = True

		# Create a downloader
		downloader = location.importer.Downloader()

		# Feeds to import
		feeds = [
			("AWS-IP-RANGES", self._import_aws_ip_ranges, "https://ip-ranges.amazonaws.com/ip-ranges.json"),
		]

		# Walk through all feeds
		for name, callback, url, *args in feeds:
			try:
				self._process_feed(downloader, name, callback, url, *args)

			# Log an error but continue if an exception occurs
			except Exception as e:
				log.error("Error processing feed '%s': %s" % (name, e))
				success = False

		# Spamhaus
		self._update_feed_for_spamhaus_drop()

		return 0 if success else 1

	def _process_feed(self, downloader, name, callback, url, *args):
		# Download the feed
		f = downloader.retrieve(url)

		with self.db.transaction():
			# Drop any previous content
			self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
			self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)

			# Call the callback to process the feed
			return callback(name, f, *args)

	def _import_aws_ip_ranges(self, name, f):
		# Parse the feed
		aws_ip_dump = json.load(f)
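
		# The dump is a JSON document of this shape (illustrative excerpt):
		#   {"prefixes":      [{"ip_prefix": "192.0.2.0/24", "region": "eu-central-1", ...}],
		#    "ipv6_prefixes": [{"ipv6_prefix": "2001:db8::/32", "region": "GLOBAL", ...}]}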

		# XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
		# there seems to be no machine-readable version available of this other than
		# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
		# (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
		# was helpful here as well.
		aws_region_country_map = {
			"ap-northeast-3": "JP",
			"ap-northeast-2": "KR",
			"ap-southeast-1": "SG",
			"ap-southeast-2": "AU",
			"ap-southeast-3": "MY",
			"ap-southeast-4": "AU",
			"ap-southeast-5": "NZ", # Auckland, NZ
			"ap-southeast-6": "AP", # XXX: Precise location not documented anywhere
			"ap-northeast-1": "JP",
			"ca-central-1": "CA",
			"eu-central-1": "DE",
			"eu-central-2": "CH",
			"eusc-de-east-1" : "DE", # XXX: Undocumented, likely located in Berlin rather than Frankfurt
			"il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
			"me-central-1": "AE",
		}

		# Fetch all valid country codes to check parsed networks against...
		rows = self.db.query("SELECT * FROM countries ORDER BY country_code")

		validcountries = []
		for row in rows:
			validcountries.append(row.country_code)

		for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
			try:
				network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
			except ValueError:
				log.warning("Unable to parse line: %s" % snetwork)
				continue

			# Sanitize parsed networks...
			if not self._check_parsed_network(network):
				continue

			# Determine region of this network...
			region = snetwork["region"]
			cc = None
			is_anycast = False

			# Any region name starting with "us-" will get "US" country code assigned straight away...
			if region.startswith("us-"):
				cc = "US"
			elif region.startswith("cn-"):
				# ... same goes for China ...
				cc = "CN"
			elif region == "GLOBAL":
				# ... funny region name for anycast-like networks ...
				is_anycast = True
			elif region in aws_region_country_map:
				# ... assign looked up country code otherwise ...
				cc = aws_region_country_map[region]
			else:
				# ... and bail out if we are missing something here
				log.warning("Unable to determine country code for line: %s" % snetwork)
				continue

			# Skip networks with unknown country codes
			if not is_anycast and validcountries and cc not in validcountries:
				log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
					(cc, snetwork))
				continue

			# Conduct SQL statement...
			self.db.execute("""
				INSERT INTO network_feeds(
					network,
					source,
					country,
					is_anycast
				)
				VALUES(%s, %s, %s, %s)
				ON CONFLICT (network, source) DO NOTHING
				""", "%s" % network, name, cc, is_anycast,
			)

	def _update_feed_for_spamhaus_drop(self):
		downloader = location.importer.Downloader()

		ip_lists = [
			("SPAMHAUS-DROP", "https://www.spamhaus.org/drop/drop.txt"),
			("SPAMHAUS-EDROP", "https://www.spamhaus.org/drop/edrop.txt"),
			("SPAMHAUS-DROPV6", "https://www.spamhaus.org/drop/dropv6.txt"),
		]

		asn_lists = [
			("SPAMHAUS-ASNDROP", "https://www.spamhaus.org/drop/asndrop.json"),
		]

		for name, url in ip_lists:
			# Fetch IP list from given URL
			f = downloader.retrieve(url)

			# Split into lines
			fcontent = f.readlines()

			with self.db.transaction():
				# Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
				# lists to be transferred; only purge the old list if the new one is plausible
				if len(fcontent) > 10:
					self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)
				else:
					log.warning("%s (%s) returned likely bogus file, ignored" % (name, url))
					continue

				# Iterate through every line, filter comments and add remaining networks to
				# the override table in case they are valid...
				for sline in fcontent:
					# The response is assumed to be encoded in UTF-8...
					sline = sline.decode("utf-8")

					# Comments start with a semicolon...
					if sline.startswith(";"):
						continue

					# Extract network and ignore anything afterwards...
					try:
						network = ipaddress.ip_network(sline.split()[0], strict=False)
					except ValueError:
						log.error("Unable to parse line: %s" % sline)
						continue

					# Sanitize parsed networks...
					if not self._check_parsed_network(network):
						log.warning("Skipping bogus network found in %s (%s): %s" % \
							(name, url, network))
						continue

					# Conduct SQL statement...
					self.db.execute("""
						INSERT INTO network_feeds(
							network,
							source,
							is_drop
						)
						VALUES(%s, %s, %s)""",
						"%s" % network, name, True,
					)

		for name, url in asn_lists:
			# Fetch ASN list from given URL
			f = downloader.retrieve(url)

			# Split into lines
			fcontent = f.readlines()

			with self.db.transaction():
				# Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
				# lists to be transferred; only purge the old list if the new one is plausible
				if len(fcontent) > 10:
					self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
				else:
					log.warning("%s (%s) returned likely bogus file, ignored" % (name, url))
					continue

				# Iterate through every line, filter comments and add remaining ASNs to
				# the override table in case they are valid...
				for sline in fcontent:
					# The response is assumed to be encoded in UTF-8...
					sline = sline.decode("utf-8")

					# Load every line as a JSON object and try to obtain an ASN from it...
					try:
						lineobj = json.loads(sline)
					except json.decoder.JSONDecodeError:
						log.error("Unable to parse line as a JSON object: %s" % sline)
						continue

					# Skip line containing file metadata
					try:
						type = lineobj["type"]

						if type == "metadata":
							continue
					except KeyError:
						pass

					try:
						asn = lineobj["asn"]
						as_name = lineobj["asname"]
					except KeyError:
						log.warning("Unable to extract necessary information from line: %s" % sline)
						continue

					# Filter invalid ASNs...
					if not self._check_parsed_asn(asn):
						log.warning("Skipping bogus ASN found in %s (%s): %s" % \
							(name, url, asn))
						continue

					# Conduct SQL statement...
					self.db.execute("""
						INSERT INTO autnum_feeds(
							number,
							source,
							is_drop
						)
						VALUES(%s, %s, %s)""",
						"%s" % asn, name, True,
					)

	@staticmethod
	def _parse_bool(block, key):
		val = block.get(key)

		# There is no point to proceed when we got None
		if val is None:
			return

		# Convert to lowercase
		val = val.lower()

		# Look for all values that mean True or False
		if val in ("yes", "1"):
			return True

		if val in ("no", "0"):
			return False

		# Default to None
		return None

	@property
	def countries(self):
		# Fetch all valid country codes to check parsed networks against
		rows = self.db.query("SELECT * FROM countries ORDER BY country_code")

		# Return all countries
		return [row.country_code for row in rows]

	def handle_import_countries(self, ns):
		with self.db.transaction():
			# Drop all data that we have
			self.db.execute("TRUNCATE TABLE countries")

			# Import all countries
			for file in ns.file:
				for line in file:
					line = line.rstrip()
2112 if line
.startswith("#"):
2116 country_code
, continent_code
, name
= line
.split(maxsplit
=2)
2118 log
.warning("Could not parse line: %s" % line
)
2121 self
.db
.execute("INSERT INTO countries(country_code, name, continent_code) \
2122 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code
, name
, continent_code
)


def split_line(line):
	key, colon, val = line.partition(":")

	# Strip any excess space
	key = key.strip()
	val = val.strip()

	return key, val
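
# Illustrative: split_line("country:   DE") -> ("country", "DE")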

# Run the command line interface
c = CLI()
c.run()