2 ###############################################################################
4 # libloc - A library to determine the location of someone on the Internet #
6 # Copyright (C) 2020-2024 IPFire Development Team <info@ipfire.org> #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
18 ###############################################################################
import argparse
import concurrent.futures
import csv
import ipaddress
import logging
import math
import re
import urllib.error
import urllib.parse

# Load our location module
import location
import location.database
import location.importer
from location.i18n import _
# Module-level logger used throughout the importer
log = logging.getLogger("location.importer")

# Configure the CSV parser for ARIN's feed (fully quoted, comma-separated)
csv.register_dialect("arin", delimiter=",", quoting=csv.QUOTE_ALL, quotechar="\"")
55 parser
= argparse
.ArgumentParser(
56 description
=_("Location Importer Command Line Interface"),
58 subparsers
= parser
.add_subparsers()
60 # Global configuration flags
61 parser
.add_argument("--debug", action
="store_true",
62 help=_("Enable debug output"))
63 parser
.add_argument("--quiet", action
="store_true",
64 help=_("Enable quiet mode"))
67 parser
.add_argument("--version", action
="version",
68 version
="%(prog)s @VERSION@")
71 parser
.add_argument("--database-host", required
=True,
72 help=_("Database Hostname"), metavar
=_("HOST"))
73 parser
.add_argument("--database-name", required
=True,
74 help=_("Database Name"), metavar
=_("NAME"))
75 parser
.add_argument("--database-username", required
=True,
76 help=_("Database Username"), metavar
=_("USERNAME"))
77 parser
.add_argument("--database-password", required
=True,
78 help=_("Database Password"), metavar
=_("PASSWORD"))
81 write
= subparsers
.add_parser("write", help=_("Write database to file"))
82 write
.set_defaults(func
=self
.handle_write
)
83 write
.add_argument("file", nargs
=1, help=_("Database File"))
84 write
.add_argument("--signing-key", nargs
="?", type=open, help=_("Signing Key"))
85 write
.add_argument("--backup-signing-key", nargs
="?", type=open, help=_("Backup Signing Key"))
86 write
.add_argument("--vendor", nargs
="?", help=_("Sets the vendor"))
87 write
.add_argument("--description", nargs
="?", help=_("Sets a description"))
88 write
.add_argument("--license", nargs
="?", help=_("Sets the license"))
89 write
.add_argument("--version", type=int, help=_("Database Format Version"))
92 update_whois
= subparsers
.add_parser("update-whois", help=_("Update WHOIS Information"))
93 update_whois
.set_defaults(func
=self
.handle_update_whois
)
95 # Update announcements
96 update_announcements
= subparsers
.add_parser("update-announcements",
97 help=_("Update BGP Annoucements"))
98 update_announcements
.set_defaults(func
=self
.handle_update_announcements
)
99 update_announcements
.add_argument("server", nargs
=1,
100 help=_("Route Server to connect to"), metavar
=_("SERVER"))
103 update_geofeeds
= subparsers
.add_parser("update-geofeeds",
104 help=_("Update Geofeeds"))
105 update_geofeeds
.set_defaults(func
=self
.handle_update_geofeeds
)
108 update_overrides
= subparsers
.add_parser("update-overrides",
109 help=_("Update overrides"),
111 update_overrides
.add_argument(
112 "files", nargs
="+", help=_("Files to import"),
114 update_overrides
.set_defaults(func
=self
.handle_update_overrides
)
117 import_countries
= subparsers
.add_parser("import-countries",
118 help=_("Import countries"),
120 import_countries
.add_argument("file", nargs
=1, type=argparse
.FileType("r"),
121 help=_("File to import"))
122 import_countries
.set_defaults(func
=self
.handle_import_countries
)
124 args
= parser
.parse_args()
128 location
.logger
.set_level(logging
.DEBUG
)
130 location
.logger
.set_level(logging
.WARNING
)
132 # Print usage if no action was given
133 if not "func" in args
:
140 # Parse command line arguments
141 args
= self
.parse_cli()
143 # Initialise database
144 self
.db
= self
._setup
_database
(args
)
147 ret
= args
.func(args
)
149 # Return with exit code
153 # Otherwise just exit
156 def _setup_database(self
, ns
):
158 Initialise the database
160 # Connect to database
161 db
= location
.database
.Connection(
162 host
=ns
.database_host
, database
=ns
.database_name
,
163 user
=ns
.database_username
, password
=ns
.database_password
,
166 with db
.transaction():
169 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
170 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
171 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
172 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
173 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
174 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
177 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
178 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
179 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
182 CREATE TABLE IF NOT EXISTS countries(
183 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
184 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
187 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
188 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
189 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
190 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
191 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
192 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
195 CREATE TABLE IF NOT EXISTS geofeeds(
196 id serial primary key,
198 status integer default null,
199 updated_at timestamp without time zone default null
201 ALTER TABLE geofeeds ADD COLUMN IF NOT EXISTS error text;
202 CREATE UNIQUE INDEX IF NOT EXISTS geofeeds_unique
204 CREATE TABLE IF NOT EXISTS geofeed_networks(
205 geofeed_id integer references geofeeds(id) on delete cascade,
211 CREATE INDEX IF NOT EXISTS geofeed_networks_geofeed_id
212 ON geofeed_networks(geofeed_id);
213 CREATE INDEX IF NOT EXISTS geofeed_networks_search
214 ON geofeed_networks USING GIST(network inet_ops);
215 CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
216 CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique
217 ON network_geofeeds(network);
218 CREATE INDEX IF NOT EXISTS network_geofeeds_search
219 ON network_geofeeds USING GIST(network inet_ops);
220 CREATE INDEX IF NOT EXISTS network_geofeeds_url
221 ON network_geofeeds(url);
224 CREATE TABLE IF NOT EXISTS autnum_feeds(
225 number bigint NOT NULL,
226 source text NOT NULL,
229 is_anonymous_proxy boolean,
230 is_satellite_provider boolean,
234 CREATE UNIQUE INDEX IF NOT EXISTS autnum_feeds_unique
235 ON autnum_feeds(number, source);
237 CREATE TABLE IF NOT EXISTS network_feeds(
238 network inet NOT NULL,
239 source text NOT NULL,
241 is_anonymous_proxy boolean,
242 is_satellite_provider boolean,
246 CREATE UNIQUE INDEX IF NOT EXISTS network_feeds_unique
247 ON network_feeds(network, source);
248 CREATE INDEX IF NOT EXISTS network_feeds_search
249 ON network_feeds USING GIST(network inet_ops);
252 CREATE TABLE IF NOT EXISTS autnum_overrides(
253 number bigint NOT NULL,
256 is_anonymous_proxy boolean,
257 is_satellite_provider boolean,
260 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
261 ON autnum_overrides(number);
262 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
263 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
265 CREATE TABLE IF NOT EXISTS network_overrides(
266 network inet NOT NULL,
268 is_anonymous_proxy boolean,
269 is_satellite_provider boolean,
272 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
273 ON network_overrides(network);
274 CREATE INDEX IF NOT EXISTS network_overrides_search
275 ON network_overrides USING GIST(network inet_ops);
276 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
277 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
282 def handle_write(self
, ns
):
284 Compiles a database in libloc format out of what is in the database
287 writer
= location
.Writer(ns
.signing_key
, ns
.backup_signing_key
)
291 writer
.vendor
= ns
.vendor
294 writer
.description
= ns
.description
297 writer
.license
= ns
.license
299 # Add all Autonomous Systems
300 log
.info("Writing Autonomous Systems...")
302 # Select all ASes with a name
303 rows
= self
.db
.query("""
305 autnums.number AS number,
313 autnum_overrides overrides ON autnums.number = overrides.number
319 # Skip AS without names
323 a
= writer
.add_as(row
.number
)
327 log
.info("Writing networks...")
329 # Select all known networks
330 rows
= self
.db
.query("""
331 WITH known_networks AS (
332 SELECT network FROM announcements
334 SELECT network FROM networks
336 SELECT network FROM network_feeds
338 SELECT network FROM network_overrides
340 SELECT network FROM geofeed_networks
343 ordered_networks AS (
345 known_networks.network AS network,
346 announcements.autnum AS autnum,
347 networks.country AS country,
349 -- Must be part of returned values for ORDER BY clause
350 masklen(announcements.network) AS sort_a,
351 masklen(networks.network) AS sort_b
355 announcements ON known_networks.network <<= announcements.network
357 networks ON known_networks.network <<= networks.network
359 known_networks.network,
364 -- Return a list of those networks enriched with all
365 -- other information that we store in the database
367 DISTINCT ON (network)
374 SELECT country FROM network_overrides overrides
375 WHERE networks.network <<= overrides.network
376 ORDER BY masklen(overrides.network) DESC
380 SELECT country FROM autnum_overrides overrides
381 WHERE networks.autnum = overrides.number
384 SELECT country FROM network_feeds feeds
385 WHERE networks.network <<= feeds.network
386 ORDER BY masklen(feeds.network) DESC
390 SELECT country FROM autnum_feeds feeds
391 WHERE networks.autnum = feeds.number
392 ORDER BY source LIMIT 1
396 geofeed_networks.country AS country
400 -- Join the data from the geofeeds
402 geofeeds ON network_geofeeds.url = geofeeds.url
404 geofeed_networks ON geofeeds.id = geofeed_networks.geofeed_id
406 -- Check whether we have a geofeed for this network
408 networks.network <<= network_geofeeds.network
410 networks.network <<= geofeed_networks.network
412 -- Filter for the best result
414 masklen(geofeed_networks.network) DESC
423 SELECT is_anonymous_proxy FROM network_overrides overrides
424 WHERE networks.network <<= overrides.network
425 ORDER BY masklen(overrides.network) DESC
429 SELECT is_anonymous_proxy FROM network_feeds feeds
430 WHERE networks.network <<= feeds.network
431 ORDER BY masklen(feeds.network) DESC
435 SELECT is_anonymous_proxy FROM autnum_feeds feeds
436 WHERE networks.autnum = feeds.number
437 ORDER BY source LIMIT 1
440 SELECT is_anonymous_proxy FROM autnum_overrides overrides
441 WHERE networks.autnum = overrides.number
444 ) AS is_anonymous_proxy,
447 SELECT is_satellite_provider FROM network_overrides overrides
448 WHERE networks.network <<= overrides.network
449 ORDER BY masklen(overrides.network) DESC
453 SELECT is_satellite_provider FROM network_feeds feeds
454 WHERE networks.network <<= feeds.network
455 ORDER BY masklen(feeds.network) DESC
459 SELECT is_satellite_provider FROM autnum_feeds feeds
460 WHERE networks.autnum = feeds.number
461 ORDER BY source LIMIT 1
464 SELECT is_satellite_provider FROM autnum_overrides overrides
465 WHERE networks.autnum = overrides.number
468 ) AS is_satellite_provider,
471 SELECT is_anycast FROM network_overrides overrides
472 WHERE networks.network <<= overrides.network
473 ORDER BY masklen(overrides.network) DESC
477 SELECT is_anycast FROM network_feeds feeds
478 WHERE networks.network <<= feeds.network
479 ORDER BY masklen(feeds.network) DESC
483 SELECT is_anycast FROM autnum_feeds feeds
484 WHERE networks.autnum = feeds.number
485 ORDER BY source LIMIT 1
488 SELECT is_anycast FROM autnum_overrides overrides
489 WHERE networks.autnum = overrides.number
495 SELECT is_drop FROM network_overrides overrides
496 WHERE networks.network <<= overrides.network
497 ORDER BY masklen(overrides.network) DESC
501 SELECT is_drop FROM network_feeds feeds
502 WHERE networks.network <<= feeds.network
503 ORDER BY masklen(feeds.network) DESC
507 SELECT is_drop FROM autnum_feeds feeds
508 WHERE networks.autnum = feeds.number
509 ORDER BY source LIMIT 1
512 SELECT is_drop FROM autnum_overrides overrides
513 WHERE networks.autnum = overrides.number
518 ordered_networks networks
522 network
= writer
.add_network(row
.network
)
526 network
.country_code
= row
.country
530 network
.asn
= row
.autnum
533 if row
.is_anonymous_proxy
:
534 network
.set_flag(location
.NETWORK_FLAG_ANONYMOUS_PROXY
)
536 if row
.is_satellite_provider
:
537 network
.set_flag(location
.NETWORK_FLAG_SATELLITE_PROVIDER
)
540 network
.set_flag(location
.NETWORK_FLAG_ANYCAST
)
543 network
.set_flag(location
.NETWORK_FLAG_DROP
)
546 log
.info("Writing countries...")
547 rows
= self
.db
.query("SELECT * FROM countries ORDER BY country_code")
550 c
= writer
.add_country(row
.country_code
)
551 c
.continent_code
= row
.continent_code
554 # Write everything to file
555 log
.info("Writing database to file...")
559 def handle_update_whois(self
, ns
):
560 downloader
= location
.importer
.Downloader()
562 # Did we run successfully?
565 # Fetch all valid country codes to check parsed networks against
566 validcountries
= self
.countries
568 # Iterate over all potential sources
569 for source
in sorted(location
.importer
.SOURCES
):
570 with self
.db
.transaction():
571 # Create some temporary tables to store parsed data
573 CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
574 organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
575 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
577 CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
578 name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
579 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
581 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL,
582 original_countries text[] NOT NULL, source text NOT NULL)
584 CREATE INDEX _rirdata_search ON _rirdata
585 USING BTREE(family(network), masklen(network));
586 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
589 # Remove all previously imported content
590 self
.db
.execute("DELETE FROM autnums WHERE source = %s", source
)
591 self
.db
.execute("DELETE FROM networks WHERE source = %s", source
)
594 # Fetch WHOIS sources
595 for url
in location
.importer
.WHOIS_SOURCES
.get(source
, []):
596 for block
in downloader
.request_blocks(url
):
597 self
._parse
_block
(block
, source
, validcountries
)
599 # Fetch extended sources
600 for url
in location
.importer
.EXTENDED_SOURCES
.get(source
, []):
601 for line
in downloader
.request_lines(url
):
602 self
._parse
_line
(line
, source
, validcountries
)
603 except urllib
.error
.URLError
as e
:
604 log
.error("Could not retrieve data from %s: %s" % (source
, e
))
607 # Continue with the next source
610 # Process all parsed networks from every RIR we happen to have access to,
611 # insert the largest network chunks into the networks table immediately...
612 families
= self
.db
.query("""
614 family(network) AS family
622 for family
in (row
.family
for row
in families
):
623 # Fetch the smallest mask length in our data set
624 smallest
= self
.db
.get("""
654 masklen(network) = %s
663 # ... determine any other prefixes for this network family, ...
664 prefixes
= self
.db
.query("""
666 DISTINCT masklen(network) AS prefix
677 # ... and insert networks with this prefix in case they provide additional
678 # information (i. e. subnet of a larger chunk with a different country)
679 for prefix
in (row
.prefix
for row
in prefixes
):
685 _rirdata.original_countries,
690 family(_rirdata.network) = %s
692 masklen(_rirdata.network) = %s
696 DISTINCT ON (c.network)
699 c.original_countries,
701 masklen(networks.network),
702 networks.country AS parent_country
708 c.network << networks.network
711 masklen(networks.network) DESC NULLS LAST
714 networks(network, country, original_countries, source)
723 parent_country IS NULL
725 country <> parent_country
726 ON CONFLICT DO NOTHING
741 _organizations.source
745 _organizations ON _autnums.organization = _organizations.handle
751 SET name = excluded.name
755 # Download and import (technical) AS names from ARIN
756 with self
.db
.transaction():
757 self
._import
_as
_names
_from
_arin
(downloader
)
759 # Return a non-zero exit code for errors
760 return 1 if error
else 0
762 def _check_parsed_network(self
, network
):
764 Assistive function to detect and subsequently sort out parsed
765 networks from RIR data (both Whois and so-called "extended sources"),
768 (a) not globally routable (RFC 1918 space, et al.)
769 (b) covering a too large chunk of the IP address space (prefix length
770 is < 7 for IPv4 networks, and < 10 for IPv6)
771 (c) "0.0.0.0" or "::" as a network address
772 (d) are too small for being publicly announced (we have decided not to
773 process them at the moment, as they significantly enlarge our
774 database without providing very helpful additional information)
776 This unfortunately is necessary due to brain-dead clutter across
777 various RIR databases, causing mismatches and eventually disruptions.
779 We will return False in case a network is not suitable for adding
780 it to our database, and True otherwise.
783 if not network
or not (isinstance(network
, ipaddress
.IPv4Network
) or isinstance(network
, ipaddress
.IPv6Network
)):
786 if not network
.is_global
:
787 log
.debug("Skipping non-globally routable network: %s" % network
)
790 if network
.version
== 4:
791 if network
.prefixlen
< 7:
792 log
.debug("Skipping too big IP chunk: %s" % network
)
795 if network
.prefixlen
> 24:
796 log
.debug("Skipping network too small to be publicly announced: %s" % network
)
799 if str(network
.network_address
) == "0.0.0.0":
800 log
.debug("Skipping network based on 0.0.0.0: %s" % network
)
803 elif network
.version
== 6:
804 if network
.prefixlen
< 10:
805 log
.debug("Skipping too big IP chunk: %s" % network
)
808 if network
.prefixlen
> 48:
809 log
.debug("Skipping network too small to be publicly announced: %s" % network
)
812 if str(network
.network_address
) == "::":
813 log
.debug("Skipping network based on '::': %s" % network
)
817 # This should not happen...
818 log
.warning("Skipping network of unknown family, this should not happen: %s" % network
)
821 # In case we have made it here, the network is considered to
822 # be suitable for libloc consumption...
def _check_parsed_asn(self, asn):
	"""
	Assistive function to filter Autonomous System Numbers not being suitable
	for adding to our database. Returns False in such cases, and True otherwise.
	"""
	# Accept the ASN as soon as it falls into any publicly routable range
	# (VALID_ASN_RANGES is a module-level constant not visible in this chunk)
	for start, end in VALID_ASN_RANGES:
		if start <= asn and end >= asn:
			return True

	log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
	return False
838 def _parse_block(self
, block
, source_key
, validcountries
= None):
839 # Get first line to find out what type of block this is
843 if line
.startswith("aut-num:"):
844 return self
._parse
_autnum
_block
(block
, source_key
)
847 if line
.startswith("inet6num:") or line
.startswith("inetnum:"):
848 return self
._parse
_inetnum
_block
(block
, source_key
, validcountries
)
851 elif line
.startswith("organisation:"):
852 return self
._parse
_org
_block
(block
, source_key
)
854 def _parse_autnum_block(self
, block
, source_key
):
858 key
, val
= split_line(line
)
861 m
= re
.match(r
"^(AS|as)(\d+)", val
)
863 autnum
["asn"] = m
.group(2)
866 autnum
[key
] = val
.upper()
869 # Save the first description line as well...
870 if not key
in autnum
:
874 if not autnum
or not "asn" in autnum
:
877 # Insert a dummy organisation handle into our temporary organisations
878 # table in case the AS does not have an organisation handle set, but
879 # has a description (a quirk often observed in APNIC area), so we can
880 # later display at least some string for this AS.
881 if not "org" in autnum
:
882 if "descr" in autnum
:
883 autnum
["org"] = "LIBLOC-%s-ORGHANDLE" % autnum
.get("asn")
885 self
.db
.execute("INSERT INTO _organizations(handle, name, source) \
886 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
887 autnum
.get("org"), autnum
.get("descr"), source_key
,
890 log
.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
894 # Insert into database
895 self
.db
.execute("INSERT INTO _autnums(number, organization, source) \
896 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
897 organization = excluded.organization",
898 autnum
.get("asn"), autnum
.get("org"), source_key
,
901 def _parse_inetnum_block(self
, block
, source_key
, validcountries
= None):
902 log
.debug("Parsing inetnum block:")
909 key
, val
= split_line(line
)
911 # Filter any inetnum records which are only referring to IP space
912 # not managed by that specific RIR...
914 if re
.match(r
"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val
.strip()):
915 log
.debug("Skipping record indicating historic/orphaned data: %s" % val
.strip())
919 start_address
, delim
, end_address
= val
.partition("-")
921 # Strip any excess space
922 start_address
, end_address
= start_address
.rstrip(), end_address
.strip()
924 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
925 if start_address
and not (delim
or end_address
):
927 start_address
= ipaddress
.ip_network(start_address
, strict
=False)
929 start_address
= start_address
.split("/")
930 ldigits
= start_address
[0].count(".")
932 # How many octets do we need to add?
933 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
935 start_address
= start_address
[0] + ".0.0/" + start_address
[1]
937 start_address
= start_address
[0] + ".0/" + start_address
[1]
939 log
.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line
)
943 start_address
= ipaddress
.ip_network(start_address
, strict
=False)
945 log
.warning("Could not parse line in LACNIC DB format: %s" % line
)
948 # Enumerate first and last IP address of this network
949 end_address
= start_address
[-1]
950 start_address
= start_address
[0]
953 # Convert to IP address
955 start_address
= ipaddress
.ip_address(start_address
)
956 end_address
= ipaddress
.ip_address(end_address
)
958 log
.warning("Could not parse line: %s" % line
)
961 inetnum
["inetnum"] = list(ipaddress
.summarize_address_range(start_address
, end_address
))
963 elif key
== "inet6num":
964 inetnum
[key
] = [ipaddress
.ip_network(val
, strict
=False)]
966 elif key
== "country":
969 # Catch RIR data objects with more than one country code...
970 if not key
in inetnum
:
973 if val
in inetnum
.get("country"):
974 # ... but keep this list distinct...
977 # When people set country codes to "UK", they actually mean "GB"
981 inetnum
[key
].append(val
)
983 # Parse the geofeed attribute
984 elif key
== "geofeed":
985 inetnum
["geofeed"] = val
987 # Parse geofeed when used as a remark
988 elif key
== "remarks":
989 m
= re
.match(r
"^(?:Geofeed)\s+(https://.*)", val
)
991 inetnum
["geofeed"] = m
.group(1)
994 if not inetnum
or not "country" in inetnum
:
997 # Prepare skipping objects with unknown country codes...
998 invalidcountries
= [singlecountry
for singlecountry
in inetnum
.get("country") if singlecountry
not in validcountries
]
1000 # Iterate through all networks enumerated from above, check them for plausibility and insert
1001 # them into the database, if _check_parsed_network() succeeded
1002 for single_network
in inetnum
.get("inet6num") or inetnum
.get("inetnum"):
1003 if self
._check
_parsed
_network
(single_network
):
1004 # Skip objects with unknown country codes if they are valid to avoid log spam...
1005 if validcountries
and invalidcountries
:
1006 log
.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
1007 (invalidcountries
, inetnum
.get("country"), inetnum
.get("inet6num") or inetnum
.get("inetnum")))
1010 # Everything is fine here, run INSERT statement...
1011 self
.db
.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
1012 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
1013 "%s" % single_network
, inetnum
.get("country")[0], inetnum
.get("country"), source_key
,
1016 # Update any geofeed information
1017 geofeed
= inetnum
.get("geofeed", None)
1019 self
._parse
_geofeed
(geofeed
, single_network
)
1021 # Delete any previous geofeeds
1023 self
.db
.execute("DELETE FROM network_geofeeds WHERE network = %s",
1024 "%s" % single_network
)
1026 def _parse_geofeed(self
, url
, single_network
):
1028 url
= urllib
.parse
.urlparse(url
)
1030 # Make sure that this is a HTTPS URL
1031 if not url
.scheme
== "https":
1032 log
.debug("Geofeed URL is not using HTTPS: %s" % geofeed
)
1035 # Put the URL back together normalized
1038 # Store/update any geofeeds
1048 ON CONFLICT (network) DO
1049 UPDATE SET url = excluded.url""",
1050 "%s" % single_network
, url
,
1053 def _parse_org_block(self
, block
, source_key
):
1057 key
, val
= split_line(line
)
1059 if key
== "organisation":
1060 org
[key
] = val
.upper()
1061 elif key
== "org-name":
1064 # Skip empty objects
1068 self
.db
.execute("INSERT INTO _organizations(handle, name, source) \
1069 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
1070 UPDATE SET name = excluded.name",
1071 org
.get("organisation"), org
.get("org-name"), source_key
,
1074 def _parse_line(self
, line
, source_key
, validcountries
= None):
1076 if line
.startswith("2"):
1080 if line
.startswith("#"):
1084 registry
, country_code
, type, line
= line
.split("|", 3)
1086 log
.warning("Could not parse line: %s" % line
)
1089 # Skip any lines that are for stats only or do not have a country
1090 # code at all (avoids log spam below)
1091 if not country_code
or country_code
== '*':
1094 # Skip objects with unknown country codes
1095 if validcountries
and country_code
not in validcountries
:
1096 log
.warning("Skipping line with bogus country '%s': %s" % \
1097 (country_code
, line
))
1100 if type in ("ipv6", "ipv4"):
1101 return self
._parse
_ip
_line
(country_code
, type, line
, source_key
)
1103 def _parse_ip_line(self
, country
, type, line
, source_key
):
1105 address
, prefix
, date
, status
, organization
= line
.split("|")
1109 # Try parsing the line without organization
1111 address
, prefix
, date
, status
= line
.split("|")
1113 log
.warning("Unhandled line format: %s" % line
)
1116 # Skip anything that isn't properly assigned
1117 if not status
in ("assigned", "allocated"):
1120 # Cast prefix into an integer
1122 prefix
= int(prefix
)
1124 log
.warning("Invalid prefix: %s" % prefix
)
1127 # Fix prefix length for IPv4
1129 prefix
= 32 - int(math
.log(prefix
, 2))
1131 # Try to parse the address
1133 network
= ipaddress
.ip_network("%s/%s" % (address
, prefix
), strict
=False)
1135 log
.warning("Invalid IP address: %s" % address
)
1138 if not self
._check
_parsed
_network
(network
):
1141 self
.db
.execute("INSERT INTO networks(network, country, original_countries, source) \
1142 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
1143 UPDATE SET country = excluded.country",
1144 "%s" % network
, country
, [country
], source_key
,
1147 def _import_as_names_from_arin(self
, downloader
):
1148 # Delete all previously imported content
1149 self
.db
.execute("DELETE FROM autnums WHERE source = %s", "ARIN")
1151 # Try to retrieve the feed from ftp.arin.net
1152 feed
= downloader
.request_lines("https://ftp.arin.net/pub/resource_registry_service/asns.csv")
1154 # Walk through the file
1155 for line
in csv
.DictReader(feed
, dialect
="arin"):
1156 log
.debug("Processing object: %s" % line
)
1159 status
= line
.get("Status")
1161 # We are only interested in anything managed by ARIN
1162 if not status
== "Full Registry Services":
1165 # Fetch organization name
1166 name
= line
.get("Org Name")
1169 first_asn
= line
.get("Start AS Number")
1170 last_asn
= line
.get("End AS Number")
1174 first_asn
= int(first_asn
)
1175 except TypeError as e
:
1176 log
.warning("Could not parse ASN '%s'" % first_asn
)
1180 last_asn
= int(last_asn
)
1181 except TypeError as e
:
1182 log
.warning("Could not parse ASN '%s'" % last_asn
)
1185 # Check if the range is valid
1186 if last_asn
< first_asn
:
1187 log
.warning("Invalid ASN range %s-%s" % (first_asn
, last_asn
))
1189 # Insert everything into the database
1190 for asn
in range(first_asn
, last_asn
+ 1):
1191 if not self
._check
_parsed
_asn
(asn
):
1192 log
.warning("Skipping invalid ASN %s" % asn
)
1212 """, asn
, name
, "ARIN",
1215 def handle_update_announcements(self
, ns
):
1216 server
= ns
.server
[0]
1218 with self
.db
.transaction():
1219 if server
.startswith("/"):
1220 self
._handle
_update
_announcements
_from
_bird
(server
)
1222 # Purge anything we never want here
1224 -- Delete default routes
1225 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
1227 -- Delete anything that is not global unicast address space
1228 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
1230 -- DELETE "current network" address space
1231 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
1233 -- DELETE local loopback address space
1234 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
1236 -- DELETE RFC 1918 address space
1237 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
1238 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
1239 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
1241 -- DELETE test, benchmark and documentation address space
1242 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
1243 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
1244 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
1245 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
1246 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
1248 -- DELETE CGNAT address space (RFC 6598)
1249 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
1251 -- DELETE link local address space
1252 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
1254 -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
1255 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
1256 DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';
1258 -- DELETE multicast and reserved address space
1259 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
1260 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
1262 -- Delete networks that are too small to be in the global routing table
1263 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
1264 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
1266 -- Delete any non-public or reserved ASNs
1267 DELETE FROM announcements WHERE NOT (
1268 (autnum >= 1 AND autnum <= 23455)
1270 (autnum >= 23457 AND autnum <= 64495)
1272 (autnum >= 131072 AND autnum <= 4199999999)
1275 -- Delete everything that we have not seen for 14 days
1276 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
def _handle_update_announcements_from_bird(self, server):
	"""
	Imports the current routing table from a Bird routing daemon and
	upserts each announced prefix into the announcements table.

	server: passed through to self._bird_cmd() — presumably the path to
	the Bird control socket; confirm against _bird_cmd below.

	NOTE(review): several interior lines (guards, argument tuples) are
	missing from this view of the file; gaps are marked inline.
	"""
	# Pre-compile the regular expression for faster searching
	# NOTE(review): non-raw bytes literal — escapes like "\s" should be
	# written as rb"..." to avoid invalid-escape warnings on modern Python.
	route = re.compile(b"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")

	log.info("Requesting routing table from Bird (%s)" % server)

	# Prefixes Bird reports as aggregates; they need a second lookup below
	aggregated_networks = []

	# Send command to list all routes
	for line in self._bird_cmd(server, "show route"):
		m = route.match(line)
		# NOTE(review): an "if not m:" guard appears to be missing here

		# Ignore any header lines with the name of the routing table
		elif line.startswith(b"Table"):
			# NOTE(review): branch body missing in this view
			log.debug("Could not parse line: %s" % line.decode())

		# Fetch the extracted network and ASN
		network, autnum = m.groups()

		# Decode into strings
		network = network.decode()
		# NOTE(review): a guard for "autnum is None" appears to be missing
		autnum = autnum.decode()

		# Collect all aggregated networks
		# NOTE(review): the condition selecting aggregated entries is missing
		log.debug("%s is an aggregated network" % network)
		aggregated_networks.append(network)

		# Insert it into the database
		# NOTE(review): the parameter tuple and closing paren of this
		# execute() call are missing in this view
		self.db.execute("INSERT INTO announcements(network, autnum) \
			VALUES(%s, %s) ON CONFLICT (network) DO \
			UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",

	# Process any aggregated networks
	for network in aggregated_networks:
		log.debug("Processing aggregated network %s" % network)

		# Run "show route all" for each network
		for line in self._bird_cmd(server, "show route %s all" % network):
			# Try finding the path
			# NOTE(review): non-raw bytes pattern — same escape caveat as above
			m = re.match(b"\s+BGP\.as_path:.* (\d+) {\d+}$", line)

			# Select the last AS number in the path
			autnum = m.group(1).decode()

			# Insert it into the database
			# NOTE(review): parameter tuple and closing paren missing here too
			self.db.execute("INSERT INTO announcements(network, autnum) \
				VALUES(%s, %s) ON CONFLICT (network) DO \
				UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",

			# We don't need to process any more
def _bird_cmd(self, socket_path, command):
	"""
	Sends a command to a Bird daemon over its UNIX control socket and
	yields/returns the response lines — presumably a generator yielding
	one line at a time; the yield statement is missing from this view,
	so confirm against the full file.

	socket_path: filesystem path of Bird's UNIX control socket.
	command: command string, sent newline-terminated after encoding.

	NOTE(review): the buffer initialisation and the read loops are
	missing from this view; gaps are marked inline.
	"""
	# Connect to the socket
	s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
	s.connect(socket_path)

	# Allocate some buffer
	# NOTE(review): the "buffer = b''" initialisation is missing here

	log.debug("Sending Bird command: %s" % command)

	# Bird expects newline-terminated commands
	s.send(b"%s\n" % command.encode())

	# NOTE(review): the enclosing read loop header(s) are missing below

	# Fill up the buffer
	buffer += s.recv(4096)

	# Search for the next newline
	pos = buffer.find(b"\n")

	# If we cannot find one, we go back and read more data
	# NOTE(review): the "pos" check / break is missing here

	# Cut after the newline character
	# NOTE(review): a "pos += 1" style adjustment appears to be missing

	# Split the line we want and keep the rest in buffer
	line, buffer = buffer[:pos], buffer[pos:]

	# Try parsing any status lines: Bird status lines start with a
	# 4-digit code followed by a space (32) or hyphen (45)
	if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
		code, delim, line = int(line[:4]), line[4], line[5:]

		log.debug("Received response code %s from bird" % code)

		# NOTE(review): handling of specific codes (end-of-output,
		# greeting) is missing in this view

	# Otherwise return the line
	# NOTE(review): the yield/return statement itself is missing
def handle_update_geofeeds(self, ns):
	"""
	Refreshes all Geofeeds: prunes unlinked feeds, fetches stale ones
	concurrently via _fetch_geofeed(), and drops data from feeds that
	have not updated within two weeks.

	ns: parsed argparse namespace (unused in the visible code).

	NOTE(review): the SQL statement wrappers are missing from this view;
	only interior fragments of the queries survive. Gaps marked inline.
	"""
	with self.db.transaction():
		# Delete all geofeeds which are no longer linked
		# NOTE(review): only this join-condition fragment of the SQL
		# remains; the surrounding self.db.execute("""...""") is missing
		geofeeds.url = network_geofeeds.url

	# Fetch all Geofeeds that require an update
	# (visible condition: not refreshed within the last week)
	geofeeds = self.db.query("""
		updated_at <= CURRENT_TIMESTAMP - INTERVAL '1 week'

	# Fetch feeds in parallel; ten workers bounds concurrent HTTP requests
	with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
		results = executor.map(self._fetch_geofeed, geofeeds)

		# Fetch all results to raise any exceptions
		for result in results:
			# NOTE(review): loop body (likely just "pass") missing

	# Delete data from any feeds that did not update in the last two weeks
	with self.db.transaction():
		# NOTE(review): only interior fragments of this DELETE survive
		geofeed_networks.geofeed_id IN (
		updated_at <= CURRENT_TIMESTAMP - INTERVAL '2 weeks'
def _fetch_geofeed(self, geofeed):
	"""
	Downloads a single Geofeed (RFC 8805-style CSV), replaces its stored
	networks in geofeed_networks, and records HTTP/transport errors in
	the geofeeds table.

	geofeed: a database row object exposing at least .id and .url.

	NOTE(review): many control-flow lines ("try:" headers, loop headers,
	"continue" statements) are missing from this view; gaps are marked.
	"""
	log.debug("Fetching Geofeed %s" % geofeed.url)

	with self.db.transaction():
		# NOTE(review): the "try:" wrapping the fetch is missing here
		req = urllib.request.Request(geofeed.url, headers={
			"User-Agent" : "location/%s" % location.__version__,

			# We expect some plain text file in CSV format
			"Accept" : "text/csv, text/plain",
		# NOTE(review): dict/call closing tokens missing in this view

		with urllib.request.urlopen(req, timeout=10) as f:
			# Remove any previous data
			self.db.execute("DELETE FROM geofeed_networks \
				WHERE geofeed_id = %s", geofeed.id)

			# Read the output line by line
			# NOTE(review): the "for lineno, line in enumerate(f):" style
			# loop header and its "try:" are missing here
			line = line.decode()

			# Ignore any lines we cannot decode
			except UnicodeDecodeError:
				log.debug("Could not decode line %s in %s" \
					% (lineno, geofeed.url))
				# NOTE(review): a "continue" appears to be missing

			# Strip the trailing newline/whitespace
			line = line.rstrip()

			# Try to parse the line
			# NOTE(review): surrounding try/except missing; the log call
			# below presumably lives in the failure branch
			fields = line.split(",", 5)
			log.debug("Could not parse line: %s" % line)

			# Check if we have enough fields
			# NOTE(review): the length check itself is missing
			log.debug("Not enough fields in line: %s" % line)

			# Unpack the first four CSV fields (remainder ignored)
			network, country, region, city, = fields[:4]

			# Try to parse the network
			# NOTE(review): surrounding try/except missing
			network = ipaddress.ip_network(network, strict=False)
			log.debug("Could not parse network: %s" % network)

			# Strip any excess whitespace from country codes
			country = country.strip()

			# Make the country code uppercase
			country = country.upper()

			# Check the country code
			# NOTE(review): the "if not country:" guard is missing
			log.debug("Empty country code in Geofeed %s line %s" \
				% (geofeed.url, lineno))

			elif not location.country_code_is_valid(country):
				log.debug("Invalid country code in Geofeed %s:%s: %s" \
					% (geofeed.url, lineno, country))

			# Write this into the database
			# NOTE(review): only a fragment of the INSERT statement and
			# none of its parameters survive here
			VALUES (%s, %s, %s, %s, %s)""",

	# Catch any HTTP errors
	except urllib.request.HTTPError as e:
		# Record the HTTP status code and error text on the feed
		self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
			WHERE id = %s", e.code, "%s" % e, geofeed.id)

		# Remove any previous data when the feed has been deleted
		# NOTE(review): the status-code condition (e.g. 404) is missing
		self.db.execute("DELETE FROM geofeed_networks \
			WHERE geofeed_id = %s", geofeed.id)

	# Catch any other errors and connection timeouts
	except (http.client.InvalidURL, urllib.request.URLError, TimeoutError) as e:
		log.debug("Could not fetch URL %s: %s" % (geofeed.url, e))

		# 599 is used as a catch-all "network connect timeout" style code
		self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
			WHERE id = %s", 599, "%s" % e, geofeed.id)

	# Mark the geofeed as updated
	# NOTE(review): only this fragment of the final UPDATE survives
	updated_at = CURRENT_TIMESTAMP,
def handle_update_overrides(self, ns):
	"""
	Rebuilds the manual override tables: drops existing manual overrides,
	refreshes provider feeds (AWS, Spamhaus DROP), then imports override
	blocks from the files given on the command line.

	ns: parsed argparse namespace; ns.files is the list of input paths.

	NOTE(review): SQL statement wrappers, "try:" headers, and "continue"
	statements are missing from this view; gaps are marked inline.
	"""
	with self.db.transaction():
		# Only drop manually created overrides, as we can be reasonably sure to have them,
		# and preserve the rest. If appropriate, it is deleted by correspondent functions.
		# NOTE(review): the self.db.execute("""...""") wrapper around this
		# SQL is missing in this view
		DELETE FROM autnum_overrides WHERE source = 'manual';
		DELETE FROM network_overrides WHERE source = 'manual';

	# Update overrides for various cloud providers big enough to publish their own IP
	# network allocation lists in a machine-readable format...
	self._update_overrides_for_aws()

	# Update overrides for Spamhaus DROP feeds...
	self._update_feed_for_spamhaus_drop()

	for file in ns.files:
		log.info("Reading %s..." % file)

		with open(file, "rb") as f:
			# read_blocks() yields (type, block) pairs parsed from the file
			for type, block in location.importer.read_blocks(f):
				# NOTE(review): the 'if type == "net":' branch header is missing
				network = block.get("net")

				# Try to parse and normalise the network
				# NOTE(review): the "try:" header is missing here
				network = ipaddress.ip_network(network, strict=False)
				except ValueError as e:
					log.warning("Invalid IP network: %s: %s" % (network, e))
					# NOTE(review): a "continue" appears to be missing

				# Prevent that we overwrite all networks
				if network.prefixlen == 0:
					log.warning("Skipping %s: You cannot overwrite default" % network)
					# NOTE(review): a "continue" appears to be missing

				# NOTE(review): only fragments of the INSERT statement and
				# part of its parameter list survive below
				INSERT INTO network_overrides(
				is_satellite_provider,
				) VALUES (%s, %s, %s, %s, %s, %s, %s)
				ON CONFLICT (network) DO NOTHING""",
				block.get("country"),
				self._parse_bool(block, "is-anonymous-proxy"),
				self._parse_bool(block, "is-satellite-provider"),
				self._parse_bool(block, "is-anycast"),
				self._parse_bool(block, "drop"),

				elif type == "aut-num":
					autnum = block.get("aut-num")

					# Check if AS number begins with "AS"
					if not autnum.startswith("AS"):
						log.warning("Invalid AS number: %s" % autnum)
						# NOTE(review): a "continue" appears to be missing

					# NOTE(review): fragments of the autnum INSERT below
					INSERT INTO autnum_overrides(
					is_satellite_provider,
					) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
					ON CONFLICT DO NOTHING""",
					block.get("country"),
					self._parse_bool(block, "is-anonymous-proxy"),
					self._parse_bool(block, "is-satellite-provider"),
					self._parse_bool(block, "is-anycast"),
					self._parse_bool(block, "drop"),

				# NOTE(review): the "else:" for unsupported block types is missing
				log.warning("Unsupported type: %s" % type)
def _update_overrides_for_aws(self):
	"""
	Downloads Amazon's published ip-ranges.json and imports the prefixes
	into the network_feeds table, mapping AWS regions to country codes.

	NOTE(review): several guards, "continue" statements and most of the
	region map literal are missing from this view; gaps marked inline.
	"""
	# Download Amazon AWS IP allocation file to create overrides...
	downloader = location.importer.Downloader()

	# NOTE(review): the "try:" wrapping the download/parse is missing
	f = downloader.retrieve("https://ip-ranges.amazonaws.com/ip-ranges.json")

	# Parse downloaded file
	aws_ip_dump = json.load(f)
	except Exception as e:
		log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
		# NOTE(review): a "return" appears to be missing here

	# At this point, we can assume the downloaded file to be valid
	# NOTE(review): the execute() wrapper around this SQL is missing
	DELETE FROM network_feeds WHERE source = 'Amazon AWS IP feed'

	# XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
	# there seems to be no machine-readable version available of this other than
	# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
	# (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
	# was helpful here as well.
	# NOTE(review): many entries of this mapping are missing in this view
	aws_region_country_map = {
		"ap-northeast-3": "JP",
		"ap-northeast-2": "KR",
		"ap-southeast-1": "SG",
		"ap-southeast-2": "AU",
		"ap-southeast-3": "MY",
		"ap-southeast-4": "AU",
		"ap-southeast-5": "NZ", # Auckland, NZ
		"ap-southeast-6": "AP", # XXX: Precise location not documented anywhere
		"ap-northeast-1": "JP",
		"ca-central-1": "CA",
		"eu-central-1": "DE",
		"eu-central-2": "CH",
		"eusc-de-east-1" : "DE", # XXX: Undocumented, likely located in Berlin rather than Frankfurt
		"il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
		"me-central-1": "AE",

	# Fetch all valid country codes to check parsed networks against...
	rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
	# NOTE(review): the "validcountries = []" initialisation and the
	# "for row in rows:" header are missing here
	validcountries.append(row.country_code)

	with self.db.transaction():
		for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
			# NOTE(review): the "try:" header is missing; the warning below
			# presumably belongs to its except-branch
			network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
			log.warning("Unable to parse line: %s" % snetwork)

			# Sanitize parsed networks...
			if not self._check_parsed_network(network):
				# NOTE(review): branch body (likely "continue") missing

			# Determine region of this network...
			region = snetwork["region"]

			# NOTE(review): the "cc"/"is_anycast" initialisations are missing

			# Any region name starting with "us-" will get "US" country code assigned straight away...
			if region.startswith("us-"):
				# NOTE(review): branch body missing
			elif region.startswith("cn-"):
				# ... same goes for China ...
			elif region == "GLOBAL":
				# ... funny region name for anycast-like networks ...
			elif region in aws_region_country_map:
				# ... assign looked up country code otherwise ...
				cc = aws_region_country_map[region]

			# ... and bail out if we are missing something here
			log.warning("Unable to determine country code for line: %s" % snetwork)

			# Skip networks with unknown country codes
			if not is_anycast and validcountries and cc not in validcountries:
				log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \

			# Conduct SQL statement...
			# NOTE(review): only the tail of the INSERT survives here
			ON CONFLICT (network, source) DO NOTHING
			""", "%s" % network, "Amazon AWS IP feed", cc, is_anycast,
def _update_feed_for_spamhaus_drop(self):
	"""
	Downloads the Spamhaus DROP/EDROP/DROPv6 network lists and the
	ASN-DROP JSON list, and imports them into the network_feeds and
	autnum_feeds tables respectively.

	NOTE(review): the list-literal headers ("ip_lists = [", "asn_lists
	= [") and various guards/continues are missing from this view.
	"""
	downloader = location.importer.Downloader()

	# NOTE(review): these tuples are items of the ip_lists / asn_lists
	# literals whose headers are missing here
	("SPAMHAUS-DROP", "https://www.spamhaus.org/drop/drop.txt"),
	("SPAMHAUS-EDROP", "https://www.spamhaus.org/drop/edrop.txt"),
	("SPAMHAUS-DROPV6", "https://www.spamhaus.org/drop/dropv6.txt")
	("SPAMHAUS-ASNDROP", "https://www.spamhaus.org/drop/asndrop.json")

	for name, url in ip_lists:
		# Fetch IP list from given URL
		f = downloader.retrieve(url)

		# Split into lines
		fcontent = f.readlines()

		with self.db.transaction():
			# Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
			# lists; only purge existing data when the fetched file looks plausible
			if len(fcontent) > 10:
				self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)
			# NOTE(review): the "else:" branch structure is missing; the
			# warning below presumably belongs to it
			log.warning("%s (%s) returned likely bogus file, ignored" % (name, url))

			# Iterate through every line, filter comments and add remaining networks to
			# the override table in case they are valid...
			for sline in fcontent:
				# The response is assumed to be encoded in UTF-8...
				sline = sline.decode("utf-8")

				# Comments start with a semicolon...
				if sline.startswith(";"):
					# NOTE(review): branch body (likely "continue") missing

				# Extract network and ignore anything afterwards...
				# NOTE(review): the "try:" header is missing here
				network = ipaddress.ip_network(sline.split()[0], strict=False)
				log.error("Unable to parse line: %s" % sline)

				# Sanitize parsed networks...
				if not self._check_parsed_network(network):
					log.warning("Skipping bogus network found in %s (%s): %s" % \
						(name, url, network))

				# Conduct SQL statement...
				# NOTE(review): only the tail of the INSERT survives here
				)""", "%s" % network, name, True,

	for name, url in asn_lists:
		f = downloader.retrieve(url)

		# Split into lines
		fcontent = f.readlines()

		with self.db.transaction():
			# Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
			# lists; only purge existing data when the fetched file looks plausible
			if len(fcontent) > 10:
				self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
			# NOTE(review): "else:" branch structure missing, as above
			log.warning("%s (%s) returned likely bogus file, ignored" % (name, url))

			# Iterate through every line, filter comments and add remaining ASNs to
			# the override table in case they are valid...
			for sline in fcontent:
				# The response is assumed to be encoded in UTF-8...
				sline = sline.decode("utf-8")

				# Load every line as a JSON object and try to obtain an ASN from it...
				# NOTE(review): the "try:" header is missing here
				lineobj = json.loads(sline)
				except json.decoder.JSONDecodeError:
					log.error("Unable to parse line as a JSON object: %s" % sline)
					# NOTE(review): a "continue" appears to be missing

				# Skip line containing file metadata
				type = lineobj["type"]

				if type == "metadata":
					# NOTE(review): branch body (likely "continue") missing

				# NOTE(review): the "try:" around these lookups is missing;
				# the warning presumably belongs to a KeyError branch
				asn = lineobj["asn"]
				as_name = lineobj["asname"]
				log.warning("Unable to extract necessary information from line: %s" % sline)

				# Filter invalid ASNs...
				if not self._check_parsed_asn(asn):
					log.warning("Skipping bogus ASN found in %s (%s): %s" % \

				# Conduct SQL statement...
				# NOTE(review): only the tail of the INSERT survives here
				)""", "%s" % asn, name, True,
1946 def _parse_bool(block
, key
):
1947 val
= block
.get(key
)
1949 # There is no point to proceed when we got None
1953 # Convert to lowercase
1957 if val
in ("yes", "1"):
1961 if val
in ("no", "0"):
def countries(self):
	"""Return every known country code, sorted alphabetically."""
	# Load the full countries table; parsed networks are later validated
	# against this set of country codes.
	result = self.db.query("SELECT * FROM countries ORDER BY country_code")

	# Collect just the code column from each row
	codes = []
	for entry in result:
		codes.append(entry.country_code)

	return codes
def handle_import_countries(self, ns):
	"""
	Replaces the countries table with the contents of the files given on
	the command line (one country per line: code, continent code, name).

	ns: parsed argparse namespace; ns.file is the list of input files.

	NOTE(review): the per-line loop header and the try/except around the
	split are missing from this view; gaps are marked inline.
	"""
	with self.db.transaction():
		# Drop all data that we have
		self.db.execute("TRUNCATE TABLE countries")

		for file in ns.file:
			# NOTE(review): the "for line in file:" header is missing here
			line = line.rstrip()

			# Ignore any comments
			if line.startswith("#"):
				# NOTE(review): branch body (likely "continue") missing

			# NOTE(review): the "try:" header is missing; the warning below
			# presumably belongs to its except-branch
			country_code, continent_code, name = line.split(maxsplit=2)
			log.warning("Could not parse line: %s" % line)

			# Upsert-style insert: duplicates are silently skipped
			self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
				VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
def split_line(line):
	"""
	Splits an RPSL-style "key: value" line into its key and value.

	line: the raw text line to split.

	Returns a (key, value) 2-tuple with surrounding whitespace removed
	from both parts; when the line contains no colon, the whole line
	becomes the key and the value is empty.

	NOTE(review): defined without "self" — presumably a @staticmethod in
	the full file; confirm against the class definition.
	"""
	key, colon, val = line.partition(":")

	# Strip any excess space
	key = key.strip()
	val = val.strip()

	return key, val
2008 # Run the command line interface