2 ###############################################################################
4 # libloc - A library to determine the location of someone on the Internet #
6 # Copyright (C) 2020-2024 IPFire Development Team <info@ipfire.org> #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
18 ###############################################################################
33 # Load our location module
35 import location
.database
36 from location
.downloader
import Downloader
37 from location
.i18n
import _
40 log
= logging
.getLogger("location.importer")
50 TRANSLATED_COUNTRIES
= {
51 # When people say UK, they mean GB
55 IGNORED_COUNTRIES
= set((
59 # Some people use ZZ to say "no country" or to hide the country
63 # Configure the CSV parser for ARIN
64 csv
.register_dialect("arin", delimiter
=",", quoting
=csv
.QUOTE_ALL
, quotechar
="\"")
68 parser
= argparse
.ArgumentParser(
69 description
=_("Location Importer Command Line Interface"),
71 subparsers
= parser
.add_subparsers()
73 # Global configuration flags
74 parser
.add_argument("--debug", action
="store_true",
75 help=_("Enable debug output"))
76 parser
.add_argument("--quiet", action
="store_true",
77 help=_("Enable quiet mode"))
80 parser
.add_argument("--version", action
="version",
81 version
="%(prog)s @VERSION@")
84 parser
.add_argument("--database-host", required
=True,
85 help=_("Database Hostname"), metavar
=_("HOST"))
86 parser
.add_argument("--database-name", required
=True,
87 help=_("Database Name"), metavar
=_("NAME"))
88 parser
.add_argument("--database-username", required
=True,
89 help=_("Database Username"), metavar
=_("USERNAME"))
90 parser
.add_argument("--database-password", required
=True,
91 help=_("Database Password"), metavar
=_("PASSWORD"))
94 write
= subparsers
.add_parser("write", help=_("Write database to file"))
95 write
.set_defaults(func
=self
.handle_write
)
96 write
.add_argument("file", nargs
=1, help=_("Database File"))
97 write
.add_argument("--signing-key", nargs
="?", type=open, help=_("Signing Key"))
98 write
.add_argument("--backup-signing-key", nargs
="?", type=open, help=_("Backup Signing Key"))
99 write
.add_argument("--vendor", nargs
="?", help=_("Sets the vendor"))
100 write
.add_argument("--description", nargs
="?", help=_("Sets a description"))
101 write
.add_argument("--license", nargs
="?", help=_("Sets the license"))
102 write
.add_argument("--version", type=int, help=_("Database Format Version"))
105 update_whois
= subparsers
.add_parser("update-whois", help=_("Update WHOIS Information"))
106 update_whois
.add_argument("sources", nargs
="*",
107 help=_("Only update these sources"))
108 update_whois
.set_defaults(func
=self
.handle_update_whois
)
110 # Update announcements
111 update_announcements
= subparsers
.add_parser("update-announcements",
112 help=_("Update BGP Annoucements"))
113 update_announcements
.set_defaults(func
=self
.handle_update_announcements
)
114 update_announcements
.add_argument("server", nargs
=1,
115 help=_("Route Server to connect to"), metavar
=_("SERVER"))
118 update_geofeeds
= subparsers
.add_parser("update-geofeeds",
119 help=_("Update Geofeeds"))
120 update_geofeeds
.set_defaults(func
=self
.handle_update_geofeeds
)
123 update_feeds
= subparsers
.add_parser("update-feeds",
124 help=_("Update Feeds"))
125 update_feeds
.add_argument("feeds", nargs
="*",
126 help=_("Only update these feeds"))
127 update_feeds
.set_defaults(func
=self
.handle_update_feeds
)
130 update_overrides
= subparsers
.add_parser("update-overrides",
131 help=_("Update overrides"),
133 update_overrides
.add_argument(
134 "files", nargs
="+", help=_("Files to import"),
136 update_overrides
.set_defaults(func
=self
.handle_update_overrides
)
139 import_countries
= subparsers
.add_parser("import-countries",
140 help=_("Import countries"),
142 import_countries
.add_argument("file", nargs
=1, type=argparse
.FileType("r"),
143 help=_("File to import"))
144 import_countries
.set_defaults(func
=self
.handle_import_countries
)
146 args
= parser
.parse_args()
150 location
.logger
.set_level(logging
.DEBUG
)
152 location
.logger
.set_level(logging
.WARNING
)
154 # Print usage if no action was given
155 if not "func" in args
:
162 # Parse command line arguments
163 args
= self
.parse_cli()
165 # Initialize the downloader
166 self
.downloader
= Downloader()
168 # Initialise database
169 self
.db
= self
._setup
_database
(args
)
172 ret
= args
.func(args
)
174 # Return with exit code
178 # Otherwise just exit
181 def _setup_database(self
, ns
):
183 Initialise the database
185 # Connect to database
186 db
= location
.database
.Connection(
187 host
=ns
.database_host
, database
=ns
.database_name
,
188 user
=ns
.database_username
, password
=ns
.database_password
,
191 with db
.transaction():
194 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
195 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
196 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
197 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
198 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
199 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
202 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
203 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
204 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
207 CREATE TABLE IF NOT EXISTS countries(
208 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
209 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
212 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
213 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
214 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
215 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
216 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
217 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
220 CREATE TABLE IF NOT EXISTS geofeeds(
221 id serial primary key,
223 status integer default null,
224 updated_at timestamp without time zone default null
226 ALTER TABLE geofeeds ADD COLUMN IF NOT EXISTS error text;
227 CREATE UNIQUE INDEX IF NOT EXISTS geofeeds_unique
229 CREATE TABLE IF NOT EXISTS geofeed_networks(
230 geofeed_id integer references geofeeds(id) on delete cascade,
236 CREATE INDEX IF NOT EXISTS geofeed_networks_geofeed_id
237 ON geofeed_networks(geofeed_id);
238 CREATE INDEX IF NOT EXISTS geofeed_networks_search
239 ON geofeed_networks USING GIST(network inet_ops);
240 CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
241 ALTER TABLE network_geofeeds ADD COLUMN IF NOT EXISTS source text NOT NULL;
242 CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique
243 ON network_geofeeds(network);
244 CREATE INDEX IF NOT EXISTS network_geofeeds_search
245 ON network_geofeeds USING GIST(network inet_ops);
246 CREATE INDEX IF NOT EXISTS network_geofeeds_url
247 ON network_geofeeds(url);
250 CREATE TABLE IF NOT EXISTS autnum_feeds(
251 number bigint NOT NULL,
252 source text NOT NULL,
255 is_anonymous_proxy boolean,
256 is_satellite_provider boolean,
260 CREATE UNIQUE INDEX IF NOT EXISTS autnum_feeds_unique
261 ON autnum_feeds(number, source);
263 CREATE TABLE IF NOT EXISTS network_feeds(
264 network inet NOT NULL,
265 source text NOT NULL,
267 is_anonymous_proxy boolean,
268 is_satellite_provider boolean,
272 CREATE UNIQUE INDEX IF NOT EXISTS network_feeds_unique
273 ON network_feeds(network, source);
274 CREATE INDEX IF NOT EXISTS network_feeds_search
275 ON network_feeds USING GIST(network inet_ops);
278 CREATE TABLE IF NOT EXISTS autnum_overrides(
279 number bigint NOT NULL,
282 is_anonymous_proxy boolean,
283 is_satellite_provider boolean,
286 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
287 ON autnum_overrides(number);
288 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
289 ALTER TABLE autnum_overrides DROP COLUMN IF EXISTS source;
291 CREATE TABLE IF NOT EXISTS network_overrides(
292 network inet NOT NULL,
294 is_anonymous_proxy boolean,
295 is_satellite_provider boolean,
298 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
299 ON network_overrides(network);
300 CREATE INDEX IF NOT EXISTS network_overrides_search
301 ON network_overrides USING GIST(network inet_ops);
302 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
303 ALTER TABLE network_overrides DROP COLUMN IF EXISTS source;
305 CREATE TABLE IF NOT EXISTS geofeed_overrides(
308 CREATE UNIQUE INDEX IF NOT EXISTS geofeed_overrides_url
309 ON geofeed_overrides(url);
def fetch_countries(self):
	"""
		Returns the set of all country codes that are known to the database.
	"""
	# Fetch all valid country codes to check parsed networks against...
	rows = self.db.query("SELECT country_code FROM countries ORDER BY country_code")

	# Collect the codes into a set for fast membership tests
	return {row.country_code for row in rows}
323 def handle_write(self
, ns
):
325 Compiles a database in libloc format out of what is in the database
328 writer
= location
.Writer(ns
.signing_key
, ns
.backup_signing_key
)
332 writer
.vendor
= ns
.vendor
335 writer
.description
= ns
.description
338 writer
.license
= ns
.license
340 # Add all Autonomous Systems
341 log
.info("Writing Autonomous Systems...")
343 # Select all ASes with a name
344 rows
= self
.db
.query("""
346 autnums.number AS number,
354 autnum_overrides overrides ON autnums.number = overrides.number
360 # Skip AS without names
364 a
= writer
.add_as(row
.number
)
368 log
.info("Writing networks...")
370 # Select all known networks
371 rows
= self
.db
.query("""
372 WITH known_networks AS (
373 SELECT network FROM announcements
375 SELECT network FROM networks
377 SELECT network FROM network_feeds
379 SELECT network FROM network_overrides
381 SELECT network FROM geofeed_networks
384 ordered_networks AS (
386 known_networks.network AS network,
387 announcements.autnum AS autnum,
388 networks.country AS country,
390 -- Must be part of returned values for ORDER BY clause
391 masklen(announcements.network) AS sort_a,
392 masklen(networks.network) AS sort_b
396 announcements ON known_networks.network <<= announcements.network
398 networks ON known_networks.network <<= networks.network
400 known_networks.network,
405 -- Return a list of those networks enriched with all
406 -- other information that we store in the database
408 DISTINCT ON (network)
418 network_overrides overrides
420 networks.network <<= overrides.network
422 masklen(overrides.network) DESC
429 autnum_overrides overrides
431 networks.autnum = overrides.number
439 networks.network <<= feeds.network
441 masklen(feeds.network) DESC
450 networks.autnum = feeds.number
457 geofeed_networks.country AS country
461 -- Join the data from the geofeeds
463 geofeeds ON network_geofeeds.url = geofeeds.url
465 geofeed_networks ON geofeeds.id = geofeed_networks.geofeed_id
467 -- Check whether we have a geofeed for this network
469 networks.network <<= network_geofeeds.network
471 networks.network <<= geofeed_networks.network
473 -- Filter for the best result
475 masklen(geofeed_networks.network) DESC
487 network_overrides overrides
489 networks.network <<= overrides.network
491 masklen(overrides.network) DESC
500 networks.network <<= feeds.network
502 masklen(feeds.network) DESC
511 networks.autnum = feeds.number
520 autnum_overrides overrides
522 networks.autnum = overrides.number
525 ) AS is_anonymous_proxy,
529 is_satellite_provider
531 network_overrides overrides
533 networks.network <<= overrides.network
535 masklen(overrides.network) DESC
540 is_satellite_provider
544 networks.network <<= feeds.network
546 masklen(feeds.network) DESC
551 is_satellite_provider
555 networks.autnum = feeds.number
562 is_satellite_provider
564 autnum_overrides overrides
566 networks.autnum = overrides.number
569 ) AS is_satellite_provider,
575 network_overrides overrides
577 networks.network <<= overrides.network
579 masklen(overrides.network) DESC
588 networks.network <<= feeds.network
590 masklen(feeds.network) DESC
599 networks.autnum = feeds.number
608 autnum_overrides overrides
610 networks.autnum = overrides.number
619 network_overrides overrides
621 networks.network <<= overrides.network
623 masklen(overrides.network) DESC
632 networks.network <<= feeds.network
634 masklen(feeds.network) DESC
643 networks.autnum = feeds.number
652 autnum_overrides overrides
654 networks.autnum = overrides.number
659 ordered_networks networks
663 network
= writer
.add_network(row
.network
)
667 network
.country_code
= row
.country
671 network
.asn
= row
.autnum
674 if row
.is_anonymous_proxy
:
675 network
.set_flag(location
.NETWORK_FLAG_ANONYMOUS_PROXY
)
677 if row
.is_satellite_provider
:
678 network
.set_flag(location
.NETWORK_FLAG_SATELLITE_PROVIDER
)
681 network
.set_flag(location
.NETWORK_FLAG_ANYCAST
)
684 network
.set_flag(location
.NETWORK_FLAG_DROP
)
687 log
.info("Writing countries...")
688 rows
= self
.db
.query("SELECT * FROM countries ORDER BY country_code")
691 c
= writer
.add_country(row
.country_code
)
692 c
.continent_code
= row
.continent_code
695 # Write everything to file
696 log
.info("Writing database to file...")
700 def handle_update_whois(self
, ns
):
701 # Did we run successfully?
705 # African Network Information Centre
707 (self
._import
_standard
_format
, "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz"),
710 # Asia Pacific Network Information Centre
712 (self
._import
_standard
_format
, "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz"),
713 (self
._import
_standard
_format
, "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz"),
714 (self
._import
_standard
_format
, "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz"),
715 (self
._import
_standard
_format
, "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz"),
718 # American Registry for Internet Numbers
720 (self
._import
_extended
_format
, "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"),
721 (self
._import
_arin
_as
_names
, "https://ftp.arin.net/pub/resource_registry_service/asns.csv"),
724 # Japan Network Information Center
726 (self
._import
_standard
_format
, "https://ftp.nic.ad.jp/jpirr/jpirr.db.gz"),
729 # Latin America and Caribbean Network Information Centre
731 (self
._import
_standard
_format
, "https://ftp.lacnic.net/lacnic/dbase/lacnic.db.gz"),
732 (self
._import
_extended
_format
, "https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"),
735 # Réseaux IP Européens
737 (self
._import
_standard
_format
, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz"),
738 (self
._import
_standard
_format
, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz"),
739 (self
._import
_standard
_format
, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz"),
740 (self
._import
_standard
_format
, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz"),
744 # Fetch all valid country codes to check parsed networks against
745 countries
= self
.fetch_countries()
747 # Check if we have countries
749 log
.error("Please import countries before importing any WHOIS data")
752 # Iterate over all potential sources
753 for name
, feeds
in sources
:
754 # Skip anything that should not be updated
755 if ns
.sources
and not name
in ns
.sources
:
759 self
._process
_source
(name
, feeds
, countries
)
761 # Log an error but continue if an exception occurs
762 except Exception as e
:
763 log
.error("Error processing source %s" % name
, exc_info
=True)
766 # Return a non-zero exit code for errors
767 return 0 if success
else 1
769 def _process_source(self
, source
, feeds
, countries
):
771 This function processes one source
773 # Wrap everything into one large transaction
774 with self
.db
.transaction():
775 # Remove all previously imported content
776 self
.db
.execute("DELETE FROM autnums WHERE source = %s", source
)
777 self
.db
.execute("DELETE FROM networks WHERE source = %s", source
)
778 self
.db
.execute("DELETE FROM network_geofeeds WHERE source = %s", source
)
780 # Create some temporary tables to store parsed data
782 CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
783 organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
784 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
786 CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
787 name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
788 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
790 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text,
791 original_countries text[] NOT NULL, source text NOT NULL)
793 CREATE INDEX _rirdata_search ON _rirdata
794 USING BTREE(family(network), masklen(network));
795 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
799 for callback
, url
, *args
in feeds
:
801 f
= self
.downloader
.retrieve(url
)
804 callback(source
, countries
, f
, *args
)
806 # Process all parsed networks from every RIR we happen to have access to,
807 # insert the largest network chunks into the networks table immediately...
808 families
= self
.db
.query("""
810 family(network) AS family
818 for family
in (row
.family
for row
in families
):
819 # Fetch the smallest mask length in our data set
820 smallest
= self
.db
.get("""
850 masklen(network) = %s
859 # ... determine any other prefixes for this network family, ...
860 prefixes
= self
.db
.query("""
862 DISTINCT masklen(network) AS prefix
873 # ... and insert networks with this prefix in case they provide additional
874 # information (i. e. subnet of a larger chunk with a different country)
875 for prefix
in (row
.prefix
for row
in prefixes
):
881 _rirdata.original_countries,
886 family(_rirdata.network) = %s
888 masklen(_rirdata.network) = %s
892 DISTINCT ON (c.network)
895 c.original_countries,
897 masklen(networks.network),
898 networks.country AS parent_country
904 c.network << networks.network
907 masklen(networks.network) DESC NULLS LAST
910 networks(network, country, original_countries, source)
919 parent_country IS NULL
921 country <> parent_country
922 ON CONFLICT DO NOTHING
937 _organizations.source
941 _organizations ON _autnums.organization = _organizations.handle
947 SET name = excluded.name
def _import_standard_format(self, source, countries, f, *args):
	"""
		Imports a single standard format source feed
	"""
	# Walk through the feed block by block and hand each one to the parser
	for current_block in iterate_over_blocks(f):
		self._parse_block(current_block, source, countries)
def _import_extended_format(self, source, countries, f, *args):
	"""
		Imports a single extended format source feed.

		Arguments:
			source    - name of the RIR source being imported
			countries - set of valid country codes to check parsed lines against
			f         - the downloaded feed to read lines from
	"""
	# Iterate over all lines
	for line in iterate_over_lines(f):
		# Fixed: this previously passed the undefined name "block", which
		# raised a NameError on the very first line; each line read from
		# the feed must be handed to the line parser.
		self._parse_line(line, source, countries)
def _import_arin_as_names(self, source, countries, f, *args):
	"""
		Imports Autonomous System names from ARIN's registry CSV file.

		Only entries with "Full Registry Services" status (i.e. actually
		managed by ARIN) are considered; each ASN in the listed range is
		validated and inserted into the autnums table.
	"""
	# Walk through the file
	# (Fixed: previously read from the undefined name "feed" instead of
	# the "f" parameter, which raised a NameError immediately.)
	for line in csv.DictReader(f, dialect="arin"):
		log.debug("Processing object: %s" % line)

		# Fetch the registration status of this entry
		status = line.get("Status")

		# We are only interested in anything managed by ARIN
		if not status == "Full Registry Services":
			continue

		# Fetch organization name
		name = line.get("Org Name")

		# Fetch the ASN range boundaries
		first_asn = line.get("Start AS Number")
		last_asn  = line.get("End AS Number")

		# Cast to a number
		# (Fixed: int() raises ValueError for non-numeric strings and
		# TypeError for missing values (None); both must be caught.)
		try:
			first_asn = int(first_asn)
		except (TypeError, ValueError) as e:
			log.warning("Could not parse ASN '%s'" % first_asn)
			continue

		try:
			last_asn = int(last_asn)
		except (TypeError, ValueError) as e:
			log.warning("Could not parse ASN '%s'" % last_asn)
			continue

		# Check if the range is valid
		if last_asn < first_asn:
			log.warning("Invalid ASN range %s-%s" % (first_asn, last_asn))
			continue

		# Insert everything into the database
		for asn in range(first_asn, last_asn + 1):
			if not self._check_parsed_asn(asn):
				log.warning("Skipping invalid ASN %s" % asn)
				continue

			self.db.execute("""
				INSERT INTO
					autnums
				(
					number,
					name,
					source
				)
				VALUES
				(
					%s, %s, %s
				)
				ON CONFLICT
				(
					number
				)
				DO NOTHING
			""", asn, name, "ARIN",
			)
1026 def _check_parsed_network(self
, network
):
1028 Assistive function to detect and subsequently sort out parsed
1029 networks from RIR data (both Whois and so-called "extended sources"),
1030 which are or have...
1032 (a) not globally routable (RFC 1918 space, et al.)
1033 (b) covering a too large chunk of the IP address space (prefix length
1034 is < 7 for IPv4 networks, and < 10 for IPv6)
1035 (c) "0.0.0.0" or "::" as a network address
1037 This unfortunately is necessary due to brain-dead clutter across
1038 various RIR databases, causing mismatches and eventually disruptions.
1040 We will return False in case a network is not suitable for adding
1041 it to our database, and True otherwise.
1044 if isinstance(network
, ipaddress
.IPv6Network
):
1046 elif isinstance(network
, ipaddress
.IPv4Network
):
1049 raise ValueError("Invalid network: %s (type %s)" % (network
, type(network
)))
1051 # Ignore anything that isn't globally routable
1052 if not network
.is_global
:
1053 log
.debug("Skipping non-globally routable network: %s" % network
)
1056 # Ignore anything that is unspecified IP range (See RFC 5735 for IPv4 or RFC 2373 for IPv6)
1057 elif network
.is_unspecified
:
1058 log
.debug("Skipping unspecified network: %s" % network
)
1062 if network
.version
== 6:
1063 if network
.prefixlen
< 10:
1064 log
.debug("Skipping too big IP chunk: %s" % network
)
1068 elif network
.version
== 4:
1069 if network
.prefixlen
< 7:
1070 log
.debug("Skipping too big IP chunk: %s" % network
)
1073 # In case we have made it here, the network is considered to
1074 # be suitable for libloc consumption...
def _check_parsed_asn(self, asn):
	"""
		Assistive function to filter Autonomous System Numbers not being suitable
		for adding to our database. Returns False in such cases, and True otherwise.
	"""
	# Accept the ASN as soon as it falls into any publicly routable range
	if any(lower <= asn <= upper for lower, upper in VALID_ASN_RANGES):
		return True

	log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
	return False
1090 def _parse_block(self
, block
, source_key
, countries
):
1091 # Get first line to find out what type of block this is
1095 if line
.startswith("aut-num:"):
1096 return self
._parse
_autnum
_block
(block
, source_key
)
1099 if line
.startswith("inet6num:") or line
.startswith("inetnum:"):
1100 return self
._parse
_inetnum
_block
(block
, source_key
, countries
)
1103 elif line
.startswith("organisation:"):
1104 return self
._parse
_org
_block
(block
, source_key
)
def _parse_autnum_block(self, block, source_key):
	# Collect everything we know about this AS into one dictionary
	autnum = {}
	for line in block:
		# Split line
		key, val = split_line(line)

		if key == "aut-num":
			m = re.match(r"^(AS|as)(\d+)", val)
			if m:
				autnum["asn"] = m.group(2)

		elif key == "org":
			autnum[key] = val.upper()

		elif key == "descr":
			# Save the first description line as well...
			if not key in autnum:
				autnum[key] = val

	# Skip empty objects
	if not autnum or not "asn" in autnum:
		return

	# Insert a dummy organisation handle into our temporary organisations
	# table in case the AS does not have an organisation handle set, but
	# has a description (a quirk often observed in APNIC area), so we can
	# later display at least some string for this AS.
	if not "org" in autnum:
		if "descr" in autnum:
			autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn")

			self.db.execute("INSERT INTO _organizations(handle, name, source) \
				VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
				autnum.get("org"), autnum.get("descr"), source_key,
			)
		else:
			log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
				autnum.get("asn"))
			return

	# Insert into database
	self.db.execute("INSERT INTO _autnums(number, organization, source) \
		VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
		organization = excluded.organization",
		autnum.get("asn"), autnum.get("org"), source_key,
	)
1153 def _parse_inetnum_block(self
, block
, source_key
, countries
):
1157 key
, val
= split_line(line
)
1159 # Filter any inetnum records which are only referring to IP space
1160 # not managed by that specific RIR...
1161 if key
== "netname":
1162 if re
.match(r
"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val
.strip()):
1163 log
.debug("Skipping record indicating historic/orphaned data: %s" % val
.strip())
1166 if key
== "inetnum":
1167 start_address
, delim
, end_address
= val
.partition("-")
1169 # Strip any excess space
1170 start_address
, end_address
= start_address
.rstrip(), end_address
.strip()
1172 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
1173 if start_address
and not (delim
or end_address
):
1175 start_address
= ipaddress
.ip_network(start_address
, strict
=False)
1177 start_address
= start_address
.split("/")
1178 ldigits
= start_address
[0].count(".")
1180 # How many octets do we need to add?
1181 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
1183 start_address
= start_address
[0] + ".0.0/" + start_address
[1]
1185 start_address
= start_address
[0] + ".0/" + start_address
[1]
1187 log
.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line
)
1191 start_address
= ipaddress
.ip_network(start_address
, strict
=False)
1193 log
.warning("Could not parse line in LACNIC DB format: %s" % line
)
1196 # Enumerate first and last IP address of this network
1197 end_address
= start_address
[-1]
1198 start_address
= start_address
[0]
1201 # Convert to IP address
1203 start_address
= ipaddress
.ip_address(start_address
)
1204 end_address
= ipaddress
.ip_address(end_address
)
1206 log
.warning("Could not parse line: %s" % line
)
1209 inetnum
["inetnum"] = list(ipaddress
.summarize_address_range(start_address
, end_address
))
1211 elif key
== "inet6num":
1212 inetnum
[key
] = [ipaddress
.ip_network(val
, strict
=False)]
1214 elif key
== "country":
1217 # Ignore certain country codes
1218 if cc
in IGNORED_COUNTRIES
:
1219 log
.debug("Ignoring country code '%s'" % cc
)
1222 # Translate country codes
1224 cc
= TRANSLATED_COUNTRIES
[cc
]
1228 # Do we know this country?
1229 if not cc
in countries
:
1230 log
.warning("Skipping invalid country code '%s'" % cc
)
1234 inetnum
[key
].append(cc
)
1238 # Parse the geofeed attribute
1239 elif key
== "geofeed":
1240 inetnum
["geofeed"] = val
1242 # Parse geofeed when used as a remark
1243 elif key
== "remarks":
1244 m
= re
.match(r
"^(?:Geofeed)\s+(https://.*)", val
)
1246 inetnum
["geofeed"] = m
.group(1)
1248 # Skip empty objects
1252 # Iterate through all networks enumerated from above, check them for plausibility and insert
1253 # them into the database, if _check_parsed_network() succeeded
1254 for single_network
in inetnum
.get("inet6num") or inetnum
.get("inetnum"):
1255 if not self
._check
_parsed
_network
(single_network
):
1258 # Fetch the countries or use a list with an empty country
1259 countries
= inetnum
.get("country", [None])
1261 # Insert the network into the database but only use the first country code
1262 for cc
in countries
:
1276 ON CONFLICT (network)
1277 DO UPDATE SET country = excluded.country
1278 """, "%s" % single_network
, cc
, [cc
for cc
in countries
if cc
], source_key
,
1281 # If there are more than one country, we will only use the first one
1284 # Update any geofeed information
1285 geofeed
= inetnum
.get("geofeed", None)
1287 self
._parse
_geofeed
(source_key
, geofeed
, single_network
)
1289 def _parse_geofeed(self
, source
, url
, single_network
):
1291 url
= urllib
.parse
.urlparse(url
)
1293 # Make sure that this is a HTTPS URL
1294 if not url
.scheme
== "https":
1295 log
.debug("Geofeed URL is not using HTTPS: %s" % geofeed
)
1298 # Put the URL back together normalized
1301 # Store/update any geofeeds
1314 ON CONFLICT (network) DO
1315 UPDATE SET url = excluded.url""",
1316 "%s" % single_network
, url
, source
,
def _parse_org_block(self, block, source_key):
	# Gather the attributes of this organisation object
	org = {}
	for line in block:
		# Split line
		key, val = split_line(line)

		if key == "organisation":
			org[key] = val.upper()
		elif key == "org-name":
			org[key] = val

	# Skip empty objects
	if not org:
		return

	# Insert into the temporary organisations table, updating the name
	# when the handle already exists
	self.db.execute("INSERT INTO _organizations(handle, name, source) \
		VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
		UPDATE SET name = excluded.name",
		org.get("organisation"), org.get("org-name"), source_key,
	)
1340 def _parse_line(self
, line
, source_key
, validcountries
=None):
1342 if line
.startswith("2"):
1346 if line
.startswith("#"):
1350 registry
, country_code
, type, line
= line
.split("|", 3)
1352 log
.warning("Could not parse line: %s" % line
)
1355 # Skip any unknown protocols
1356 if not type in ("ipv6", "ipv4"):
1357 log
.warning("Unknown IP protocol '%s'" % type)
1360 # Skip any lines that are for stats only or do not have a country
1361 # code at all (avoids log spam below)
1362 if not country_code
or country_code
== '*':
1365 # Skip objects with unknown country codes
1366 if validcountries
and country_code
not in validcountries
:
1367 log
.warning("Skipping line with bogus country '%s': %s" % \
1368 (country_code
, line
))
1372 address
, prefix
, date
, status
, organization
= line
.split("|")
1376 # Try parsing the line without organization
1378 address
, prefix
, date
, status
= line
.split("|")
1380 log
.warning("Unhandled line format: %s" % line
)
1383 # Skip anything that isn't properly assigned
1384 if not status
in ("assigned", "allocated"):
1387 # Cast prefix into an integer
1389 prefix
= int(prefix
)
1391 log
.warning("Invalid prefix: %s" % prefix
)
1394 # Fix prefix length for IPv4
1396 prefix
= 32 - int(math
.log(prefix
, 2))
1398 # Try to parse the address
1400 network
= ipaddress
.ip_network("%s/%s" % (address
, prefix
), strict
=False)
1402 log
.warning("Invalid IP address: %s" % address
)
1405 if not self
._check
_parsed
_network
(network
):
1421 ON CONFLICT (network)
1422 DO UPDATE SET country = excluded.country
1423 """, "%s" % network
, country_code
, [country
], source_key
,
def handle_update_announcements(self, ns):
	# The one and only route server to talk to
	server = ns.server[0]

	with self.db.transaction():
		# A leading slash means a local Bird socket
		if server.startswith("/"):
			self._handle_update_announcements_from_bird(server)

		# Purge anything we never want here
		self.db.execute("""
			-- Delete default routes
			DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

			-- Delete anything that is not global unicast address space
			DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

			-- DELETE "current network" address space
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';

			-- DELETE local loopback address space
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';

			-- DELETE RFC 1918 address space
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

			-- DELETE test, benchmark and documentation address space
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';

			-- DELETE CGNAT address space (RFC 6598)
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';

			-- DELETE link local address space
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';

			-- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
			DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';

			-- DELETE multicast and reserved address space
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';

			-- Delete networks that are too small to be in the global routing table
			DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
			DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

			-- Delete any non-public or reserved ASNs
			DELETE FROM announcements WHERE NOT (
				(autnum >= 1 AND autnum <= 23455)
				OR
				(autnum >= 23457 AND autnum <= 64495)
				OR
				(autnum >= 131072 AND autnum <= 4199999999)
			);

			-- Delete everything that we have not seen for 14 days
			DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
		""")
1490 def _handle_update_announcements_from_bird(self
, server
):
1491 # Pre-compile the regular expression for faster searching
1492 route
= re
.compile(b
"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")
1494 log
.info("Requesting routing table from Bird (%s)" % server
)
1496 aggregated_networks
= []
1498 # Send command to list all routes
1499 for line
in self
._bird
_cmd
(server
, "show route"):
1500 m
= route
.match(line
)
1506 # Ignore any header lines with the name of the routing table
1507 elif line
.startswith(b
"Table"):
1512 log
.debug("Could not parse line: %s" % line
.decode())
1516 # Fetch the extracted network and ASN
1517 network
, autnum
= m
.groups()
1519 # Decode into strings
1521 network
= network
.decode()
1523 autnum
= autnum
.decode()
1525 # Collect all aggregated networks
1527 log
.debug("%s is an aggregated network" % network
)
1528 aggregated_networks
.append(network
)
1531 # Insert it into the database
1532 self
.db
.execute("INSERT INTO announcements(network, autnum) \
1533 VALUES(%s, %s) ON CONFLICT (network) DO \
1534 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1538 # Process any aggregated networks
1539 for network
in aggregated_networks
:
1540 log
.debug("Processing aggregated network %s" % network
)
1542 # Run "show route all" for each network
1543 for line
in self
._bird
_cmd
(server
, "show route %s all" % network
):
1544 # Try finding the path
1545 m
= re
.match(b
"\s+BGP\.as_path:.* (\d+) {\d+}$", line
)
1547 # Select the last AS number in the path
1548 autnum
= m
.group(1).decode()
1550 # Insert it into the database
1551 self
.db
.execute("INSERT INTO announcements(network, autnum) \
1552 VALUES(%s, %s) ON CONFLICT (network) DO \
1553 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1557 # We don't need to process any more
# [review] _bird_cmd(self, socket_path, command): generator that connects to
# a Bird control socket (AF_UNIX, stream), sends `command`, and yields the
# reply line by line.  Lines are accumulated in a bytes buffer filled by
# 4096-byte recv() calls and split on b"\n".  Bird status lines (4-digit code
# followed by a space (32) or a dash (45)) are detected and logged instead of
# being yielded verbatim.
# NOTE(review): extraction gaps -- the buffer initialisation (after original
# line 1565), the loop headers, and the `yield` at the end (original 1605+)
# are missing here.  Two things to confirm against the full source:
#  * original 1585/1588 say "cut AFTER the newline", but the visible slice is
#    `buffer[pos:]` -- the `+ 1` may have been lost in extraction.
#  * the socket `s` is never closed in the visible fragment; presumably the
#    full function closes it or relies on process exit -- verify.
1560 def _bird_cmd(self
, socket_path
, command
):
1561 # Connect to the socket
1562 s
= socket
.socket(socket
.AF_UNIX
, socket
.SOCK_STREAM
)
1563 s
.connect(socket_path
)
1565 # Allocate some buffer
1568 log
.debug("Sending Bird command: %s" % command
)
# The command is encoded to bytes and terminated with a newline, as the
# Bird control protocol expects.
1571 s
.send(b
"%s\n" % command
.encode())
1574 # Fill up the buffer
1575 buffer += s
.recv(4096)
1578 # Search for the next newline
1579 pos
= buffer.find(b
"\n")
1581 # If we cannot find one, we go back and read more data
1585 # Cut after the newline character
1588 # Split the line we want and keep the rest in buffer
1589 line
, buffer = buffer[:pos
], buffer[pos
:]
1591 # Try parsing any status lines
# line is bytes, so line[4] is an int: 32 == ord(" "), 45 == ord("-").
1592 if len(line
) > 4 and line
[:4].isdigit() and line
[4] in (32, 45):
1593 code
, delim
, line
= int(line
[:4]), line
[4], line
[5:]
1595 log
.debug("Received response code %s from bird" % code
)
1605 # Otherwise return the line
# [review] handle_update_geofeeds(self, ns): maintenance entry point for
# Geofeeds.  In order: (1) delete geofeeds no longer referenced from
# network_geofeeds or geofeed_overrides, (2) register all currently linked
# URLs (the `WITH all_geofeeds AS` CTE -- its INSERT body falls in missing
# original lines), (3) re-fetch every feed not updated within one week,
# each in its own transaction so one failing feed does not roll back the
# rest, and (4) drop network data of feeds stale for two weeks.
# NOTE(review): most of the SQL bodies (original lines 1612-1660, 1663-1675,
# 1683-1698) are missing from this extraction; only fragments remain.
1608 def handle_update_geofeeds(self
, ns
):
1610 with self
.db
.transaction():
1611 # Delete all geofeeds which are no longer linked
1616 geofeeds.url NOT IN (
1618 network_geofeeds.url
1625 geofeed_overrides.url
1634 WITH all_geofeeds AS (
1636 network_geofeeds.url
1643 geofeed_overrides.url
1661 # Fetch all Geofeeds that require an update
1662 geofeeds
= self
.db
.query("""
1671 updated_at <= CURRENT_TIMESTAMP - INTERVAL '1 week'
1676 # Update all geofeeds
1677 for geofeed
in geofeeds
:
# One transaction per feed: a failed fetch only rolls back that feed.
1678 with self
.db
.transaction():
1679 self
._fetch
_geofeed
(geofeed
)
1681 # Delete data from any feeds that did not update in the last two weeks
1682 with self
.db
.transaction():
1687 geofeed_networks.geofeed_id IN (
1695 updated_at <= CURRENT_TIMESTAMP - INTERVAL '2 weeks'
# [review] _fetch_geofeed(self, geofeed): downloads one Geofeed (RFC 8805
# style CSV), replaces its rows in geofeed_networks, and records the fetch
# status on the geofeeds row.  Per line: decode, strip, split on "," (max 6
# fields), parse the network, normalise/validate the country code, insert.
# HTTP errors store the HTTP status; URL/connection/timeout errors store the
# synthetic status 599.  NOTE(review): extraction gaps -- the `try:` lines,
# the iteration header providing `lineno`, and the INSERT column list
# (original 1779-1787) are missing here.
1699 def _fetch_geofeed(self
, geofeed
):
1700 log
.debug("Fetching Geofeed %s" % geofeed
.url
)
1702 with self
.db
.transaction():
# Download via the shared downloader; announce ourselves and ask for CSV.
1706 f
= self
.downloader
.retrieve(geofeed
.url
, headers
={
1707 "User-Agent" : "location/%s" % location
.__version
__,
1709 # We expect some plain text file in CSV format
1710 "Accept" : "text/csv, text/plain",
1713 # Remove any previous data
1714 self
.db
.execute("DELETE FROM geofeed_networks \
1715 WHERE geofeed_id = %s", geofeed
.id)
1719 # Read the output line by line
1724 line
= line
.decode()
1726 # Ignore any lines we cannot decode
1727 except UnicodeDecodeError:
1728 log
.debug("Could not decode line %s in %s" \
1729 % (lineno
, geofeed
.url
))
1733 line
= line
.rstrip()
1739 # Try to parse the line
1741 fields
= line
.split(",", 5)
1743 log
.debug("Could not parse line: %s" % line
)
1746 # Check if we have enough fields
1748 log
.debug("Not enough fields in line: %s" % line
)
# Only the first four CSV fields are used (the trailing comma before "="
# is harmless); any fifth/sixth field is ignored.
1752 network
, country
, region
, city
, = fields
[:4]
1754 # Try to parse the network
# strict=False normalises host bits instead of raising.
1756 network
= ipaddress
.ip_network(network
, strict
=False)
1758 log
.debug("Could not parse network: %s" % network
)
1761 # Strip any excess whitespace from country codes
1762 country
= country
.strip()
1764 # Make the country code uppercase
1765 country
= country
.upper()
1767 # Check the country code
1769 log
.debug("Empty country code in Geofeed %s line %s" \
1770 % (geofeed
.url
, lineno
))
1773 elif not location
.country_code_is_valid(country
):
1774 log
.debug("Invalid country code in Geofeed %s:%s: %s" \
1775 % (geofeed
.url
, lineno
, country
))
1778 # Write this into the database
1788 VALUES (%s, %s, %s, %s, %s)""",
1796 # Catch any HTTP errors
1797 except urllib
.request
.HTTPError
as e
:
# Store the HTTP status code and the error text on the geofeeds row.
1798 self
.db
.execute("UPDATE geofeeds SET status = %s, error = %s \
1799 WHERE id = %s", e
.code
, "%s" % e
, geofeed
.id)
1801 # Remove any previous data when the feed has been deleted
# presumably guarded by a check on e.code (e.g. 404) in a missing line --
# verify against the full source.
1803 self
.db
.execute("DELETE FROM geofeed_networks \
1804 WHERE geofeed_id = %s", geofeed
.id)
1806 # Catch any other errors and connection timeouts
1807 except (http
.client
.InvalidURL
, urllib
.request
.URLError
, TimeoutError
) as e
:
1808 log
.debug("Could not fetch URL %s: %s" % (geofeed
.url
, e
))
# 599 is used as a catch-all "network connect error" pseudo status.
1810 self
.db
.execute("UPDATE geofeeds SET status = %s, error = %s \
1811 WHERE id = %s", 599, "%s" % e
, geofeed
.id)
1813 # Mark the geofeed as updated
1819 updated_at = CURRENT_TIMESTAMP,
# [review] handle_update_overrides(self, ns): re-imports manual override
# files.  The three override tables are truncated first, then each file given
# on the command line is parsed with read_blocks(); each block is dispatched
# on its type: "net" -> network_overrides, "aut-num" -> autnum_overrides,
# "geofeed" -> geofeed_overrides; anything else logs a warning.
# NOTE(review): extraction gaps -- the `try:` lines, the INSERT statements'
# column lists (original 1851-1859, 1884-1896, 1919-1931) and several
# `continue` lines are missing here.
1827 def handle_update_overrides(self
, ns
):
1828 with self
.db
.transaction():
1829 # Drop any previous content
1830 self
.db
.execute("TRUNCATE TABLE autnum_overrides")
1831 self
.db
.execute("TRUNCATE TABLE geofeed_overrides")
1832 self
.db
.execute("TRUNCATE TABLE network_overrides")
1834 for file in ns
.files
:
1835 log
.info("Reading %s..." % file)
1837 with
open(file, "rb") as f
:
1838 for type, block
in read_blocks(f
):
1840 network
= block
.get("net")
1841 # Try to parse and normalise the network
1843 network
= ipaddress
.ip_network(network
, strict
=False)
1844 except ValueError as e
:
1845 log
.warning("Invalid IP network: %s: %s" % (network
, e
))
1848 # Prevent that we overwrite all networks
# A /0 override would apply to every address -- refuse it.
1849 if network
.prefixlen
== 0:
1850 log
.warning("Skipping %s: You cannot overwrite default" % network
)
1860 is_satellite_provider,
1866 %s, %s, %s, %s, %s, %s
1868 ON CONFLICT (network) DO NOTHING
# Flag values are tri-state (True/False/None) as returned by _parse_bool().
1871 block
.get("country"),
1872 self
._parse
_bool
(block
, "is-anonymous-proxy"),
1873 self
._parse
_bool
(block
, "is-satellite-provider"),
1874 self
._parse
_bool
(block
, "is-anycast"),
1875 self
._parse
_bool
(block
, "drop"),
1878 elif type == "aut-num":
1879 autnum
= block
.get("aut-num")
1881 # Check if AS number begins with "AS"
1882 if not autnum
.startswith("AS"):
1883 log
.warning("Invalid AS number: %s" % autnum
)
1897 is_satellite_provider,
1903 %s, %s, %s, %s, %s, %s, %s
1905 ON CONFLICT (number) DO NOTHING
1909 block
.get("country"),
1910 self
._parse
_bool
(block
, "is-anonymous-proxy"),
1911 self
._parse
_bool
(block
, "is-satellite-provider"),
1912 self
._parse
_bool
(block
, "is-anycast"),
1913 self
._parse
_bool
(block
, "drop"),
1917 elif type == "geofeed":
1918 url
= block
.get("geofeed")
1932 ON CONFLICT (url) DO NOTHING
1937 log
.warning("Unsupported type: %s" % type)
# [review] handle_update_feeds(self, ns): updates the configured third-party
# feeds.  `feeds` is a list of (name, callback, url, *extra-args) tuples;
# rows from no-longer-supported sources are removed first, then each feed
# (optionally filtered by ns.feeds) is processed via _process_feed().
# Exceptions from a single feed are logged and processing continues; the
# return code is 0 only if everything succeeded (the `success = False` line
# falls in a missing original line ~1978).
1939 def handle_update_feeds(self
, ns
):
1941 Update any third-party feeds
1947 ("AWS-IP-RANGES", self
._import
_aws
_ip
_ranges
, "https://ip-ranges.amazonaws.com/ip-ranges.json"),
1950 ("SPAMHAUS-DROP", self
._import
_spamhaus
_drop
, "https://www.spamhaus.org/drop/drop.txt"),
1951 ("SPAMHAUS-EDROP", self
._import
_spamhaus
_drop
, "https://www.spamhaus.org/drop/edrop.txt"),
1952 ("SPAMHAUS-DROPV6", self
._import
_spamhaus
_drop
, "https://www.spamhaus.org/drop/dropv6.txt"),
1955 ("SPAMHAUS-ASNDROP", self
._import
_spamhaus
_asndrop
, "https://www.spamhaus.org/drop/asndrop.json"),
1958 # Drop any data from feeds that we don't support (any more)
1959 with self
.db
.transaction():
1960 # Fetch the names of all feeds we support
1961 sources
= [name
for name
, *rest
in feeds
]
1963 self
.db
.execute("DELETE FROM autnum_feeds WHERE NOT source = ANY(%s)", sources
)
1964 self
.db
.execute("DELETE FROM network_feeds WHERE NOT source = ANY(%s)", sources
)
1966 # Walk through all feeds
1967 for name
, callback
, url
, *args
in feeds
:
1968 # Skip any feeds that were not requested on the command line
1969 if ns
.feeds
and not name
in ns
.feeds
:
1973 self
._process
_feed
(name
, callback
, url
, *args
)
1975 # Log an error but continue if an exception occurs
1976 except Exception as e
:
1977 log
.error("Error processing feed '%s': %s" % (name
, e
))
1981 return 0 if success
else 1
# [review] _process_feed(self, name, callback, url, *args): downloads `url`,
# then -- inside one transaction -- deletes all previous rows of this source
# from both feed tables and delegates parsing/insertion to `callback(name, f,
# *args)`.  Running delete + re-import in the same transaction means a
# failing callback rolls the delete back as well.
1983 def _process_feed(self
, name
, callback
, url
, *args
):
1988 f
= self
.downloader
.retrieve(url
)
1990 with self
.db
.transaction():
1991 # Drop any previous content
1992 self
.db
.execute("DELETE FROM autnum_feeds WHERE source = %s", name
)
1993 self
.db
.execute("DELETE FROM network_feeds WHERE source = %s", name
)
1995 # Call the callback to process the feed
1996 return callback(name
, f
, *args
)
1998 def _import_aws_ip_ranges(self
, name
, f
):
2002 # Set up a dictionary for mapping a region name to a country. Unfortunately,
2003 # there seems to be no machine-readable version available of this other than
2004 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
2005 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
2006 # was helpful here as well.
2007 aws_region_country_map
= {
2009 "af-south-1" : "ZA",
2012 "il-central-1" : "IL", # Tel Aviv
2015 "ap-northeast-1" : "JP",
2016 "ap-northeast-2" : "KR",
2017 "ap-northeast-3" : "JP",
2019 "ap-south-1" : "IN",
2020 "ap-south-2" : "IN",
2021 "ap-southeast-1" : "SG",
2022 "ap-southeast-2" : "AU",
2023 "ap-southeast-3" : "MY",
2024 "ap-southeast-4" : "AU",
2025 "ap-southeast-5" : "NZ", # Auckland, NZ
2026 "ap-southeast-6" : "AP", # XXX: Precise location not documented anywhere
2029 "ca-central-1" : "CA",
2033 "eu-central-1" : "DE",
2034 "eu-central-2" : "CH",
2035 "eu-north-1" : "SE",
2039 "eu-south-1" : "IT",
2040 "eu-south-2" : "ES",
2043 "me-central-1" : "AE",
2044 "me-south-1" : "BH",
2049 # Undocumented, likely located in Berlin rather than Frankfurt
2050 "eusc-de-east-1" : "DE",
2053 # Collect a list of all networks
2054 prefixes
= feed
.get("ipv6_prefixes", []) + feed
.get("prefixes", [])
2056 for prefix
in prefixes
:
2058 network
= prefix
.get("ipv6_prefix") or prefix
.get("ip_prefix")
2062 network
= ipaddress
.ip_network(network
)
2063 except ValuleError
as e
:
2064 log
.warning("%s: Unable to parse prefix %s" % (name
, network
))
2067 # Sanitize parsed networks...
2068 if not self
._check
_parsed
_network
(network
):
2072 region
= prefix
.get("region")
2078 # Fetch the CC from the dictionary
2080 cc
= aws_region_country_map
[region
]
2082 # If we couldn't find anything, let's try something else...
2083 except KeyError as e
:
2084 # Find anycast networks
2085 if region
== "GLOBAL":
2088 # Everything that starts with us- is probably in the United States
2089 elif region
.startswith("us-"):
2092 # Everything that starts with cn- is probably China
2093 elif region
.startswith("cn-"):
2096 # Log a warning for anything else
2098 log
.warning("%s: Could not determine country code for AWS region %s" \
2116 ON CONFLICT (network, source) DO NOTHING
2117 """, "%s" % network
, name
, cc
, is_anycast
,
# [review] _import_spamhaus_drop(self, name, f): imports one of the Spamhaus
# DROP plain-text feeds.  Lines are decoded as UTF-8, comments after ";" are
# stripped, each remaining line is parsed as a network, validated with
# _check_parsed_network(), and inserted into network_feeds (with drop=True,
# per the visible `True` argument).  If no line imported, a RuntimeError
# flags a bogus/empty feed so the surrounding transaction rolls back.
# NOTE(review): `line, _, comment = line.partition(";")` rebinds `_`, which
# this file imports as the gettext translation function (see the file head);
# harmless locally but worth renaming to e.g. `_sep` in the full source.
# Extraction gaps: the loop header, `continue` lines, the counter updates and
# the INSERT column list (original 2158-2168) are missing here.
2120 def _import_spamhaus_drop(self
, name
, f
):
2122 Import Spamhaus DROP IP feeds
2127 # Walk through all lines
2130 line
= line
.decode("utf-8")
2132 # Strip off any comments
2133 line
, _
, comment
= line
.partition(";")
2135 # Ignore empty lines
2139 # Strip any excess whitespace
2142 # Increment line counter
2147 network
= ipaddress
.ip_network(line
)
2148 except ValueError as e
:
2149 log
.warning("%s: Could not parse network: %s - %s" % (name
, line
, e
))
2153 if not self
._check
_parsed
_network
(network
):
2154 log
.warning("%s: Skipping bogus network: %s" % (name
, network
))
2157 # Insert into the database
2169 )""", "%s" % network
, name
, True,
2172 # Raise an exception if we could not import anything
2174 raise RuntimeError("Received bogus feed %s with no data" % name
)
# [review] _import_spamhaus_asndrop(self, name, f): imports the Spamhaus
# ASNDROP feed, which is newline-delimited JSON.  Each line is decoded,
# parsed with json.loads(), "metadata" objects are skipped, the "asn" field
# is validated via _check_parsed_asn(), and valid ASNs are inserted into
# autnum_feeds (with drop=True, per the visible `True` argument).
# NOTE(review): the local `type = line.get("type")` shadows the builtin
# `type` for the rest of this scope -- consider renaming in the full source.
# Extraction gaps: the loop header, `continue` lines and the INSERT column
# list (original 2208-2221) are missing here.
2176 def _import_spamhaus_asndrop(self
, name
, f
):
2178 Import Spamhaus ASNDROP feed
2182 line
= line
.decode("utf-8")
2186 line
= json
.loads(line
)
2187 except json
.JSONDecodeError
as e
:
2188 log
.warning("%s: Unable to parse JSON object %s: %s" % (name
, line
, e
))
2192 type = line
.get("type")
2195 if type == "metadata":
2199 asn
= line
.get("asn")
2201 # Skip any lines without an ASN
2205 # Filter invalid ASNs
2206 if not self
._check
_parsed
_asn
(asn
):
2207 log
.warning("%s: Skipping bogus ASN %s" % (name
, asn
))
2222 )""", "%s" % asn
, name
, True,
# [review] _parse_bool(block, key): tri-state boolean parser for override
# blocks.  Returns True for "yes"/"1", False for "no"/"0", and None when the
# key is absent (the early-return and the lowercasing line fall in missing
# original lines, but are implied by the comments at original 2229/2233).
# Note there is no `self` parameter -- presumably declared @staticmethod on a
# missing decorator line; verify against the full source.
2226 def _parse_bool(block
, key
):
2227 val
= block
.get(key
)
2229 # There is no point to proceed when we got None
2233 # Convert to lowercase
2237 if val
in ("yes", "1"):
2241 if val
in ("no", "0"):
# [review] handle_import_countries(self, ns): replaces the `countries` table
# from flat files given on the command line.  Each non-comment line is split
# into country code, continent code, and (possibly multi-word) name via
# split(maxsplit=2); unparseable lines are logged and skipped (the
# `try:`/`except`/`continue` lines fall in missing original lines 2260/2262/
# 2264).  Note the argument order swap on insert: the file is
# "code continent name" but the statement binds (code, name, continent) to
# match its column list -- intentional per the visible SQL.
2247 def handle_import_countries(self
, ns
):
2248 with self
.db
.transaction():
2249 # Drop all data that we have
2250 self
.db
.execute("TRUNCATE TABLE countries")
2252 for file in ns
.file:
2254 line
= line
.rstrip()
2256 # Ignore any comments
2257 if line
.startswith("#"):
2261 country_code
, continent_code
, name
= line
.split(maxsplit
=2)
2263 log
.warning("Could not parse line: %s" % line
)
2266 self
.db
.execute("INSERT INTO countries(country_code, name, continent_code) \
2267 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code
, name
, continent_code
)
# [review] split_line(line): module-level helper splitting an RPSL-style
# "key: value" line at the first colon.  The visible body partitions the line
# and (per the comment at original 2273) strips surrounding whitespace; the
# strip and return statements fall in missing original lines -- presumably it
# returns the (key, value) pair; verify against the full source.
2270 def split_line(line
):
2271 key
, colon
, val
= line
.partition(":")
2273 # Strip any excess space
# [review] NOTE: the `def` line of this function was lost in extraction (gap
# between original lines 2273 and 2280); from the call sites (e.g.
# `read_blocks(f)` in handle_update_overrides) this is presumably the body of
# `read_blocks(f)`.  It walks the blocks produced by iterate_over_blocks(),
# splits each "key: value" line, takes the first line's key as the block type
# and collects the remaining pairs into a dict (yield/return statements fall
# in missing original lines).
2280 for block
in iterate_over_blocks(f
):
2284 for i
, line
in enumerate(block
):
2285 key
, value
= line
.split(":", 1)
2287 # The key of the first line defines the type
2292 data
[key
] = value
.strip()
# [review] iterate_over_blocks(f, charsets=("utf-8", "latin1")): generator
# over paragraph-style blocks of a binary file object.  Per line: skip "#"/
# "%" comment lines, decode trying each charset in order (the try/else/break
# structure around original 2306-2312 is partially missing), cut trailing
# "#" comments, rstrip, drop lines that became empty only because a comment
# was cut, accumulate non-empty lines, and emit the collected block on each
# blank line and once more at EOF (the append/yield lines fall in missing
# original lines).  NOTE(review): `line, hash, comment = ...` shadows the
# builtin `hash` in this scope -- harmless but worth renaming.
2296 def iterate_over_blocks(f
, charsets
=("utf-8", "latin1")):
2300 # Skip commented lines
2301 if line
.startswith(b
"#") or line
.startswith(b
"%"):
2305 for charset
in charsets
:
2307 line
= line
.decode(charset
)
2308 except UnicodeDecodeError:
2313 # Remove any comments at the end of line
2314 line
, hash, comment
= line
.partition("#")
2316 # Strip any whitespace at the end of the line
2317 line
= line
.rstrip()
2319 # If we cut off some comment and the line is empty, we can skip it
2320 if comment
and not line
:
2323 # If the line has some content, keep collecting it
2328 # End the block on an empty line
2335 # Return the last block
# [review] iterate_over_lines(f): generator over the decoded lines of a
# binary file object.  Only the decode statement is visible; the loop header
# and the remainder of the body (original lines 2340-2347) are missing from
# this extraction -- presumably it strips and yields each line; verify
# against the full source.
2339 def iterate_over_lines(f
):
2342 line
= line
.decode()
2348 # Run the command line interface