2 ###############################################################################
4 # libloc - A library to determine the location of someone on the Internet #
6 # Copyright (C) 2020-2024 IPFire Development Team <info@ipfire.org> #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
18 ###############################################################################
33 # Load our location module
35 import location
.database
36 from location
.downloader
import Downloader
37 from location
.i18n
import _
40 log
= logging
.getLogger("location.importer")
50 TRANSLATED_COUNTRIES
= {
51 # When people say UK, they mean GB
55 IGNORED_COUNTRIES
= set((
59 # Some people use ZZ to say "no country" or to hide the country
63 # Configure the CSV parser for ARIN
64 csv
.register_dialect("arin", delimiter
=",", quoting
=csv
.QUOTE_ALL
, quotechar
="\"")
68 parser
= argparse
.ArgumentParser(
69 description
=_("Location Importer Command Line Interface"),
71 subparsers
= parser
.add_subparsers()
73 # Global configuration flags
74 parser
.add_argument("--debug", action
="store_true",
75 help=_("Enable debug output"))
76 parser
.add_argument("--quiet", action
="store_true",
77 help=_("Enable quiet mode"))
80 parser
.add_argument("--version", action
="version",
81 version
="%(prog)s @VERSION@")
84 parser
.add_argument("--database-host", required
=True,
85 help=_("Database Hostname"), metavar
=_("HOST"))
86 parser
.add_argument("--database-name", required
=True,
87 help=_("Database Name"), metavar
=_("NAME"))
88 parser
.add_argument("--database-username", required
=True,
89 help=_("Database Username"), metavar
=_("USERNAME"))
90 parser
.add_argument("--database-password", required
=True,
91 help=_("Database Password"), metavar
=_("PASSWORD"))
94 write
= subparsers
.add_parser("write", help=_("Write database to file"))
95 write
.set_defaults(func
=self
.handle_write
)
96 write
.add_argument("file", nargs
=1, help=_("Database File"))
97 write
.add_argument("--signing-key", nargs
="?", type=open, help=_("Signing Key"))
98 write
.add_argument("--backup-signing-key", nargs
="?", type=open, help=_("Backup Signing Key"))
99 write
.add_argument("--vendor", nargs
="?", help=_("Sets the vendor"))
100 write
.add_argument("--description", nargs
="?", help=_("Sets a description"))
101 write
.add_argument("--license", nargs
="?", help=_("Sets the license"))
102 write
.add_argument("--version", type=int, help=_("Database Format Version"))
105 update_whois
= subparsers
.add_parser("update-whois", help=_("Update WHOIS Information"))
106 update_whois
.add_argument("sources", nargs
="*",
107 help=_("Only update these sources"))
108 update_whois
.set_defaults(func
=self
.handle_update_whois
)
110 # Update announcements
111 update_announcements
= subparsers
.add_parser("update-announcements",
112 help=_("Update BGP Annoucements"))
113 update_announcements
.set_defaults(func
=self
.handle_update_announcements
)
114 update_announcements
.add_argument("server", nargs
=1,
115 help=_("Route Server to connect to"), metavar
=_("SERVER"))
118 update_geofeeds
= subparsers
.add_parser("update-geofeeds",
119 help=_("Update Geofeeds"))
120 update_geofeeds
.set_defaults(func
=self
.handle_update_geofeeds
)
123 update_feeds
= subparsers
.add_parser("update-feeds",
124 help=_("Update Feeds"))
125 update_feeds
.add_argument("feeds", nargs
="*",
126 help=_("Only update these feeds"))
127 update_feeds
.set_defaults(func
=self
.handle_update_feeds
)
130 update_overrides
= subparsers
.add_parser("update-overrides",
131 help=_("Update overrides"),
133 update_overrides
.add_argument(
134 "files", nargs
="+", help=_("Files to import"),
136 update_overrides
.set_defaults(func
=self
.handle_update_overrides
)
139 import_countries
= subparsers
.add_parser("import-countries",
140 help=_("Import countries"),
142 import_countries
.add_argument("file", nargs
=1, type=argparse
.FileType("r"),
143 help=_("File to import"))
144 import_countries
.set_defaults(func
=self
.handle_import_countries
)
146 args
= parser
.parse_args()
150 location
.logger
.set_level(logging
.DEBUG
)
152 location
.logger
.set_level(logging
.WARNING
)
154 # Print usage if no action was given
155 if not "func" in args
:
162 # Parse command line arguments
163 args
= self
.parse_cli()
165 # Initialize the downloader
166 self
.downloader
= Downloader()
168 # Initialise database
169 self
.db
= self
._setup
_database
(args
)
172 ret
= args
.func(args
)
174 # Return with exit code
178 # Otherwise just exit
181 def _setup_database(self
, ns
):
183 Initialise the database
185 # Connect to database
186 db
= location
.database
.Connection(
187 host
=ns
.database_host
, database
=ns
.database_name
,
188 user
=ns
.database_username
, password
=ns
.database_password
,
191 with db
.transaction():
194 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
195 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
196 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
197 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
198 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
199 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
202 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
203 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
204 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
207 CREATE TABLE IF NOT EXISTS countries(
208 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
209 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
212 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
213 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
214 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
215 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
216 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
217 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
220 CREATE TABLE IF NOT EXISTS geofeeds(
221 id serial primary key,
223 status integer default null,
224 updated_at timestamp without time zone default null
226 ALTER TABLE geofeeds ADD COLUMN IF NOT EXISTS error text;
227 CREATE UNIQUE INDEX IF NOT EXISTS geofeeds_unique
229 CREATE TABLE IF NOT EXISTS geofeed_networks(
230 geofeed_id integer references geofeeds(id) on delete cascade,
236 CREATE INDEX IF NOT EXISTS geofeed_networks_geofeed_id
237 ON geofeed_networks(geofeed_id);
238 CREATE INDEX IF NOT EXISTS geofeed_networks_search
239 ON geofeed_networks USING GIST(network inet_ops);
240 CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
241 ALTER TABLE network_geofeeds ADD COLUMN IF NOT EXISTS source text NOT NULL;
242 CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique
243 ON network_geofeeds(network);
244 CREATE INDEX IF NOT EXISTS network_geofeeds_search
245 ON network_geofeeds USING GIST(network inet_ops);
246 CREATE INDEX IF NOT EXISTS network_geofeeds_url
247 ON network_geofeeds(url);
250 CREATE TABLE IF NOT EXISTS autnum_feeds(
251 number bigint NOT NULL,
252 source text NOT NULL,
255 is_anonymous_proxy boolean,
256 is_satellite_provider boolean,
260 CREATE UNIQUE INDEX IF NOT EXISTS autnum_feeds_unique
261 ON autnum_feeds(number, source);
263 CREATE TABLE IF NOT EXISTS network_feeds(
264 network inet NOT NULL,
265 source text NOT NULL,
267 is_anonymous_proxy boolean,
268 is_satellite_provider boolean,
272 CREATE UNIQUE INDEX IF NOT EXISTS network_feeds_unique
273 ON network_feeds(network, source);
274 CREATE INDEX IF NOT EXISTS network_feeds_search
275 ON network_feeds USING GIST(network inet_ops);
278 CREATE TABLE IF NOT EXISTS autnum_overrides(
279 number bigint NOT NULL,
282 is_anonymous_proxy boolean,
283 is_satellite_provider boolean,
286 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
287 ON autnum_overrides(number);
288 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
289 ALTER TABLE autnum_overrides DROP COLUMN IF EXISTS source;
291 CREATE TABLE IF NOT EXISTS network_overrides(
292 network inet NOT NULL,
294 is_anonymous_proxy boolean,
295 is_satellite_provider boolean,
298 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
299 ON network_overrides(network);
300 CREATE INDEX IF NOT EXISTS network_overrides_search
301 ON network_overrides USING GIST(network inet_ops);
302 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
303 ALTER TABLE network_overrides DROP COLUMN IF EXISTS source;
def fetch_countries(self):
	"""
		Returns the set of all country codes currently stored
		in the countries table.
	"""
	# Ask the database for every known country code (sorted for determinism)
	rows = self.db.query("SELECT country_code FROM countries ORDER BY country_code")

	# Collect the codes into a set so callers get fast membership tests
	# when validating parsed networks against known countries
	return {row.country_code for row in rows}
317 def handle_write(self
, ns
):
319 Compiles a database in libloc format out of what is in the database
322 writer
= location
.Writer(ns
.signing_key
, ns
.backup_signing_key
)
326 writer
.vendor
= ns
.vendor
329 writer
.description
= ns
.description
332 writer
.license
= ns
.license
334 # Add all Autonomous Systems
335 log
.info("Writing Autonomous Systems...")
337 # Select all ASes with a name
338 rows
= self
.db
.query("""
340 autnums.number AS number,
348 autnum_overrides overrides ON autnums.number = overrides.number
354 # Skip AS without names
358 a
= writer
.add_as(row
.number
)
362 log
.info("Writing networks...")
364 # Select all known networks
365 rows
= self
.db
.query("""
366 WITH known_networks AS (
367 SELECT network FROM announcements
369 SELECT network FROM networks
371 SELECT network FROM network_feeds
373 SELECT network FROM network_overrides
375 SELECT network FROM geofeed_networks
378 ordered_networks AS (
380 known_networks.network AS network,
381 announcements.autnum AS autnum,
382 networks.country AS country,
384 -- Must be part of returned values for ORDER BY clause
385 masklen(announcements.network) AS sort_a,
386 masklen(networks.network) AS sort_b
390 announcements ON known_networks.network <<= announcements.network
392 networks ON known_networks.network <<= networks.network
394 known_networks.network,
399 -- Return a list of those networks enriched with all
400 -- other information that we store in the database
402 DISTINCT ON (network)
412 network_overrides overrides
414 networks.network <<= overrides.network
416 masklen(overrides.network) DESC
423 autnum_overrides overrides
425 networks.autnum = overrides.number
433 networks.network <<= feeds.network
435 masklen(feeds.network) DESC
444 networks.autnum = feeds.number
451 geofeed_networks.country AS country
455 -- Join the data from the geofeeds
457 geofeeds ON network_geofeeds.url = geofeeds.url
459 geofeed_networks ON geofeeds.id = geofeed_networks.geofeed_id
461 -- Check whether we have a geofeed for this network
463 networks.network <<= network_geofeeds.network
465 networks.network <<= geofeed_networks.network
467 -- Filter for the best result
469 masklen(geofeed_networks.network) DESC
481 network_overrides overrides
483 networks.network <<= overrides.network
485 masklen(overrides.network) DESC
494 networks.network <<= feeds.network
496 masklen(feeds.network) DESC
505 networks.autnum = feeds.number
514 autnum_overrides overrides
516 networks.autnum = overrides.number
519 ) AS is_anonymous_proxy,
523 is_satellite_provider
525 network_overrides overrides
527 networks.network <<= overrides.network
529 masklen(overrides.network) DESC
534 is_satellite_provider
538 networks.network <<= feeds.network
540 masklen(feeds.network) DESC
545 is_satellite_provider
549 networks.autnum = feeds.number
556 is_satellite_provider
558 autnum_overrides overrides
560 networks.autnum = overrides.number
563 ) AS is_satellite_provider,
569 network_overrides overrides
571 networks.network <<= overrides.network
573 masklen(overrides.network) DESC
582 networks.network <<= feeds.network
584 masklen(feeds.network) DESC
593 networks.autnum = feeds.number
602 autnum_overrides overrides
604 networks.autnum = overrides.number
613 network_overrides overrides
615 networks.network <<= overrides.network
617 masklen(overrides.network) DESC
626 networks.network <<= feeds.network
628 masklen(feeds.network) DESC
637 networks.autnum = feeds.number
646 autnum_overrides overrides
648 networks.autnum = overrides.number
653 ordered_networks networks
657 network
= writer
.add_network(row
.network
)
661 network
.country_code
= row
.country
665 network
.asn
= row
.autnum
668 if row
.is_anonymous_proxy
:
669 network
.set_flag(location
.NETWORK_FLAG_ANONYMOUS_PROXY
)
671 if row
.is_satellite_provider
:
672 network
.set_flag(location
.NETWORK_FLAG_SATELLITE_PROVIDER
)
675 network
.set_flag(location
.NETWORK_FLAG_ANYCAST
)
678 network
.set_flag(location
.NETWORK_FLAG_DROP
)
681 log
.info("Writing countries...")
682 rows
= self
.db
.query("SELECT * FROM countries ORDER BY country_code")
685 c
= writer
.add_country(row
.country_code
)
686 c
.continent_code
= row
.continent_code
689 # Write everything to file
690 log
.info("Writing database to file...")
694 def handle_update_whois(self
, ns
):
695 # Did we run successfully?
699 # African Network Information Centre
701 (self
._import
_standard
_format
, "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz"),
704 # Asia Pacific Network Information Centre
706 (self
._import
_standard
_format
, "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz"),
707 (self
._import
_standard
_format
, "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz"),
708 (self
._import
_standard
_format
, "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz"),
709 (self
._import
_standard
_format
, "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz"),
712 # American Registry for Internet Numbers
714 (self
._import
_extended
_format
, "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"),
715 (self
._import
_arin
_as
_names
, "https://ftp.arin.net/pub/resource_registry_service/asns.csv"),
718 # Japan Network Information Center
720 (self
._import
_standard
_format
, "https://ftp.nic.ad.jp/jpirr/jpirr.db.gz"),
723 # Latin America and Caribbean Network Information Centre
725 (self
._import
_standard
_format
, "https://ftp.lacnic.net/lacnic/dbase/lacnic.db.gz"),
726 (self
._import
_extended
_format
, "https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"),
729 # Réseaux IP Européens
731 (self
._import
_standard
_format
, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz"),
732 (self
._import
_standard
_format
, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz"),
733 (self
._import
_standard
_format
, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz"),
734 (self
._import
_standard
_format
, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz"),
738 # Fetch all valid country codes to check parsed networks against
739 countries
= self
.fetch_countries()
741 # Check if we have countries
743 log
.error("Please import countries before importing any WHOIS data")
746 # Iterate over all potential sources
747 for name
, feeds
in sources
:
748 # Skip anything that should not be updated
749 if ns
.sources
and not name
in ns
.sources
:
753 self
._process
_source
(name
, feeds
, countries
)
755 # Log an error but continue if an exception occurs
756 except Exception as e
:
757 log
.error("Error processing source %s" % name
, exc_info
=True)
760 # Return a non-zero exit code for errors
761 return 0 if success
else 1
763 def _process_source(self
, source
, feeds
, countries
):
765 This function processes one source
767 # Wrap everything into one large transaction
768 with self
.db
.transaction():
769 # Remove all previously imported content
770 self
.db
.execute("DELETE FROM autnums WHERE source = %s", source
)
771 self
.db
.execute("DELETE FROM networks WHERE source = %s", source
)
772 self
.db
.execute("DELETE FROM network_geofeeds WHERE source = %s", source
)
774 # Create some temporary tables to store parsed data
776 CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
777 organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
778 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
780 CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
781 name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
782 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
784 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text,
785 original_countries text[] NOT NULL, source text NOT NULL)
787 CREATE INDEX _rirdata_search ON _rirdata
788 USING BTREE(family(network), masklen(network));
789 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
793 for callback
, url
, *args
in feeds
:
795 f
= self
.downloader
.retrieve(url
)
798 callback(source
, countries
, f
, *args
)
800 # Process all parsed networks from every RIR we happen to have access to,
801 # insert the largest network chunks into the networks table immediately...
802 families
= self
.db
.query("""
804 family(network) AS family
812 for family
in (row
.family
for row
in families
):
813 # Fetch the smallest mask length in our data set
814 smallest
= self
.db
.get("""
844 masklen(network) = %s
853 # ... determine any other prefixes for this network family, ...
854 prefixes
= self
.db
.query("""
856 DISTINCT masklen(network) AS prefix
867 # ... and insert networks with this prefix in case they provide additional
868 # information (i. e. subnet of a larger chunk with a different country)
869 for prefix
in (row
.prefix
for row
in prefixes
):
875 _rirdata.original_countries,
880 family(_rirdata.network) = %s
882 masklen(_rirdata.network) = %s
886 DISTINCT ON (c.network)
889 c.original_countries,
891 masklen(networks.network),
892 networks.country AS parent_country
898 c.network << networks.network
901 masklen(networks.network) DESC NULLS LAST
904 networks(network, country, original_countries, source)
913 parent_country IS NULL
915 country <> parent_country
916 ON CONFLICT DO NOTHING
931 _organizations.source
935 _organizations ON _autnums.organization = _organizations.handle
941 SET name = excluded.name
def _import_standard_format(self, source, countries, f, *args):
	"""
		Imports a single standard format source feed.

		Walks through the downloaded file block by block and hands
		each block to the block parser together with the source name
		and the set of valid country codes.
	"""
	# Process the feed one WHOIS block at a time
	for current_block in iterate_over_blocks(f):
		self._parse_block(current_block, source, countries)
def _import_extended_format(self, source, countries, f, *args):
	"""
		Imports a single extended format source feed.

		Iterates over the downloaded file line by line and feeds each
		line into the line parser together with the source name and
		the set of valid country codes.
	"""
	# Iterate over all lines
	for line in iterate_over_lines(f):
		# Fix: pass the loop variable "line" to the parser; the previous
		# code referenced the undefined name "block", which raised
		# NameError as soon as the first line was processed
		self._parse_line(line, source, countries)
958 def _import_arin_as_names(self
, source
, countries
, f
, *args
):
959 # Walk through the file
960 for line
in csv
.DictReader(feed
, dialect
="arin"):
961 log
.debug("Processing object: %s" % line
)
964 status
= line
.get("Status")
966 # We are only interested in anything managed by ARIN
967 if not status
== "Full Registry Services":
970 # Fetch organization name
971 name
= line
.get("Org Name")
974 first_asn
= line
.get("Start AS Number")
975 last_asn
= line
.get("End AS Number")
979 first_asn
= int(first_asn
)
980 except TypeError as e
:
981 log
.warning("Could not parse ASN '%s'" % first_asn
)
985 last_asn
= int(last_asn
)
986 except TypeError as e
:
987 log
.warning("Could not parse ASN '%s'" % last_asn
)
990 # Check if the range is valid
991 if last_asn
< first_asn
:
992 log
.warning("Invalid ASN range %s-%s" % (first_asn
, last_asn
))
994 # Insert everything into the database
995 for asn
in range(first_asn
, last_asn
+ 1):
996 if not self
._check
_parsed
_asn
(asn
):
997 log
.warning("Skipping invalid ASN %s" % asn
)
1017 """, asn
, name
, "ARIN",
1020 def _check_parsed_network(self
, network
):
1022 Assistive function to detect and subsequently sort out parsed
1023 networks from RIR data (both Whois and so-called "extended sources"),
1024 which are or have...
1026 (a) not globally routable (RFC 1918 space, et al.)
1027 (b) covering a too large chunk of the IP address space (prefix length
1028 is < 7 for IPv4 networks, and < 10 for IPv6)
1029 (c) "0.0.0.0" or "::" as a network address
1031 This unfortunately is necessary due to brain-dead clutter across
1032 various RIR databases, causing mismatches and eventually disruptions.
1034 We will return False in case a network is not suitable for adding
1035 it to our database, and True otherwise.
1038 if isinstance(network
, ipaddress
.IPv6Network
):
1040 elif isinstance(network
, ipaddress
.IPv4Network
):
1043 raise ValueError("Invalid network: %s (type %s)" % (network
, type(network
)))
1045 # Ignore anything that isn't globally routable
1046 if not network
.is_global
:
1047 log
.debug("Skipping non-globally routable network: %s" % network
)
1050 # Ignore anything that is unspecified IP range (See RFC 5735 for IPv4 or RFC 2373 for IPv6)
1051 elif network
.is_unspecified
:
1052 log
.debug("Skipping unspecified network: %s" % network
)
1056 if network
.version
== 6:
1057 if network
.prefixlen
< 10:
1058 log
.debug("Skipping too big IP chunk: %s" % network
)
1062 elif network
.version
== 4:
1063 if network
.prefixlen
< 7:
1064 log
.debug("Skipping too big IP chunk: %s" % network
)
1067 # In case we have made it here, the network is considered to
1068 # be suitable for libloc consumption...
1071 def _check_parsed_asn(self
, asn
):
1073 Assistive function to filter Autonomous System Numbers not being suitable
1074 for adding to our database. Returns False in such cases, and True otherwise.
1077 for start
, end
in VALID_ASN_RANGES
:
1078 if start
<= asn
and end
>= asn
:
1081 log
.info("Supplied ASN %s out of publicly routable ASN ranges" % asn
)
1084 def _parse_block(self
, block
, source_key
, countries
):
1085 # Get first line to find out what type of block this is
1089 if line
.startswith("aut-num:"):
1090 return self
._parse
_autnum
_block
(block
, source_key
)
1093 if line
.startswith("inet6num:") or line
.startswith("inetnum:"):
1094 return self
._parse
_inetnum
_block
(block
, source_key
, countries
)
1097 elif line
.startswith("organisation:"):
1098 return self
._parse
_org
_block
(block
, source_key
)
1100 def _parse_autnum_block(self
, block
, source_key
):
1104 key
, val
= split_line(line
)
1106 if key
== "aut-num":
1107 m
= re
.match(r
"^(AS|as)(\d+)", val
)
1109 autnum
["asn"] = m
.group(2)
1112 autnum
[key
] = val
.upper()
1114 elif key
== "descr":
1115 # Save the first description line as well...
1116 if not key
in autnum
:
1119 # Skip empty objects
1120 if not autnum
or not "asn" in autnum
:
1123 # Insert a dummy organisation handle into our temporary organisations
1124 # table in case the AS does not have an organisation handle set, but
1125 # has a description (a quirk often observed in APNIC area), so we can
1126 # later display at least some string for this AS.
1127 if not "org" in autnum
:
1128 if "descr" in autnum
:
1129 autnum
["org"] = "LIBLOC-%s-ORGHANDLE" % autnum
.get("asn")
1131 self
.db
.execute("INSERT INTO _organizations(handle, name, source) \
1132 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
1133 autnum
.get("org"), autnum
.get("descr"), source_key
,
1136 log
.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
1140 # Insert into database
1141 self
.db
.execute("INSERT INTO _autnums(number, organization, source) \
1142 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
1143 organization = excluded.organization",
1144 autnum
.get("asn"), autnum
.get("org"), source_key
,
1147 def _parse_inetnum_block(self
, block
, source_key
, countries
):
1151 key
, val
= split_line(line
)
1153 # Filter any inetnum records which are only referring to IP space
1154 # not managed by that specific RIR...
1155 if key
== "netname":
1156 if re
.match(r
"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val
.strip()):
1157 log
.debug("Skipping record indicating historic/orphaned data: %s" % val
.strip())
1160 if key
== "inetnum":
1161 start_address
, delim
, end_address
= val
.partition("-")
1163 # Strip any excess space
1164 start_address
, end_address
= start_address
.rstrip(), end_address
.strip()
1166 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
1167 if start_address
and not (delim
or end_address
):
1169 start_address
= ipaddress
.ip_network(start_address
, strict
=False)
1171 start_address
= start_address
.split("/")
1172 ldigits
= start_address
[0].count(".")
1174 # How many octets do we need to add?
1175 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
1177 start_address
= start_address
[0] + ".0.0/" + start_address
[1]
1179 start_address
= start_address
[0] + ".0/" + start_address
[1]
1181 log
.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line
)
1185 start_address
= ipaddress
.ip_network(start_address
, strict
=False)
1187 log
.warning("Could not parse line in LACNIC DB format: %s" % line
)
1190 # Enumerate first and last IP address of this network
1191 end_address
= start_address
[-1]
1192 start_address
= start_address
[0]
1195 # Convert to IP address
1197 start_address
= ipaddress
.ip_address(start_address
)
1198 end_address
= ipaddress
.ip_address(end_address
)
1200 log
.warning("Could not parse line: %s" % line
)
1203 inetnum
["inetnum"] = list(ipaddress
.summarize_address_range(start_address
, end_address
))
1205 elif key
== "inet6num":
1206 inetnum
[key
] = [ipaddress
.ip_network(val
, strict
=False)]
1208 elif key
== "country":
1211 # Ignore certain country codes
1212 if cc
in IGNORED_COUNTRIES
:
1213 log
.debug("Ignoring country code '%s'" % cc
)
1216 # Translate country codes
1218 cc
= TRANSLATED_COUNTRIES
[cc
]
1222 # Do we know this country?
1223 if not cc
in countries
:
1224 log
.warning("Skipping invalid country code '%s'" % cc
)
1228 inetnum
[key
].append(cc
)
1232 # Parse the geofeed attribute
1233 elif key
== "geofeed":
1234 inetnum
["geofeed"] = val
1236 # Parse geofeed when used as a remark
1237 elif key
== "remarks":
1238 m
= re
.match(r
"^(?:Geofeed)\s+(https://.*)", val
)
1240 inetnum
["geofeed"] = m
.group(1)
1242 # Skip empty objects
1246 # Iterate through all networks enumerated from above, check them for plausibility and insert
1247 # them into the database, if _check_parsed_network() succeeded
1248 for single_network
in inetnum
.get("inet6num") or inetnum
.get("inetnum"):
1249 if not self
._check
_parsed
_network
(single_network
):
1252 # Fetch the countries or use a list with an empty country
1253 countries
= inetnum
.get("country", [None])
1255 # Insert the network into the database but only use the first country code
1256 for cc
in countries
:
1270 ON CONFLICT (network)
1271 DO UPDATE SET country = excluded.country
1272 """, "%s" % single_network
, cc
, [cc
for cc
in countries
if cc
], source_key
,
1275 # If there are more than one country, we will only use the first one
1278 # Update any geofeed information
1279 geofeed
= inetnum
.get("geofeed", None)
1281 self
._parse
_geofeed
(source_key
, geofeed
, single_network
)
1283 def _parse_geofeed(self
, source
, url
, single_network
):
1285 url
= urllib
.parse
.urlparse(url
)
1287 # Make sure that this is a HTTPS URL
1288 if not url
.scheme
== "https":
1289 log
.debug("Geofeed URL is not using HTTPS: %s" % geofeed
)
1292 # Put the URL back together normalized
1295 # Store/update any geofeeds
1308 ON CONFLICT (network) DO
1309 UPDATE SET url = excluded.url""",
1310 "%s" % single_network
, url
, source
,
1313 def _parse_org_block(self
, block
, source_key
):
1317 key
, val
= split_line(line
)
1319 if key
== "organisation":
1320 org
[key
] = val
.upper()
1321 elif key
== "org-name":
1324 # Skip empty objects
1328 self
.db
.execute("INSERT INTO _organizations(handle, name, source) \
1329 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
1330 UPDATE SET name = excluded.name",
1331 org
.get("organisation"), org
.get("org-name"), source_key
,
1334 def _parse_line(self
, line
, source_key
, validcountries
=None):
1336 if line
.startswith("2"):
1340 if line
.startswith("#"):
1344 registry
, country_code
, type, line
= line
.split("|", 3)
1346 log
.warning("Could not parse line: %s" % line
)
1349 # Skip any unknown protocols
1350 if not type in ("ipv6", "ipv4"):
1351 log
.warning("Unknown IP protocol '%s'" % type)
1354 # Skip any lines that are for stats only or do not have a country
1355 # code at all (avoids log spam below)
1356 if not country_code
or country_code
== '*':
1359 # Skip objects with unknown country codes
1360 if validcountries
and country_code
not in validcountries
:
1361 log
.warning("Skipping line with bogus country '%s': %s" % \
1362 (country_code
, line
))
1366 address
, prefix
, date
, status
, organization
= line
.split("|")
1370 # Try parsing the line without organization
1372 address
, prefix
, date
, status
= line
.split("|")
1374 log
.warning("Unhandled line format: %s" % line
)
1377 # Skip anything that isn't properly assigned
1378 if not status
in ("assigned", "allocated"):
1381 # Cast prefix into an integer
1383 prefix
= int(prefix
)
1385 log
.warning("Invalid prefix: %s" % prefix
)
1388 # Fix prefix length for IPv4
1390 prefix
= 32 - int(math
.log(prefix
, 2))
1392 # Try to parse the address
1394 network
= ipaddress
.ip_network("%s/%s" % (address
, prefix
), strict
=False)
1396 log
.warning("Invalid IP address: %s" % address
)
1399 if not self
._check
_parsed
_network
(network
):
1415 ON CONFLICT (network)
1416 DO UPDATE SET country = excluded.country
1417 """, "%s" % network
, country_code
, [country
], source_key
,
def handle_update_announcements(self, ns):
    """
    Refresh the announcements table from a routing daemon, then purge
    anything we never want to keep (bogons, reserved ASNs, stale rows).

    ns.server[0] is expected to be a path to Bird's UNIX control socket.
    """
    server = ns.server[0]

    with self.db.transaction():
        # A path means we talk to a local Bird instance over its socket
        if server.startswith("/"):
            self._handle_update_announcements_from_bird(server)

        # Purge anything we never want here
        self.db.execute("""
            -- Delete default routes
            DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

            -- Delete anything that is not global unicast address space
            DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

            -- DELETE "current network" address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';

            -- DELETE local loopback address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';

            -- DELETE RFC 1918 address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

            -- DELETE test, benchmark and documentation address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';

            -- DELETE CGNAT address space (RFC 6598)
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';

            -- DELETE link local address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';

            -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
            DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';

            -- DELETE multicast and reserved address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';

            -- Delete networks that are too small to be in the global routing table
            DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
            DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

            -- Delete any non-public or reserved ASNs
            DELETE FROM announcements WHERE NOT (
                (autnum >= 1 AND autnum <= 23455)
                OR
                (autnum >= 23457 AND autnum <= 64495)
                OR
                (autnum >= 131072 AND autnum <= 4199999999)
            );

            -- Delete everything that we have not seen for 14 days
            DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
        """)
def _handle_update_announcements_from_bird(self, server):
    """
    Read the full routing table from a Bird daemon over its UNIX control
    socket and upsert every announced prefix into the announcements table.

    Fix: the regular expressions were written as plain byte literals
    containing "\\s", "\\[" etc., which are invalid escape sequences and
    raise SyntaxWarning on modern Python — they are now raw byte literals
    (rb"..."), which yields the exact same patterns.
    """
    # Pre-compile the regular expression for faster searching
    route = re.compile(rb"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")

    log.info("Requesting routing table from Bird (%s)" % server)

    # Networks that Bird reports without an origin ASN; these need a
    # second "show route ... all" query to recover the AS path
    aggregated_networks = []

    # Send command to list all routes
    for line in self._bird_cmd(server, "show route"):
        m = route.match(line)
        if not m:
            # Skip empty lines
            if not line:
                pass

            # Ignore any header lines with the name of the routing table
            elif line.startswith(b"Table"):
                pass

            # Log anything else
            else:
                log.debug("Could not parse line: %s" % line.decode())

            continue

        # Fetch the extracted network and ASN
        network, autnum = m.groups()

        # Decode into strings
        if network:
            network = network.decode()
        if autnum:
            autnum = autnum.decode()

        # Collect all aggregated networks
        if not autnum:
            log.debug("%s is an aggregated network" % network)
            aggregated_networks.append(network)
            continue

        # Insert it into the database
        self.db.execute("INSERT INTO announcements(network, autnum) \
            VALUES(%s, %s) ON CONFLICT (network) DO \
            UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
            network, autnum,
        )

    # Process any aggregated networks
    for network in aggregated_networks:
        log.debug("Processing aggregated network %s" % network)

        # Run "show route all" for each network
        for line in self._bird_cmd(server, "show route %s all" % network):
            # Try finding the path
            m = re.match(rb"\s+BGP\.as_path:.* (\d+) {\d+}$", line)
            if m:
                # Select the last AS number in the path
                autnum = m.group(1).decode()

                # Insert it into the database
                self.db.execute("INSERT INTO announcements(network, autnum) \
                    VALUES(%s, %s) ON CONFLICT (network) DO \
                    UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
                    network, autnum,
                )

                # We don't need to process any more
                break
def _bird_cmd(self, socket_path, command):
    """
    Send one command to a Bird daemon over its UNIX control socket and
    yield the payload of every response line (as bytes).

    Status lines ("NNNN ..." / "NNNN-...") are parsed and stripped;
    NOTE(review): the exact handling of individual response codes was
    reconstructed — confirm against the original before relying on it.
    """
    # Connect to the socket
    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    s.connect(socket_path)

    # Allocate some buffer
    buffer = b""

    log.debug("Sending Bird command: %s" % command)

    # Send the command
    s.send(b"%s\n" % command.encode())

    while True:
        # Fill up the buffer
        buffer += s.recv(4096)

        while True:
            # Search for the next newline
            pos = buffer.find(b"\n")

            # If we cannot find one, we go back and read more data
            if pos <= 0:
                break

            # Cut after the newline character
            pos += 1

            # Split the line we want and keep the rest in buffer
            line, buffer = buffer[:pos], buffer[pos:]

            # Try parsing any status lines ("NNNN " or "NNNN-")
            if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
                code, delim, line = int(line[:4]), line[4], line[5:]

                log.debug("Received response code %s from bird" % code)

                # End of output
                if code == 0:
                    return

                # Ignore hello lines
                elif code == 1:
                    continue

            # Otherwise return the line
            yield line
1602 def handle_update_geofeeds(self
, ns
):
1604 with self
.db
.transaction():
1605 # Delete all geofeeds which are no longer linked
1616 geofeeds.url = network_geofeeds.url
1635 # Fetch all Geofeeds that require an update
1636 geofeeds
= self
.db
.query("""
1645 updated_at <= CURRENT_TIMESTAMP - INTERVAL '1 week'
1650 # Update all geofeeds
1651 for geofeed
in geofeeds
:
1652 with self
.db
.transaction():
1653 self
._fetch
_geofeed
(geofeed
)
1655 # Delete data from any feeds that did not update in the last two weeks
1656 with self
.db
.transaction():
1661 geofeed_networks.geofeed_id IN (
1669 updated_at <= CURRENT_TIMESTAMP - INTERVAL '2 weeks'
1673 def _fetch_geofeed(self
, geofeed
):
1674 log
.debug("Fetching Geofeed %s" % geofeed
.url
)
1676 with self
.db
.transaction():
1680 f
= self
.downloader
.retrieve(geofeed
.url
, headers
={
1681 "User-Agent" : "location/%s" % location
.__version
__,
1683 # We expect some plain text file in CSV format
1684 "Accept" : "text/csv, text/plain",
1687 # Remove any previous data
1688 self
.db
.execute("DELETE FROM geofeed_networks \
1689 WHERE geofeed_id = %s", geofeed
.id)
1693 # Read the output line by line
1698 line
= line
.decode()
1700 # Ignore any lines we cannot decode
1701 except UnicodeDecodeError:
1702 log
.debug("Could not decode line %s in %s" \
1703 % (lineno
, geofeed
.url
))
1707 line
= line
.rstrip()
1713 # Try to parse the line
1715 fields
= line
.split(",", 5)
1717 log
.debug("Could not parse line: %s" % line
)
1720 # Check if we have enough fields
1722 log
.debug("Not enough fields in line: %s" % line
)
1726 network
, country
, region
, city
, = fields
[:4]
1728 # Try to parse the network
1730 network
= ipaddress
.ip_network(network
, strict
=False)
1732 log
.debug("Could not parse network: %s" % network
)
1735 # Strip any excess whitespace from country codes
1736 country
= country
.strip()
1738 # Make the country code uppercase
1739 country
= country
.upper()
1741 # Check the country code
1743 log
.debug("Empty country code in Geofeed %s line %s" \
1744 % (geofeed
.url
, lineno
))
1747 elif not location
.country_code_is_valid(country
):
1748 log
.debug("Invalid country code in Geofeed %s:%s: %s" \
1749 % (geofeed
.url
, lineno
, country
))
1752 # Write this into the database
1762 VALUES (%s, %s, %s, %s, %s)""",
1770 # Catch any HTTP errors
1771 except urllib
.request
.HTTPError
as e
:
1772 self
.db
.execute("UPDATE geofeeds SET status = %s, error = %s \
1773 WHERE id = %s", e
.code
, "%s" % e
, geofeed
.id)
1775 # Remove any previous data when the feed has been deleted
1777 self
.db
.execute("DELETE FROM geofeed_networks \
1778 WHERE geofeed_id = %s", geofeed
.id)
1780 # Catch any other errors and connection timeouts
1781 except (http
.client
.InvalidURL
, urllib
.request
.URLError
, TimeoutError
) as e
:
1782 log
.debug("Could not fetch URL %s: %s" % (geofeed
.url
, e
))
1784 self
.db
.execute("UPDATE geofeeds SET status = %s, error = %s \
1785 WHERE id = %s", 599, "%s" % e
, geofeed
.id)
1787 # Mark the geofeed as updated
1793 updated_at = CURRENT_TIMESTAMP,
1801 def handle_update_overrides(self
, ns
):
1802 with self
.db
.transaction():
1803 # Drop any previous content
1804 self
.db
.execute("TRUNCATE TABLE autnum_overrides")
1805 self
.db
.execute("TRUNCATE TABLE network_overrides")
1807 for file in ns
.files
:
1808 log
.info("Reading %s..." % file)
1810 with
open(file, "rb") as f
:
1811 for type, block
in read_blocks(f
):
1813 network
= block
.get("net")
1814 # Try to parse and normalise the network
1816 network
= ipaddress
.ip_network(network
, strict
=False)
1817 except ValueError as e
:
1818 log
.warning("Invalid IP network: %s: %s" % (network
, e
))
1821 # Prevent that we overwrite all networks
1822 if network
.prefixlen
== 0:
1823 log
.warning("Skipping %s: You cannot overwrite default" % network
)
1833 is_satellite_provider,
1839 %s, %s, %s, %s, %s, %s
1841 ON CONFLICT (network) DO NOTHING
1844 block
.get("country"),
1845 self
._parse
_bool
(block
, "is-anonymous-proxy"),
1846 self
._parse
_bool
(block
, "is-satellite-provider"),
1847 self
._parse
_bool
(block
, "is-anycast"),
1848 self
._parse
_bool
(block
, "drop"),
1851 elif type == "aut-num":
1852 autnum
= block
.get("aut-num")
1854 # Check if AS number begins with "AS"
1855 if not autnum
.startswith("AS"):
1856 log
.warning("Invalid AS number: %s" % autnum
)
1870 is_satellite_provider,
1876 %s, %s, %s, %s, %s, %s, %s
1878 ON CONFLICT (number) DO NOTHING
1882 block
.get("country"),
1883 self
._parse
_bool
(block
, "is-anonymous-proxy"),
1884 self
._parse
_bool
(block
, "is-satellite-provider"),
1885 self
._parse
_bool
(block
, "is-anycast"),
1886 self
._parse
_bool
(block
, "drop"),
1890 log
.warning("Unsupported type: %s" % type)
def handle_update_feeds(self, ns):
    """
    Update any third-party feeds

    Walks through the list of supported feeds, drops data from feeds we
    no longer support, and imports each requested feed. Returns 0 when
    every feed imported cleanly, 1 otherwise.
    """
    success = True

    feeds = [
        # AWS IP Ranges
        ("AWS-IP-RANGES", self._import_aws_ip_ranges, "https://ip-ranges.amazonaws.com/ip-ranges.json"),

        # Spamhaus DROP
        ("SPAMHAUS-DROP",   self._import_spamhaus_drop, "https://www.spamhaus.org/drop/drop.txt"),
        ("SPAMHAUS-EDROP",  self._import_spamhaus_drop, "https://www.spamhaus.org/drop/edrop.txt"),
        ("SPAMHAUS-DROPV6", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/dropv6.txt"),

        # Spamhaus ASNDROP
        ("SPAMHAUS-ASNDROP", self._import_spamhaus_asndrop, "https://www.spamhaus.org/drop/asndrop.json"),
    ]

    # Drop any data from feeds that we don't support (any more)
    with self.db.transaction():
        # Fetch the names of all feeds we support
        sources = [name for name, *rest in feeds]

        self.db.execute("DELETE FROM autnum_feeds WHERE NOT source = ANY(%s)", sources)
        self.db.execute("DELETE FROM network_feeds WHERE NOT source = ANY(%s)", sources)

    # Walk through all feeds
    for name, callback, url, *args in feeds:
        # Skip any feeds that were not requested on the command line
        if ns.feeds and not name in ns.feeds:
            continue

        try:
            self._process_feed(name, callback, url, *args)

        # Log an error but continue if an exception occurs
        except Exception as e:
            log.error("Error processing feed '%s': %s" % (name, e))
            success = False

    return 0 if success else 1
def _process_feed(self, name, callback, url, *args):
    """
    Download one feed and hand it to its import callback.

    Any previously imported rows for this feed are dropped first so the
    import always replaces the feed's data wholesale.
    """
    # Download the feed
    f = self.downloader.retrieve(url)

    with self.db.transaction():
        # Drop any previous content
        self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
        self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)

        # Call the callback to process the feed
        return callback(name, f, *args)
def _import_aws_ip_ranges(self, name, f):
    """
    Import the AWS ip-ranges.json feed into network_feeds.

    Fix: the except clause around ipaddress.ip_network() caught the
    misspelled name "ValuleError" — a NameError at runtime, so any
    unparseable prefix crashed the whole import instead of being
    skipped. It now catches ValueError as intended.
    """
    # Parse the feed
    feed = json.load(f)

    # Set up a dictionary for mapping a region name to a country. Unfortunately,
    # there seems to be no machine-readable version available of this other than
    # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
    # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
    # was helpful here as well.
    # NOTE(review): some entries of this map were reconstructed from an
    # incomplete source — verify against the original file.
    aws_region_country_map = {
        # Africa
        "af-south-1"     : "ZA",

        # Asia
        "il-central-1"   : "IL", # Tel Aviv

        # Asia/Pacific
        "ap-northeast-1" : "JP",
        "ap-northeast-2" : "KR",
        "ap-northeast-3" : "JP",
        "ap-east-1"      : "HK",
        "ap-south-1"     : "IN",
        "ap-south-2"     : "IN",
        "ap-southeast-1" : "SG",
        "ap-southeast-2" : "AU",
        "ap-southeast-3" : "MY",
        "ap-southeast-4" : "AU",
        "ap-southeast-5" : "NZ", # Auckland, NZ
        "ap-southeast-6" : "AP", # XXX: Precise location not documented anywhere

        # Canada
        "ca-central-1"   : "CA",
        "ca-west-1"      : "CA",

        # Europe
        "eu-central-1"   : "DE",
        "eu-central-2"   : "CH",
        "eu-north-1"     : "SE",
        "eu-west-1"      : "IE",
        "eu-west-2"      : "GB",
        "eu-west-3"      : "FR",
        "eu-south-1"     : "IT",
        "eu-south-2"     : "ES",

        # Middle East
        "me-central-1"   : "AE",
        "me-south-1"     : "BH",

        # South America
        "sa-east-1"      : "BR",

        # Undocumented, likely located in Berlin rather than Frankfurt
        "eusc-de-east-1" : "DE",
    }

    # Collect a list of all networks
    prefixes = feed.get("ipv6_prefixes", []) + feed.get("prefixes", [])

    for prefix in prefixes:
        # Fetch the network
        network = prefix.get("ipv6_prefix") or prefix.get("ip_prefix")

        # Parse the network
        try:
            network = ipaddress.ip_network(network)
        except ValueError as e:
            log.warning("%s: Unable to parse prefix %s" % (name, network))
            continue

        # Sanitize parsed networks...
        if not self._check_parsed_network(network):
            continue

        # Fetch the region
        region = prefix.get("region")

        # Set some defaults
        cc = None
        is_anycast = False

        # Fetch the CC from the dictionary
        try:
            cc = aws_region_country_map[region]

        # If we couldn't find anything, let's try something else...
        except KeyError as e:
            # Find anycast networks
            if region == "GLOBAL":
                is_anycast = True

            # Everything that starts with us- is probably in the United States
            elif region.startswith("us-"):
                cc = "US"

            # Everything that starts with cn- is probably China
            elif region.startswith("cn-"):
                cc = "CN"

            # Log a warning for anything else
            else:
                log.warning("%s: Could not determine country code for AWS region %s" \
                    % (name, region))
                continue

        # Write to database
        self.db.execute("""
            INSERT INTO
                network_feeds
            (
                network,
                source,
                country,
                is_anycast
            )
            VALUES
            (
                %s, %s, %s, %s
            )
            ON CONFLICT (network, source) DO NOTHING
            """, "%s" % network, name, cc, is_anycast,
        )
def _import_spamhaus_drop(self, name, f):
    """
        Import Spamhaus DROP IP feeds
    """
    # Count how many usable lines the feed contained
    lines = 0

    # Walk through all lines
    for line in f:
        # Decode the line
        line = line.decode("utf-8")

        # Strip off any comments
        line, _, comment = line.partition(";")

        # Ignore empty lines
        if not line:
            continue

        # Strip any excess whitespace
        line = line.strip()

        # Increment line counter
        lines += 1

        # Parse the network
        try:
            network = ipaddress.ip_network(line)
        except ValueError as e:
            log.warning("%s: Could not parse network: %s - %s" % (name, line, e))
            continue

        # Check network
        if not self._check_parsed_network(network):
            log.warning("%s: Skipping bogus network: %s" % (name, network))
            continue

        # Insert into the database
        self.db.execute("""
            INSERT INTO
                network_feeds
            (
                network,
                source,
                is_drop
            )
            VALUES
            (
                %s, %s, %s
            )""", "%s" % network, name, True,
        )

    # Raise an exception if we could not import anything
    if not lines:
        raise RuntimeError("Received bogus feed %s with no data" % name)
def _import_spamhaus_asndrop(self, name, f):
    """
        Import Spamhaus ASNDROP feed
    """
    # The feed is JSON-lines: one object per line
    for line in f:
        # Decode the line
        line = line.decode("utf-8")

        # Parse JSON
        try:
            line = json.loads(line)
        except json.JSONDecodeError as e:
            log.warning("%s: Unable to parse JSON object %s: %s" % (name, line, e))
            continue

        # Fetch the type of this object
        type = line.get("type")

        # Skip any metadata objects
        if type == "metadata":
            continue

        # Fetch the ASN
        asn = line.get("asn")

        # Skip any lines without an ASN
        if not asn:
            continue

        # Filter invalid ASNs
        if not self._check_parsed_asn(asn):
            log.warning("%s: Skipping bogus ASN %s" % (name, asn))
            continue

        # Write to the database
        self.db.execute("""
            INSERT INTO
                autnum_feeds
            (
                number,
                source,
                is_drop
            )
            VALUES
            (
                %s, %s, %s
            )""", "%s" % asn, name, True,
        )
2179 def _parse_bool(block
, key
):
2180 val
= block
.get(key
)
2182 # There is no point to proceed when we got None
2186 # Convert to lowercase
2190 if val
in ("yes", "1"):
2194 if val
in ("no", "0"):
def handle_import_countries(self, ns):
    """
    Replace the countries table with the content of the given file(s).

    Each non-comment line is expected to hold a country code, a continent
    code and the country name, separated by whitespace.
    """
    with self.db.transaction():
        # Drop all data that we have
        self.db.execute("TRUNCATE TABLE countries")

        for file in ns.file:
            for line in file:
                line = line.rstrip()

                # Ignore any comments
                if line.startswith("#"):
                    continue

                # Split the line into its three columns
                try:
                    country_code, continent_code, name = line.split(maxsplit=2)
                except ValueError:
                    log.warning("Could not parse line: %s" % line)
                    continue

                self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
                    VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
def split_line(line):
    """
    Split a "key: value" line into a (key, value) tuple.

    Only the first colon separates; both parts have surrounding
    whitespace stripped. A line without a colon yields ("line", "").
    """
    key, colon, val = line.partition(":")

    # Strip any excess space
    key = key.strip()
    val = val.strip()

    return key, val
def read_blocks(f):
    """
    Parse an RPSL-style file into (type, data) pairs, one per block.

    The key of a block's first line defines its type; data maps each key
    to its stripped value.
    """
    for block in iterate_over_blocks(f):
        type = None
        data = {}

        for i, line in enumerate(block):
            key, value = line.split(":", 1)

            # The key of the first line defines the type
            if i == 0:
                type = key

            # Store the value
            data[key] = value.strip()

        yield type, data
def iterate_over_blocks(f, charsets=("utf-8", "latin1")):
    """
    Yield blocks (lists of decoded, comment-stripped lines) from a binary
    file object, where blocks are separated by empty lines.

    Lines starting with '#' or '%' are skipped entirely; each line is
    decoded with the first charset that succeeds.
    """
    block = []

    for line in f:
        # Skip commented lines
        if line.startswith(b"#") or line.startswith(b"%"):
            continue

        # Decode with the first charset that works
        for charset in charsets:
            try:
                line = line.decode(charset)
            except UnicodeDecodeError:
                continue
            else:
                break

        # Remove any comments at the end of line
        line, hash, comment = line.partition("#")

        # Strip any whitespace at the end of the line
        line = line.rstrip()

        # If we cut off some comment and the line is empty, we can skip it
        if comment and not line:
            continue

        # If the line has some content, keep collecting it
        if line:
            block.append(line)
            continue

        # End the block on an empty line
        if block:
            yield block

        # Start a fresh block
        block = []

    # Return the last block
    if block:
        yield block
def iterate_over_lines(f):
    """
    Yield each line of a binary file object decoded to str with any
    trailing whitespace removed.
    """
    for raw in f:
        # Decode the line
        decoded = raw.decode()

        # Drop the line ending
        yield decoded.rstrip()
2301 # Run the command line interface