# Add all networks
log.info("Writing networks...")
- # Select all known networks
- rows = self.db.query("""
- WITH known_networks AS (
- SELECT network FROM announcements
- UNION
- SELECT network FROM networks
- UNION
- SELECT network FROM network_feeds
- UNION
- SELECT network FROM network_overrides
- UNION
- SELECT network FROM geofeed_networks
- ),
-
- ordered_networks AS (
- SELECT
- known_networks.network AS network,
- announcements.autnum AS autnum,
- networks.country AS country,
+ # Create a new temporary table where we collect
+ # the networks that we are interested in
+ self.db.execute("""
+ CREATE TEMPORARY TABLE
+ n
+ (
+ network inet NOT NULL,
+ autnum integer,
+ country text,
+ is_anonymous_proxy boolean,
+ is_satellite_provider boolean,
+ is_anycast boolean,
+ is_drop boolean
+ )
+ WITH (FILLFACTOR = 50)
+ """)
- -- Must be part of returned values for ORDER BY clause
- masklen(announcements.network) AS sort_a,
- masklen(networks.network) AS sort_b
- FROM
- known_networks
- LEFT JOIN
- announcements ON known_networks.network <<= announcements.network
- LEFT JOIN
- networks ON known_networks.network <<= networks.network
- ORDER BY
- known_networks.network,
- sort_a DESC,
- sort_b DESC
+ # Add all known networks
+ self.db.execute("""
+ INSERT INTO
+ n
+ (
+ network
)
- -- Return a list of those networks enriched with all
- -- other information that we store in the database
SELECT
- DISTINCT ON (network)
- network,
- autnum,
+ network
+ FROM
+ announcements
- -- Country
- COALESCE(
- (
- SELECT
- country
- FROM
- network_overrides overrides
- WHERE
- networks.network <<= overrides.network
- ORDER BY
- masklen(overrides.network) DESC
- LIMIT 1
- ),
- (
- SELECT
- country
- FROM
- autnum_overrides overrides
- WHERE
- networks.autnum = overrides.number
- ),
- (
- SELECT
- country
- FROM
- network_feeds feeds
- WHERE
- networks.network <<= feeds.network
- ORDER BY
- masklen(feeds.network) DESC
- LIMIT 1
- ),
- (
- SELECT
- country
- FROM
- autnum_feeds feeds
- WHERE
- networks.autnum = feeds.number
- ORDER BY
- source
- LIMIT 1
- ),
- (
- SELECT
- geofeed_networks.country AS country
- FROM
- network_geofeeds
+ UNION
- -- Join the data from the geofeeds
- LEFT JOIN
- geofeeds ON network_geofeeds.url = geofeeds.url
- LEFT JOIN
- geofeed_networks ON geofeeds.id = geofeed_networks.geofeed_id
+ SELECT
+ network
+ FROM
+ networks
- -- Check whether we have a geofeed for this network
- WHERE
- networks.network <<= network_geofeeds.network
- AND
- networks.network <<= geofeed_networks.network
+ UNION
- -- Filter for the best result
- ORDER BY
- masklen(geofeed_networks.network) DESC
- LIMIT 1
- ),
- networks.country
- ) AS country,
+ SELECT
+ network
+ FROM
+ network_feeds
- -- Flags
- COALESCE(
- (
- SELECT
- is_anonymous_proxy
- FROM
- network_overrides overrides
- WHERE
- networks.network <<= overrides.network
- ORDER BY
- masklen(overrides.network) DESC
- LIMIT 1
- ),
- (
- SELECT
- is_anonymous_proxy
- FROM
- network_feeds feeds
- WHERE
- networks.network <<= feeds.network
- ORDER BY
- masklen(feeds.network) DESC
- LIMIT 1
- ),
- (
- SELECT
- is_anonymous_proxy
- FROM
- autnum_feeds feeds
- WHERE
- networks.autnum = feeds.number
- ORDER BY
- source
- LIMIT 1
- ),
- (
- SELECT
- is_anonymous_proxy
- FROM
- autnum_overrides overrides
- WHERE
- networks.autnum = overrides.number
- ),
- FALSE
- ) AS is_anonymous_proxy,
- COALESCE(
- (
- SELECT
- is_satellite_provider
- FROM
- network_overrides overrides
- WHERE
- networks.network <<= overrides.network
- ORDER BY
- masklen(overrides.network) DESC
- LIMIT 1
- ),
- (
- SELECT
- is_satellite_provider
- FROM
- network_feeds feeds
- WHERE
- networks.network <<= feeds.network
- ORDER BY
- masklen(feeds.network) DESC
- LIMIT 1
- ),
- (
- SELECT
- is_satellite_provider
- FROM
- autnum_feeds feeds
- WHERE
- networks.autnum = feeds.number
- ORDER BY
- source
- LIMIT 1
- ),
- (
- SELECT
- is_satellite_provider
- FROM
- autnum_overrides overrides
- WHERE
- networks.autnum = overrides.number
- ),
- FALSE
- ) AS is_satellite_provider,
- COALESCE(
- (
- SELECT
- is_anycast
- FROM
- network_overrides overrides
- WHERE
- networks.network <<= overrides.network
- ORDER BY
- masklen(overrides.network) DESC
- LIMIT 1
- ),
+ UNION
+
+ SELECT
+ network
+ FROM
+ network_overrides
+
+ UNION
+
+ SELECT
+ network
+ FROM
+ geofeed_networks
+ """)
+
+ # Create an index to search through networks faster
+ self.db.execute("""
+ CREATE INDEX
+ n_search
+ ON
+ n
+ USING
+ SPGIST(network)
+ """)
+
+ # Analyze n
+ self.db.execute("ANALYZE n")
+
+ # Apply the AS number to all networks
+ self.db.execute("""
+ -- Join all networks together with their most specific announcements
+ WITH announcements AS (
+ SELECT
+ n.network,
+ announcements.autnum,
+
+ -- Sort all merges and number them so
+ -- that we can later select the best one
+ ROW_NUMBER()
+ OVER
(
- SELECT
- is_anycast
- FROM
- network_feeds feeds
- WHERE
- networks.network <<= feeds.network
+ PARTITION BY
+ n.network
ORDER BY
- masklen(feeds.network) DESC
- LIMIT 1
- ),
+ masklen(announcements.network) DESC
+ ) AS row
+ FROM
+ n
+ JOIN
+ announcements
+ ON
+ announcements.network >>= n.network
+ )
+
+ -- Store the result
+ UPDATE
+ n
+ SET
+ autnum = announcements.autnum
+ FROM
+ announcements
+ WHERE
+ announcements.network = n.network
+ AND
+ announcements.row = 1
+ """,
+ )
+
+ # Apply country information
+ self.db.execute("""
+ WITH networks AS (
+ SELECT
+ n.network,
+ networks.country,
+
+ ROW_NUMBER()
+ OVER
(
- SELECT
- is_anycast
- FROM
- autnum_feeds feeds
- WHERE
- networks.autnum = feeds.number
+ PARTITION BY
+ n.network
ORDER BY
- source
- LIMIT 1
- ),
- (
- SELECT
- is_anycast
- FROM
- autnum_overrides overrides
- WHERE
- networks.autnum = overrides.number
- ),
- FALSE
- ) AS is_anycast,
- COALESCE(
+ masklen(networks.network) DESC
+ ) AS row
+ FROM
+ n
+ JOIN
+ networks
+ ON
+ networks.network >>= n.network
+ )
+
+ UPDATE
+ n
+ SET
+ country = networks.country
+ FROM
+ networks
+ WHERE
+ networks.network = n.network
+ AND
+ networks.row = 1
+ """,
+ )
+
+ # Add all country information from Geofeeds
+ self.db.execute("""
+ WITH geofeeds AS (
+ SELECT
+ DISTINCT ON (geofeed_networks.network)
+ geofeed_networks.network,
+ geofeed_networks.country
+ FROM
+ geofeeds
+ JOIN
+ network_geofeeds networks
+ ON
+ geofeeds.url = networks.url
+ JOIN
+ geofeed_networks
+ ON
+ geofeeds.id = geofeed_networks.geofeed_id
+ AND
+ networks.network >>= geofeed_networks.network
+ ),
+
+ networks AS (
+ SELECT
+ n.network,
+ geofeeds.country,
+
+ ROW_NUMBER()
+ OVER
(
- SELECT
- is_drop
- FROM
- network_overrides overrides
- WHERE
- networks.network <<= overrides.network
+ PARTITION BY
+ n.network
ORDER BY
- masklen(overrides.network) DESC
- LIMIT 1
- ),
+ masklen(geofeeds.network) DESC
+ ) AS row
+ FROM
+ n
+ JOIN
+ geofeeds
+ ON
+ geofeeds.network >>= n.network
+ )
+
+ UPDATE
+ n
+ SET
+ country = networks.country
+ FROM
+ networks
+ WHERE
+ networks.network = n.network
+ AND
+ networks.row = 1
+ """,
+ )
+
+ # Apply country and flags from feeds
+ self.db.execute("""
+ WITH networks AS (
+ SELECT
+ n.network,
+ network_feeds.country,
+
+ -- Flags
+ network_feeds.is_anonymous_proxy,
+ network_feeds.is_satellite_provider,
+ network_feeds.is_anycast,
+ network_feeds.is_drop,
+
+ ROW_NUMBER()
+ OVER
(
- SELECT
- is_drop
- FROM
- network_feeds feeds
- WHERE
- networks.network <<= feeds.network
+ PARTITION BY
+ n.network
ORDER BY
- masklen(feeds.network) DESC
- LIMIT 1
- ),
+ masklen(network_feeds.network) DESC
+ ) AS row
+ FROM
+ n
+ JOIN
+ network_feeds
+ ON
+ network_feeds.network >>= n.network
+ )
+
+ UPDATE
+ n
+ SET
+ country =
+ COALESCE(networks.country, n.country),
+
+ is_anonymous_proxy =
+ COALESCE(networks.is_anonymous_proxy, n.is_anonymous_proxy),
+
+ is_satellite_provider =
+ COALESCE(networks.is_satellite_provider, n.is_satellite_provider),
+
+ is_anycast =
+ COALESCE(networks.is_anycast, n.is_anycast),
+
+ is_drop =
+ COALESCE(networks.is_drop, n.is_drop)
+ FROM
+ networks
+ WHERE
+ networks.network = n.network
+ AND
+ networks.row = 1
+ """,
+ )
+
+ # Apply country and flags from AS feeds
+ self.db.execute("""
+ WITH networks AS (
+ SELECT
+ n.network,
+ autnum_feeds.country,
+
+ -- Flags
+ autnum_feeds.is_anonymous_proxy,
+ autnum_feeds.is_satellite_provider,
+ autnum_feeds.is_anycast,
+ autnum_feeds.is_drop
+ FROM
+ n
+ JOIN
+ autnum_feeds
+ ON
+ autnum_feeds.number = n.autnum
+ )
+
+ UPDATE
+ n
+ SET
+ country =
+ COALESCE(networks.country, n.country),
+
+ is_anonymous_proxy =
+ COALESCE(networks.is_anonymous_proxy, n.is_anonymous_proxy),
+
+ is_satellite_provider =
+ COALESCE(networks.is_satellite_provider, n.is_satellite_provider),
+
+ is_anycast =
+ COALESCE(networks.is_anycast, n.is_anycast),
+
+ is_drop =
+ COALESCE(networks.is_drop, n.is_drop)
+ FROM
+ networks
+ WHERE
+ networks.network = n.network
+ """)
+
+ # Apply network overrides
+ self.db.execute("""
+ WITH networks AS (
+ SELECT
+ n.network,
+ network_overrides.country,
+
+ -- Flags
+ network_overrides.is_anonymous_proxy,
+ network_overrides.is_satellite_provider,
+ network_overrides.is_anycast,
+ network_overrides.is_drop,
+
+ ROW_NUMBER()
+ OVER
(
- SELECT
- is_drop
- FROM
- autnum_feeds feeds
- WHERE
- networks.autnum = feeds.number
+ PARTITION BY
+ n.network
ORDER BY
- source
- LIMIT 1
- ),
- (
- SELECT
- is_drop
- FROM
- autnum_overrides overrides
- WHERE
- networks.autnum = overrides.number
- ),
- FALSE
- ) AS is_drop
+ masklen(network_overrides.network) DESC
+ ) AS row
+ FROM
+ n
+ JOIN
+ network_overrides
+ ON
+ network_overrides.network >>= n.network
+ )
+
+ UPDATE
+ n
+ SET
+ country =
+ COALESCE(networks.country, n.country),
+
+ is_anonymous_proxy =
+ COALESCE(networks.is_anonymous_proxy, n.is_anonymous_proxy),
+
+ is_satellite_provider =
+ COALESCE(networks.is_satellite_provider, n.is_satellite_provider),
+
+ is_anycast =
+ COALESCE(networks.is_anycast, n.is_anycast),
+
+ is_drop =
+ COALESCE(networks.is_drop, n.is_drop)
FROM
- ordered_networks networks
+ networks
+ WHERE
+ networks.network = n.network
+ AND
+ networks.row = 1
+ """)
+
+ # Apply AS overrides
+ self.db.execute("""
+ WITH networks AS (
+ SELECT
+ n.network,
+ autnum_overrides.country,
+
+ -- Flags
+ autnum_overrides.is_anonymous_proxy,
+ autnum_overrides.is_satellite_provider,
+ autnum_overrides.is_anycast,
+ autnum_overrides.is_drop
+ FROM
+ n
+ JOIN
+ autnum_overrides
+ ON
+ autnum_overrides.number = n.autnum
+ )
+
+ UPDATE
+ n
+ SET
+ country =
+ COALESCE(networks.country, n.country),
+
+ is_anonymous_proxy =
+ COALESCE(networks.is_anonymous_proxy, n.is_anonymous_proxy),
+
+ is_satellite_provider =
+ COALESCE(networks.is_satellite_provider, n.is_satellite_provider),
+
+ is_anycast =
+ COALESCE(networks.is_anycast, n.is_anycast),
+
+ is_drop =
+ COALESCE(networks.is_drop, n.is_drop)
+ FROM
+ networks
+ WHERE
+ networks.network = n.network
+ """)
+
+ # Here we could remove some networks that we no longer need, but since we
+ # already have implemented our deduplication/merge algorithm this would not
+ # be necessary.
+
+ # Export the entire temporary table
+ rows = self.db.query("""
+ SELECT
+ *
+ FROM
+ n
+ ORDER BY
+ network
""")
for row in rows:
# Add all countries
log.info("Writing countries...")
- rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
+
+ # Select all countries
+ rows = self.db.query("""
+ SELECT
+ *
+ FROM
+ countries
+ ORDER BY
+ country_code
+ """,
+ )
for row in rows:
c = writer.add_country(row.country_code)