# Source: people/ms/libloc.git — src/python/importer.py
# Commit subject: "importer: Write NULL into database when bool is not set"
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import gzip
21 import logging
22 import urllib.request
23
# Initialise logging
# NOTE: propagate is set so records bubble up to the root logger's handlers
log = logging.getLogger("location.importer")
log.propagate = 1

# WHOIS database dumps that are downloaded and parsed block-wise.
# Commented entries are known sources that currently provide no data
# that is useful for this importer.
WHOIS_SOURCES = (
	# African Network Information Centre
	"https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz",

	# Asia Pacific Network Information Centre
	#"https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
	#"https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
	#"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
	#"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
	"https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
	"https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz",

	# American Registry for Internet Numbers
	# XXX there is nothing useful for us in here
	#"https://ftp.arin.net/pub/rr/arin.db",

	# Latin America and Caribbean Network Information Centre
	# XXX ???

	# Réseaux IP Européens
	#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
	#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
	#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
	#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
	"https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
	"https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz",
)

# Extended delegation statistics files from the RIRs; these are plain
# text files that are read line by line.
EXTENDED_SOURCES = (
	# African Network Information Centre
	"https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest",

	# Asia Pacific Network Information Centre
	"https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest",

	# American Registry for Internet Numbers
	"https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest",

	# Latin America and Caribbean Network Information Centre
	"http://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest",

	# Réseaux IP Européens
	"https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest",
)
72
class Downloader(object):
	"""
	Small helper that creates download requests, optionally routing
	them through a configured HTTP proxy.
	"""
	def __init__(self):
		# No proxy is configured by default
		self.proxy = None

	def set_proxy(self, url):
		"""
		Sets a HTTP proxy that is used to perform all requests
		"""
		log.info("Using proxy %s" % url)
		self.proxy = url

	def request(self, url, data=None, return_blocks=False):
		"""
		Builds a request for url (optionally with a POST payload in
		data) and wraps it into a DownloaderContext. If return_blocks
		is set, iterating the context yields whole blocks instead of
		single lines.
		"""
		request = urllib.request.Request(url, data=data)

		# Route the request through the proxy if one is set
		if self.proxy:
			request.set_proxy(self.proxy, "http")

		return DownloaderContext(self, request, return_blocks=return_blocks)
92
93
class DownloaderContext(object):
	"""
	Context manager around a single download: opens the response on
	__enter__, closes it on __exit__, and makes the decoded body
	iterable either line by line or block by block.
	"""
	def __init__(self, downloader, request, return_blocks=False):
		self.downloader = downloader
		self.request = request

		# Should we return one block or a single line?
		self.return_blocks = return_blocks

		# Save the response object (set in __enter__)
		self.response = None

	def __enter__(self):
		log.info("Retrieving %s..." % self.request.full_url)

		# Send request
		self.response = urllib.request.urlopen(self.request)

		# Log the response headers
		log.debug("Response Headers:")
		for header in self.headers:
			log.debug(" %s: %s" % (header, self.get_header(header)))

		return self

	def __exit__(self, type, value, traceback):
		# Close the response so the underlying connection is released
		# right away instead of leaking until garbage collection
		if self.response:
			self.response.close()

	def __iter__(self):
		"""
		Makes the object iterable by going through each block
		"""
		if self.return_blocks:
			return iterate_over_blocks(self.body)

		return iterate_over_lines(self.body)

	@property
	def headers(self):
		# Returns the response headers, or None before __enter__ ran
		if self.response:
			return self.response.headers

	def get_header(self, name):
		# Returns the value of a single response header (or None)
		if self.headers:
			return self.headers.get(name)

	@property
	def body(self):
		"""
		Returns a file-like object with the decoded content
		of the response.
		"""
		content_type = self.get_header("Content-Type")

		# Decompress any gzipped response on the fly
		if content_type in ("application/x-gzip", "application/gzip"):
			return gzip.GzipFile(fileobj=self.response, mode="rb")

		# Return the response by default
		return self.response
153
154
def read_blocks(f):
	"""
	Reads raw lines from f, groups them into blocks and yields one
	(type, data) tuple per block. The type is the key of the block's
	first line; data maps every key to its whitespace-stripped value.
	"""
	for block in iterate_over_blocks(f):
		# Split each line once at the first colon
		pairs = [line.split(":", 1) for line in block]

		# Collect all key/value pairs (values stripped); later
		# duplicate keys overwrite earlier ones
		data = {key: value.strip() for key, value in pairs}

		# The key of the very first line determines the block type
		type = pairs[0][0] if pairs else None

		yield type, data
171
def iterate_over_blocks(f, charsets=("utf-8", "latin1")):
	"""
	Reads raw (bytes) lines from f and yields them grouped into
	blocks: consecutive non-empty lines form one block, an empty line
	ends it. Full-line comments (starting with # or %) are skipped
	and trailing inline comments are cut off.
	"""
	def _decode(raw):
		# Try each charset in turn; the first that works wins
		for charset in charsets:
			try:
				return raw.decode(charset)
			except UnicodeDecodeError:
				continue
		return raw

	block = []

	for raw in f:
		line = _decode(raw)

		# Skip commented lines
		if line.startswith(("#", "%")):
			continue

		# Strip line-endings
		line = line.rstrip()

		# Cut off any comment at the end of the line
		head, _, trailer = line.partition("#")
		if trailer:
			# Drop whitespace that preceded the comment
			head = head.rstrip()

			# Nothing left of the line? Move on
			if not head:
				continue

		if head:
			block.append(head)
		elif block:
			# An empty line terminates the current block
			yield block
			block = []

	# Return the last block
	if block:
		yield block
217
218
def iterate_over_lines(f):
	"""
	Yields each raw line of f decoded to a string with its line
	ending (and any other trailing whitespace) removed.
	"""
	for raw in f:
		yield raw.decode().rstrip()