# Source: git.ipfire.org Git - location/location-database.git, blob tools/downloader.py
# Git object: d6d2744da01d48a89dfd48ea44911e478c57e194
#!/usr/bin/python3
###############################################################################
#                                                                             #
# location-database - A database to determine someone's                       #
# location on the Internet                                                    #
# Copyright (C) 2018 Michael Tremer                                           #
#                                                                             #
# This program is free software: you can redistribute it and/or modify        #
# it under the terms of the GNU General Public License as published by        #
# the Free Software Foundation, either version 3 of the License, or           #
# (at your option) any later version.                                         #
#                                                                             #
# This program is distributed in the hope that it will be useful,             #
# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                #
# GNU General Public License for more details.                                #
#                                                                             #
# You should have received a copy of the GNU General Public License           #
# along with this program. If not, see <http://www.gnu.org/licenses/>.        #
#                                                                             #
###############################################################################
22
import gzip
import logging
import urllib.parse
import urllib.request
26
# Module-level logger shared by the classes in this file
log = logging.getLogger("downloader")

# Pass records on to the handlers of ancestor loggers
log.propagate = True
30
class Downloader(object):
    """
    Builds requests that share a User-Agent header and an optional HTTP proxy.

    Nothing is sent by this class itself: request() returns a
    DownloaderContext which performs the transfer when it is entered.
    """

    # Sent as the User-Agent header of every request (skipped when empty)
    USER_AGENT = "location-database/1.0"

    def __init__(self):
        # Proxy as given to set_proxy(), or None to connect directly
        self.proxy = None

    def set_proxy(self, url):
        """
        Sets a HTTP proxy that is used to perform all requests

        url may be a proxy URL ("http://proxy.example.com:3128") or a
        plain "host:port" string.
        """
        log.info("Using proxy %s", url)
        self.proxy = url

    @staticmethod
    def _proxy_host(url):
        """
        Returns the "host[:port]" part of a proxy setting.

        Request.set_proxy() expects a network location, not a URL, so the
        scheme and any path are dropped. Values without a scheme are
        returned unchanged. Credentials embedded in the URL are not
        interpreted here.
        """
        if "://" in url:
            netloc = urllib.parse.urlsplit(url).netloc
            if netloc:
                return netloc

        return url

    def request(self, url, data=None):
        """
        Prepares a request for url and returns it wrapped in a
        DownloaderContext.

        data is passed through to urllib.request.Request unchanged.
        """
        req = urllib.request.Request(url, data=data)

        # Configure proxy
        if self.proxy:
            req.set_proxy(self._proxy_host(self.proxy), "http")

        # Set User-Agent
        if self.USER_AGENT:
            req.add_header("User-Agent", self.USER_AGENT)

        return DownloaderContext(self, req)
56
57
class DownloaderContext(object):
    """
    Context manager around a single prepared request.

    Entering sends the request and stores the response; leaving closes it.
    Iterating over the object yields the response body as blocks, where a
    block is a list of consecutive non-empty text lines.
    """

    def __init__(self, downloader, request):
        self.downloader = downloader
        self.request = request

        # Save the response object
        self.response = None

    def __enter__(self):
        log.info("Retrieving %s...", self.request.full_url)

        # Send request
        self.response = urllib.request.urlopen(self.request)

        # Log the response headers
        log.debug("Response Headers:")
        for header in self.headers:
            log.debug(" %s: %s", header, self.get_header(header))

        return self

    def __exit__(self, type, value, traceback):
        # Release the connection. Nothing is returned, so an exception
        # raised inside the with-body keeps propagating.
        if self.response is not None:
            self.response.close()

    def __iter__(self):
        """
        Makes the object iterable by going through each block

        Lines are decoded as UTF-8 with a latin1 fallback and stripped of
        trailing whitespace. Empty lines separate blocks; runs of empty
        lines do not produce empty blocks, and a final block that is not
        followed by an empty line is still returned.
        """
        block = []

        for line in self.body:
            # Convert to string (latin1 accepts every byte sequence, so
            # the loop always ends with a decoded line)
            for charset in ("utf-8", "latin1"):
                try:
                    line = line.decode(charset)
                except UnicodeDecodeError:
                    continue
                else:
                    break

            # Strip line-endings
            line = line.rstrip()

            if line:
                block.append(line)
                continue

            # End the block on an empty line
            if block:
                yield block

            # Reset the block
            block = []

        # Return the last block if the body did not end with an empty line
        if block:
            yield block

    @property
    def headers(self):
        """
        The response headers, or None before a response was received.
        """
        if self.response is not None:
            return self.response.headers

        return None

    def get_header(self, name):
        """
        Returns the value of the response header name, or None if it is
        not set or no response is available.
        """
        headers = self.headers
        if headers:
            return headers.get(name)

        return None

    @property
    def body(self):
        """
        Returns a file-like object with the decoded content
        of the response.
        """
        content_type = self.get_header("Content-Type") or ""

        # Compare the bare media type only: drop parameters such as
        # "; charset=..." and ignore letter case
        media_type = content_type.split(";", 1)[0].strip().lower()

        # Decompress any gzipped response on the fly
        if media_type in ("application/x-gzip", "application/gzip"):
            return gzip.GzipFile(fileobj=self.response, mode="rb")

        # Return the response by default
        return self.response
134
135
# Manual test: download every URL given on the command line and print
# the blocks that were found, separated by an empty line
if __name__ == "__main__":
    import sys

    # Enable debug logging
    logging.basicConfig(level=logging.DEBUG)

    d = Downloader()

    for url in sys.argv[1:]:
        print("Downloading %s..." % url)

        with d.request(url) as r:
            for block in r:
                for line in block:
                    print(line)
                print()