]>
Commit | Line | Data |
---|---|---|
28e09035 MT |
1 | #!/usr/bin/python3 |
2 | ||
3 | import datetime | |
4 | import json | |
5 | import urllib.parse | |
6 | ||
7 | from . import misc | |
8 | from .decorators import * | |
9 | ||
# Prefixes of normalised referrers ("scheme://host/path") that are discarded
# instead of being stored with a visit.
INVALID_REFERRERS = (
    # Neither a scheme nor a host could be parsed out of the referrer
    "://",
) + tuple(
    # Visits that were referred from the local machine
    "%s://%s" % (scheme, host)
    for host in ("localhost", "127.0.0.1")
    for scheme in ("http", "https")
)
20 | ||
21 | class Analytics(misc.Object): | |
def log_unique_visit(self, address, referrer, country_code=None, user_agent=None,
        host=None, uri=None, source=None, medium=None, campaign=None, content=None,
        term=None, q=None):
    """
        Logs a unique visit to a page

        Inserts one row into analytics_unique_visits.

        address      - object whose "asn" attribute is stored with the visit
                       (skipped when falsy)
        referrer     - referring URL; query string and fragment are removed,
                       and referrers matching INVALID_REFERRERS are dropped
        country_code - stored as given
        user_agent   - stored as given; a visit is marked as a bot when the
                       string contains "bot" (case-insensitive)
        host         - stored as given
        uri          - requested URI; everything after the first "?" is parsed
                       and stored separately as JSON-encoded query arguments
        source, medium, campaign, content, term
                     - stored as given, or as an empty string when not set
        q            - search query; split on whitespace into a list of words
    """
    asn, query_args = None, None

    # Visits without a User-Agent are not marked as bots. This has to be
    # initialised here because "bot" is always passed to the INSERT below.
    bot = False

    if referrer:
        # Parse referrer
        url = urllib.parse.urlparse(referrer)

        # Remove everything after ? and #
        referrer = "%s://%s%s" % (url.scheme, url.netloc, url.path)

        # Drop anything that isn't valid (startswith accepts a tuple of prefixes)
        if referrer.startswith(INVALID_REFERRERS):
            referrer = None

    # Fetch the ASN
    if address:
        asn = address.asn

    # Strip URI
    if uri:
        uri, _, query_args = uri.partition("?")

    # Parse query arguments
    if query_args:
        query_args = urllib.parse.parse_qs(query_args)

    # Mark bots
    if user_agent:
        bot = "bot" in user_agent.lower()

    # Split q
    if q:
        q = q.split()

    self.db.execute("""
        INSERT INTO
            analytics_unique_visits
        (
            host,
            uri,
            query_args,
            country_code,
            asn,
            referrer,
            user_agent,
            q,
            bot,
            source,
            medium,
            campaign,
            content,
            term
        )
        VALUES
        (
            %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
        )
        """,
        host, uri, json.dumps(query_args or {}), country_code, asn, referrer or "",
        user_agent, q, bot, source or "", medium or "", campaign or "", content or "",
        term or "",
    )
672be316 | 91 | |
ee3fe069 MT |
def get_total_page_views(self, host, since=None):
    """
        Returns the number of unique visits that were logged for a host

        since may be an absolute timestamp or a datetime.timedelta which is
        counted back from the current UTC time. Without it, all logged
        visits of the host are counted.
    """
    # Turn a relative time span into an absolute timestamp
    if since and isinstance(since, datetime.timedelta):
        since = datetime.datetime.utcnow() - since

    if not since:
        # Count everything that has ever been logged for this host
        row = self.db.get("""
            SELECT
                COUNT(*) AS c
            FROM
                analytics_unique_visits
            WHERE
                host = %s
            """, host,
        )
    else:
        # Only count visits at or after the timestamp
        row = self.db.get("""
            SELECT
                COUNT(*) AS c
            FROM
                analytics_unique_visits
            WHERE
                host = %s
            AND
                created_at >= %s
            """, host, since,
        )

    # A missing row or an empty counter both count as zero
    count = row.c if row else 0

    return count or 0
124 | ||
def get_page_views(self, host, uri, since=None):
    """
        Returns the number of unique visits that were logged for one URI
        of a host

        since may be an absolute timestamp or a datetime.timedelta which is
        counted back from the current UTC time. Without it, all logged
        visits of the page are counted.
    """
    # Turn a relative time span into an absolute timestamp
    if since and isinstance(since, datetime.timedelta):
        since = datetime.datetime.utcnow() - since

    # Pick the query and its arguments first, then run it once
    if since:
        query, args = """
            SELECT
                COUNT(*) AS c
            FROM
                analytics_unique_visits
            WHERE
                host = %s
            AND
                uri = %s
            AND
                created_at >= %s
            """, (host, uri, since)
    else:
        query, args = """
            SELECT
                COUNT(*) AS c
            FROM
                analytics_unique_visits
            WHERE
                host = %s
            AND
                uri = %s
            """, (host, uri)

    row = self.db.get(query, *args)

    # Nothing was returned
    if not row:
        return 0

    # An empty counter counts as zero
    return row.c or 0
55ed268d MT |
161 | |
162 | # Popular Pages | |
163 | ||
def get_most_popular_docs_pages(self, host, since=None, offset=None, limit=None):
    """
        Returns a list of the wiki pages under /docs, ordered by the number
        of logged visits (most visited first)

        host   - only visits that were logged for this host are counted
        since  - optional absolute timestamp, or a datetime.timedelta which
                 is counted back from the current UTC time; only visits at
                 or after that time are counted
        offset - passed to OFFSET
        limit  - passed to LIMIT

        The pages are whatever self.backend.wiki._get_pages() yields for
        the query.
    """
    # Make since an absolute timestamp
    if since and isinstance(since, datetime.timedelta):
        since = datetime.datetime.utcnow() - since

    # The wiki page "/" is served as "/docs", every other page as
    # "/docs<page>", which is how pages are matched to logged URIs
    query = """
        SELECT
            wiki.*,
            COUNT(*) AS _c
        FROM
            wiki_current
        LEFT JOIN
            wiki ON wiki_current.id = wiki.id
        LEFT JOIN
            analytics_unique_visits
                ON (CASE WHEN wiki.page = '/' THEN '/docs'
                    ELSE '/docs' || wiki.page END) = analytics_unique_visits.uri
        WHERE
            host = %s
        AND
            uri LIKE '/docs%%'
    """
    args = [host]

    # Only count visits in the requested time window. The timestamp used to
    # be computed but was never applied to the query.
    if since:
        query += """
        AND
            analytics_unique_visits.created_at >= %s
        """
        args.append(since)

    query += """
        GROUP BY
            wiki.id
        ORDER BY
            _c DESC
        LIMIT
            %s
        OFFSET
            %s
    """
    args += [limit, offset]

    pages = self.backend.wiki._get_pages(query, *args)

    return list(pages)
197 | ||
198 | # Search | |
199 | ||
def get_search_queries(self, host, uri, limit=None):
    """
        Returns the search queries that were logged for a page together
        with how often each of them was seen

        host  - only visits that were logged for this host are considered
        uri   - only visits that were logged for this URI are considered
        limit - passed to LIMIT

        The result is a dict which maps each query (its words joined by a
        single space) to its number of visits, most frequent first.
    """
    # Order by frequency before limiting: without ORDER BY, LIMIT would
    # keep an arbitrary subset of the queries
    res = self.db.query("""
        SELECT
            q,
            COUNT(*) AS c
        FROM
            analytics_unique_visits
        WHERE
            host = %s
        AND
            uri = %s
        AND
            q IS NOT NULL
        GROUP BY
            q
        ORDER BY
            c DESC
        LIMIT
            %s
        """, host, uri, limit,
    )

    # q is joined back into a single string of space-separated words
    return { " ".join(row.q) : row.c for row in res }
214 | q | |
215 | LIMIT | |
216 | %s | |
217 | """, host, uri, limit, | |
218 | ) | |
219 | ||
220 | return { " ".join(row.q) : row.c for row in res } |