From: Michael Tremer
Date: Sun, 7 Jan 2024 14:50:31 +0000 (+0000)
Subject: analytics: Record some page views
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=28e090352cc56de206f40f7d61f9dc11396ab9c0;p=ipfire.org.git

analytics: Record some page views

Signed-off-by: Michael Tremer
---

diff --git a/Makefile.am b/Makefile.am
index 37a6c084..b59bbfa4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -49,6 +49,7 @@ CLEANFILES += \
 backend_PYTHON = \
 	src/backend/__init__.py \
 	src/backend/accounts.py \
+	src/backend/analytics.py \
 	src/backend/asterisk.py \
 	src/backend/base.py \
 	src/backend/blog.py \
diff --git a/src/backend/analytics.py b/src/backend/analytics.py
new file mode 100644
index 00000000..cfecbd78
--- /dev/null
+++ b/src/backend/analytics.py
@@ -0,0 +1,98 @@
+#!/usr/bin/python3
+
+import datetime
+import json
+import urllib.parse
+
+from . import misc
+from .decorators import *
+
+INVALID_REFERRERS = (
+	# Broken schema
+	"://",
+
+	# Localhost
+	"http://localhost",
+	"https://localhost",
+	"http://127.0.0.1",
+	"https://127.0.0.1",
+)
+
+class Analytics(misc.Object):
+	def log_unique_visit(self, address, referrer, country_code=None, user_agent=None,
+			host=None, uri=None, source=None, medium=None, campaign=None, content=None,
+			term=None, q=None):
+		"""
+			Logs a unique visit to a page
+
+			The referrer is reduced to scheme, host and path, and it is dropped
+			if it cannot be parsed or starts with one of INVALID_REFERRERS.
+			The query string is split off the URI and stored separately, q is
+			split into words, and any user agent containing "bot" is marked as
+			a bot. One row is inserted into analytics_unique_visits.
+		"""
+		asn, query_args = None, None
+
+		if referrer:
+			# Parse referrer (urlparse() raises ValueError on malformed input
+			# like unmatched square brackets in the host part)
+			try:
+				url = urllib.parse.urlparse(referrer)
+			except ValueError:
+				referrer = None
+			else:
+				# Remove everything after ? and #
+				referrer = "%s://%s%s" % (url.scheme, url.netloc, url.path)
+
+				# Drop anything that isn't valid
+				if referrer.startswith(INVALID_REFERRERS):
+					referrer = None
+
+		# Fetch the ASN
+		if address:
+			asn = address.asn
+
+		# Strip URI
+		if uri:
+			uri, _, query_args = uri.partition("?")
+
+		# Parse query arguments
+		if query_args:
+			query_args = urllib.parse.parse_qs(query_args)
+
+		# Mark bots. This must always be set because it is passed to the
+		# query below; requests without a user agent are not marked as bots.
+		bot = ("bot" in user_agent.lower()) if user_agent else False
+
+		# Split q
+		if q:
+			q = q.split()
+
+		self.db.execute("""
+			INSERT INTO
+				analytics_unique_visits
+			(
+				host,
+				uri,
+				query_args,
+				country_code,
+				asn,
+				referrer,
+				user_agent,
+				q,
+				bot,
+				source,
+				medium,
+				campaign,
+				content,
+				term
+			)
+			VALUES
+			(
+				%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
+			)
+			""",
+			host, uri, json.dumps(query_args or {}), country_code, asn, referrer or "",
+			user_agent, q, bot, source or "", medium or "", campaign or "", content or "",
+			term or "",
+		)
diff --git a/src/backend/base.py b/src/backend/base.py
index 00dcefcb..7e86251a 100644
--- a/src/backend/base.py
+++ b/src/backend/base.py
@@ -9,6 +9,7 @@ import tornado.httpclient
 
 from . import accounts
 from . import asterisk
+from . import analytics
 from . import blog
 from . import bugzilla
 from . import cache
@@ -65,6 +66,7 @@ class Backend(object):
 
 		# Initialize backend modules.
self.accounts = accounts.Accounts(self) + self.analytics = analytics.Analytics(self) self.bugzilla = bugzilla.Bugzilla(self) self.fireinfo = fireinfo.Fireinfo(self) self.iuse = iuse.IUse(self) diff --git a/src/web/auth.py b/src/web/auth.py index 023ca6e8..4a4c88a4 100644 --- a/src/web/auth.py +++ b/src/web/auth.py @@ -31,7 +31,7 @@ class AuthenticationMixin(object): self.clear_cookie("session_id") -class LoginHandler(AuthenticationMixin, base.BaseHandler): +class LoginHandler(base.AnalyticsMixin, AuthenticationMixin, base.BaseHandler): def get(self): next = self.get_argument("next", None) @@ -74,7 +74,7 @@ class LogoutHandler(AuthenticationMixin, base.BaseHandler): self.redirect("/") -class JoinHandler(base.BaseHandler): +class JoinHandler(base.AnalyticsMixin, base.BaseHandler): def get(self): # Redirect logged in users away if self.current_user: diff --git a/src/web/base.py b/src/web/base.py index 57dd07dd..fec01bdf 100644 --- a/src/web/base.py +++ b/src/web/base.py @@ -16,6 +16,9 @@ import tornado.web from ..decorators import * from .. 
import util
 
+# Setup logging
+log = logging.getLogger(__name__)
+
 class ratelimit(object):
 	"""
 		A decorator class which limits how often a function can be called
@@ -155,6 +158,17 @@ class BaseHandler(tornado.web.RequestHandler):
 		if self.current_address:
 			return self.current_address.country_code
 
+	@property
+	def user_agent(self):
+		"""
+			Returns the HTTP user agent
+		"""
+		return self.request.headers.get("User-Agent", None)
+
+	@property
+	def referrer(self):
+		return self.request.headers.get("Referer", None)
+
 	def get_argument_int(self, *args, **kwargs):
 		arg = self.get_argument(*args, **kwargs)
 
@@ -279,6 +293,67 @@ class BaseHandler(tornado.web.RequestHandler):
 		return self.backend.releases
 
 
+class AnalyticsMixin(object):
+	def on_finish(self):
+		"""
+			Collect some data about this request
+
+			Logs a unique visit for every request that is not ignored
+			by _ignore_analytics().
+		"""
+		# Log something
+		log.debug("Analytics for %s:", self)
+		log.debug(" User-Agent: %s", self.user_agent)
+		log.debug(" Referrer : %s", self.referrer)
+
+		# Do nothing if this request should be ignored
+		if self._ignore_analytics():
+			return
+
+		with self.db.transaction():
+			# Log unique visits
+			self.backend.analytics.log_unique_visit(
+				address=self.current_address,
+				referrer=self.referrer,
+				country_code=self.current_country_code,
+				user_agent=self.user_agent,
+				host=self.request.host,
+				uri=self.request.uri,
+
+				# UTMs
+				source=self.get_argument("utm_source", None),
+				medium=self.get_argument("utm_medium", None),
+				campaign=self.get_argument("utm_campaign", None),
+				content=self.get_argument("utm_content", None),
+				term=self.get_argument("utm_term", None),
+
+				# Search queries
+				q=self.get_argument("q", None),
+			)
+
+	def _ignore_analytics(self):
+		"""
+			Checks if this request should be ignored
+
+			Returns True for anything but GET requests and for requests
+			from one of the ignored user agents, and False otherwise.
+		"""
+		ignored_user_agents = (
+			"LWP::Simple",
+			"check_http",
+		)
+
+		# Only log GET requests
+		if self.request.method != "GET":
+			return True
+
+		# Clients are not required to send a User-Agent header
+		user_agent = self.user_agent or ""
+
+		# Ignore everything from matching user agents
+		return user_agent.startswith(ignored_user_agents)
+
+
 class APIHandler(BaseHandler):
 	def check_xsrf_cookie(self):
 		"""
diff --git a/src/web/blog.py b/src/web/blog.py
index 5d7dfcae..d5e0d0da 100644
--- a/src/web/blog.py
+++ b/src/web/blog.py
@@ -8,7 +8,7 @@ import tornado.web
 from . import base
 from . import ui_modules
 
-class IndexHandler(base.BaseHandler):
+class IndexHandler(base.AnalyticsMixin, base.BaseHandler):
 	def get(self):
 		latest_post = None
 
@@ -29,7 +29,7 @@ class IndexHandler(base.BaseHandler):
 		self.render("blog/index.html", q=q, posts=posts, latest_post=latest_post)
 
 
-class FeedHandler(base.BaseHandler):
+class FeedHandler(base.AnalyticsMixin, base.BaseHandler):
 	def get(self):
 		posts = self.backend.blog.get_newest(limit=10)
 		if not posts:
@@ -46,7 +46,7 @@ class FeedHandler(base.BaseHandler):
 			now=datetime.datetime.now())
 
 
-class PostHandler(base.BaseHandler):
+class PostHandler(base.AnalyticsMixin, base.BaseHandler):
 	def get(self, slug):
 		post = self.backend.blog.get_by_slug(slug)
 		if not post:
@@ -117,7 +117,7 @@ class DraftsHandler(base.BaseHandler):
 		self.render("blog/drafts.html", drafts=drafts)
 
 
-class YearHandler(base.BaseHandler):
+class YearHandler(base.AnalyticsMixin, base.BaseHandler):
 	def get(self, year):
 		posts = self.backend.blog.get_by_year(year)
 		if not posts:
diff --git a/src/web/docs.py b/src/web/docs.py
index 1570a3d4..39f65476 100644
--- a/src/web/docs.py
+++ b/src/web/docs.py
@@ -6,7 +6,7 @@ import tornado.web
 from . import base
 from .
import ui_modules -class PageHandler(base.BaseHandler): +class PageHandler(base.AnalyticsMixin, base.BaseHandler): @property def action(self): return self.get_argument("action", None) @@ -104,7 +104,7 @@ class FilesHandler(base.BaseHandler): self.render("docs/files/index.html", path=path, files=files) -class FileHandler(base.BaseHandler): +class FileHandler(base.AnalyticsMixin, base.BaseHandler): @property def action(self): return self.get_argument("action", None) @@ -357,7 +357,7 @@ class DeleteFileHandler(base.BaseHandler): self.redirect("/docs%s/_files" % file.path) -class SearchHandler(base.BaseHandler): +class SearchHandler(base.AnalyticsMixin, base.BaseHandler): @base.ratelimit(minutes=5, requests=25) def get(self): q = self.get_argument("q") @@ -369,19 +369,19 @@ class SearchHandler(base.BaseHandler): self.render("docs/search-results.html", q=q, pages=pages) -class RecentChangesHandler(base.BaseHandler): +class RecentChangesHandler(base.AnalyticsMixin, base.BaseHandler): def get(self): recent_changes = self.backend.wiki.get_recent_changes(self.current_user, limit=50) self.render("docs/recent-changes.html", recent_changes=recent_changes) -class TreeHandler(base.BaseHandler): +class TreeHandler(base.AnalyticsMixin, base.BaseHandler): def get(self): self.render("docs/tree.html", pages=self.backend.wiki) -class WatchlistHandler(base.BaseHandler): +class WatchlistHandler(base.AnalyticsMixin, base.BaseHandler): @tornado.web.authenticated def get(self): pages = self.backend.wiki.get_watchlist(self.current_user) diff --git a/src/web/donate.py b/src/web/donate.py index bae21574..fb243708 100644 --- a/src/web/donate.py +++ b/src/web/donate.py @@ -12,7 +12,7 @@ SKUS = { } DEFAULT_SKU = "IPFIRE-DONATION" -class DonateHandler(base.BaseHandler): +class DonateHandler(base.AnalyticsMixin, base.BaseHandler): def get(self): if self.current_user: country = self.current_user.country_code diff --git a/src/web/downloads.py b/src/web/downloads.py index a0fb2f02..de1c79a1 100644 
--- a/src/web/downloads.py +++ b/src/web/downloads.py @@ -5,7 +5,7 @@ import tornado.web from . import base -class IndexHandler(base.BaseHandler): +class IndexHandler(base.AnalyticsMixin, base.BaseHandler): def get(self): release = self.backend.releases.get_latest() if not release: @@ -15,7 +15,7 @@ class IndexHandler(base.BaseHandler): self.redirect("/downloads/%s" % release.slug) -class MirrorsHandler(base.BaseHandler): +class MirrorsHandler(base.AnalyticsMixin, base.BaseHandler): def get(self): mirrors = self.backend.mirrors.get_by_countries() if not mirrors: @@ -24,7 +24,7 @@ class MirrorsHandler(base.BaseHandler): self.render("downloads/mirrors.html", mirrors=mirrors) -class ReleaseHandler(base.BaseHandler): +class ReleaseHandler(base.AnalyticsMixin, base.BaseHandler): def get(self, slug): release = self.backend.releases.get_by_sname(slug) if not release: @@ -33,12 +33,12 @@ class ReleaseHandler(base.BaseHandler): self.render("downloads/release.html", release=release) -class ThankYouHandler(base.BaseHandler): +class ThankYouHandler(base.AnalyticsMixin, base.BaseHandler): def get(self): self.render("downloads/thank-you.html") -class FileHandler(base.BaseHandler): +class FileHandler(base.AnalyticsMixin, base.BaseHandler): def prepare(self): self.set_header("Pragma", "no-cache") diff --git a/src/web/fireinfo.py b/src/web/fireinfo.py index 8b0f8e8e..fb33a92c 100644 --- a/src/web/fireinfo.py +++ b/src/web/fireinfo.py @@ -45,7 +45,7 @@ class ProfileSendHandler(BaseHandler): self.finish("Your profile was successfully saved to the database.") -class IndexHandler(BaseHandler): +class IndexHandler(base.AnalyticsMixin, BaseHandler): def get(self): data = { "when" : self.when, @@ -71,14 +71,14 @@ class IndexHandler(BaseHandler): self.render("fireinfo/index.html", **data) -class DriverDetail(BaseHandler): +class DriverDetail(base.AnalyticsMixin, BaseHandler): def get(self, driver): devices = self.fireinfo.get_devices_by_driver(driver, when=self.when) 
self.render("fireinfo/driver.html", driver=driver, devices=devices) -class ProfileHandler(BaseHandler): +class ProfileHandler(base.AnalyticsMixin, BaseHandler): def get(self, profile_id): profile = self.fireinfo.get_profile(profile_id, when=self.when) @@ -88,7 +88,7 @@ class ProfileHandler(BaseHandler): self.render("fireinfo/profile.html", profile=profile) -class RandomProfileHandler(BaseHandler): +class RandomProfileHandler(base.AnalyticsMixin, BaseHandler): def get(self): profile = self.fireinfo.get_random_profile(when=self.when) if not profile: @@ -97,7 +97,7 @@ class RandomProfileHandler(BaseHandler): self.redirect("/profile/%s" % profile.profile_id) -class ReleasesHandler(BaseHandler): +class ReleasesHandler(base.AnalyticsMixin, BaseHandler): def get(self): data = { "releases" : self.fireinfo.get_releases_map(when=self.when), @@ -107,19 +107,19 @@ class ReleasesHandler(BaseHandler): return self.render("fireinfo/releases.html", **data) -class ProcessorsHandler(BaseHandler): +class ProcessorsHandler(base.AnalyticsMixin, BaseHandler): def get(self): return self.render("fireinfo/processors.html", when=self.when) -class VendorsHandler(BaseHandler): +class VendorsHandler(base.AnalyticsMixin, BaseHandler): def get(self): vendors = self.fireinfo.get_vendor_list(when=self.when) self.render("fireinfo/vendors.html", vendors=vendors) -class VendorHandler(BaseHandler): +class VendorHandler(base.AnalyticsMixin, BaseHandler): def get(self, subsystem, vendor_id): devices = self.fireinfo.get_devices_by_vendor(subsystem, vendor_id, when=self.when) if not devices: diff --git a/src/web/handlers.py b/src/web/handlers.py index 739c4b62..4372dc8f 100644 --- a/src/web/handlers.py +++ b/src/web/handlers.py @@ -2,7 +2,7 @@ from . import base -class IndexHandler(base.BaseHandler): +class IndexHandler(base.AnalyticsMixin, base.BaseHandler): """ This handler displays the welcome page. 
""" @@ -13,7 +13,7 @@ class IndexHandler(base.BaseHandler): return self.render("index.html", latest_release=latest_release) -class StaticHandler(base.BaseHandler): +class StaticHandler(base.AnalyticsMixin, base.BaseHandler): def initialize(self, template): self._template = template diff --git a/src/web/iuse.py b/src/web/iuse.py index faa6d406..c3ab4927 100644 --- a/src/web/iuse.py +++ b/src/web/iuse.py @@ -5,7 +5,7 @@ import tornado.web from . import base -class ImageHandler(base.BaseHandler): +class ImageHandler(base.AnalyticsMixin, base.BaseHandler): def write_error(self, status_code, **kwargs): """ Select a random image from the errors directory diff --git a/src/web/location.py b/src/web/location.py index de7a9cce..c466c447 100644 --- a/src/web/location.py +++ b/src/web/location.py @@ -6,14 +6,14 @@ from .. import util from . import base -class IndexHandler(base.BaseHandler): +class IndexHandler(base.AnalyticsMixin, base.BaseHandler): def get(self): self.render("location/index.html", address=self.current_address, ) -class LookupHandler(base.BaseHandler): +class LookupHandler(base.AnalyticsMixin, base.BaseHandler): async def get(self, address): # Lookup address address = util.Address(self.backend, address) diff --git a/src/web/nopaste.py b/src/web/nopaste.py index a15b72f4..e3c49183 100644 --- a/src/web/nopaste.py +++ b/src/web/nopaste.py @@ -5,7 +5,7 @@ import tornado.web from . import base from . 
import ui_modules -class CreateHandler(base.BaseHandler): +class CreateHandler(base.AnalyticsMixin, base.BaseHandler): MODES = ("paste", "upload") def get(self): @@ -64,7 +64,7 @@ class CreateHandler(base.BaseHandler): return 2 * (1024 ** 2) -class RawHandler(base.BaseHandler): +class RawHandler(base.AnalyticsMixin, base.BaseHandler): def get(self, uid): entry = self.backend.nopaste.get(uid) if not entry: @@ -84,7 +84,7 @@ class RawHandler(base.BaseHandler): self.finish(content) -class ViewHandler(base.BaseHandler): +class ViewHandler(base.AnalyticsMixin, base.BaseHandler): def get(self, uid): entry = self.backend.nopaste.get(uid) if not entry: diff --git a/src/web/users.py b/src/web/users.py index 003caebb..204c608f 100644 --- a/src/web/users.py +++ b/src/web/users.py @@ -16,7 +16,7 @@ from . import ui_modules COLOUR_LIGHT = (237,232,232) COLOUR_DARK = (49,53,60) -class IndexHandler(base.BaseHandler): +class IndexHandler(base.AnalyticsMixin, base.BaseHandler): @tornado.web.authenticated def get(self): results = None @@ -31,7 +31,7 @@ class IndexHandler(base.BaseHandler): self.render("users/index.html", q=q, results=results) -class ShowHandler(base.BaseHandler): +class ShowHandler(base.AnalyticsMixin, base.BaseHandler): @tornado.web.authenticated async def get(self, uid): account = self.backend.accounts.get_by_uid(uid)