git.ipfire.org Git - ipfire.org.git/commitdiff
analytics: Record some page views
author Michael Tremer <michael.tremer@ipfire.org>
Sun, 7 Jan 2024 14:50:31 +0000 (14:50 +0000)
committer Michael Tremer <michael.tremer@ipfire.org>
Sun, 7 Jan 2024 14:50:31 +0000 (14:50 +0000)
Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
15 files changed:
Makefile.am
src/backend/analytics.py [new file with mode: 0644]
src/backend/base.py
src/web/auth.py
src/web/base.py
src/web/blog.py
src/web/docs.py
src/web/donate.py
src/web/downloads.py
src/web/fireinfo.py
src/web/handlers.py
src/web/iuse.py
src/web/location.py
src/web/nopaste.py
src/web/users.py

index 37a6c084f88b1531d6225f0aed05918948beed4e..b59bbfa40b0a45425da32ae6c88322307bd8bd84 100644 (file)
@@ -49,6 +49,7 @@ CLEANFILES += \
 backend_PYTHON = \
        src/backend/__init__.py \
        src/backend/accounts.py \
+       src/backend/analytics.py \
        src/backend/asterisk.py \
        src/backend/base.py \
        src/backend/blog.py \
diff --git a/src/backend/analytics.py b/src/backend/analytics.py
new file mode 100644 (file)
index 0000000..cfecbd7
--- /dev/null
@@ -0,0 +1,90 @@
+#!/usr/bin/python3
+
+import datetime
+import json
+import urllib.parse
+
+from . import misc
+from .decorators import *
+
+# Prefixes of referrers that are discarded instead of being recorded.
+# They are compared against the referrer after it has been rebuilt as
+# "<scheme>://<netloc><path>", i.e. without query string and fragment.
+INVALID_REFERRERS = (
+       # Broken schema (the rebuilt referrer starts with "://" if the scheme is empty)
+       "://",
+
+       # Localhost
+       "http://localhost",
+       "https://localhost",
+       "http://127.0.0.1",
+       "https://127.0.0.1",
+)
+
+class Analytics(misc.Object):
+       """
+               Records analytics data about page views in the database
+       """
+       def log_unique_visit(self, address, referrer, country_code=None, user_agent=None,
+                       host=None, uri=None, source=None, medium=None, campaign=None, content=None,
+                       term=None, q=None):
+               """
+                       Logs a unique visit to a page
+
+                       address is the client's address object (its "asn" attribute
+                       is read) or None.
+
+                       referrer is the raw referrer. Its query string and fragment
+                       are removed, and it is dropped entirely if it cannot be
+                       parsed or starts with one of INVALID_REFERRERS.
+
+                       uri is split at the first "?". The path is stored as the
+                       URI and the remainder is parsed and stored as JSON.
+
+                       user_agent is stored as is. The visit is flagged as a bot
+                       if it contains "bot" (case-insensitive).
+
+                       source, medium, campaign, content and term are stored as
+                       empty strings if they are not set.
+
+                       q is a search query which is split at whitespace.
+               """
+               asn, query_args = None, None
+
+               # Visits without a User-Agent are not flagged as bots. This has to
+               # be initialised here because the value is always passed to the
+               # query below.
+               bot = False
+
+               if referrer:
+                       # Parse referrer. The header is sent by the client and might
+                       # be malformed (e.g. an unterminated IPv6 literal) in which
+                       # case urlparse() raises ValueError.
+                       try:
+                               url = urllib.parse.urlparse(referrer)
+                       except ValueError:
+                               referrer = None
+                       else:
+                               # Remove everything after ? and #
+                               referrer = "%s://%s%s" % (url.scheme, url.netloc, url.path)
+
+                               # Drop anything that isn't valid
+                               if referrer.startswith(INVALID_REFERRERS):
+                                       referrer = None
+
+               # Fetch the ASN
+               if address:
+                       asn = address.asn
+
+               # Strip URI
+               if uri:
+                       uri, _, query_args = uri.partition("?")
+
+               # Parse query arguments
+               if query_args:
+                       query_args = urllib.parse.parse_qs(query_args)
+
+               # Mark bots
+               if user_agent:
+                       bot = "bot" in user_agent.lower()
+
+               # Split q
+               if q:
+                       q = q.split()
+
+               self.db.execute("""
+                       INSERT INTO
+                               analytics_unique_visits
+                       (
+                               host,
+                               uri,
+                               query_args,
+                               country_code,
+                               asn,
+                               referrer,
+                               user_agent,
+                               q,
+                               bot,
+                               source,
+                               medium,
+                               campaign,
+                               content,
+                               term
+                       )
+                       VALUES
+                       (
+                               %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
+                       )
+                       """,
+                       host, uri, json.dumps(query_args or {}), country_code, asn, referrer or "",
+                       user_agent, q, bot, source or "", medium or "", campaign or "", content or "",
+                       term or "",
+               )
index 00dcefcbf4b576099d2fbaa3382e0f53be1d3970..7e86251aa86553c63a43cf7511bede17af930635 100644 (file)
@@ -9,6 +9,7 @@ import tornado.httpclient
 
 from . import accounts
 from . import asterisk
+from . import analytics
 from . import blog
 from . import bugzilla
 from . import cache
@@ -65,6 +66,7 @@ class Backend(object):
 
                # Initialize backend modules.
                self.accounts = accounts.Accounts(self)
+               self.analytics = analytics.Analytics(self)
                self.bugzilla = bugzilla.Bugzilla(self)
                self.fireinfo = fireinfo.Fireinfo(self)
                self.iuse = iuse.IUse(self)
index 023ca6e8d4c35f6c272db74290b4bbec6d989ad3..4a4c88a4c2858f8b864fb584223b328e615590bf 100644 (file)
@@ -31,7 +31,7 @@ class AuthenticationMixin(object):
                        self.clear_cookie("session_id")
 
 
-class LoginHandler(AuthenticationMixin, base.BaseHandler):
+class LoginHandler(base.AnalyticsMixin, AuthenticationMixin, base.BaseHandler):
        def get(self):
                next = self.get_argument("next", None)
 
@@ -74,7 +74,7 @@ class LogoutHandler(AuthenticationMixin, base.BaseHandler):
                self.redirect("/")
 
 
-class JoinHandler(base.BaseHandler):
+class JoinHandler(base.AnalyticsMixin, base.BaseHandler):
        def get(self):
                # Redirect logged in users away
                if self.current_user:
index 57dd07dd95c69f3839fc5b1772ca657e6b5caf3b..fec01bdfbb21b79c4b04e7ce56a6a348d4c19f06 100644 (file)
@@ -16,6 +16,9 @@ import tornado.web
 from ..decorators import *
 from .. import util
 
+# Setup logging
+log = logging.getLogger(__name__)
+
 class ratelimit(object):
        """
                A decorator class which limits how often a function can be called
@@ -155,6 +158,17 @@ class BaseHandler(tornado.web.RequestHandler):
                if self.current_address:
                        return self.current_address.country_code
 
+       @property
+       def user_agent(self):
+               """
+                       Returns the value of the HTTP "User-Agent" header or None
+                       if the client did not send one
+               """
+               return self.request.headers.get("User-Agent", None)
+
+       @property
+       def referrer(self):
+               """
+                       Returns the value of the HTTP "Referer" header or None
+                       if the client did not send one
+               """
+               return self.request.headers.get("Referer", None)
+
        def get_argument_int(self, *args, **kwargs):
                arg = self.get_argument(*args, **kwargs)
 
@@ -279,6 +293,60 @@ class BaseHandler(tornado.web.RequestHandler):
                return self.backend.releases
 
 
+class AnalyticsMixin(object):
+       """
+               Mixin for request handlers which logs a visit once the
+               request has been finished
+
+               The handler it is mixed into has to provide user_agent,
+               referrer, current_address, current_country_code, db and
+               backend.
+       """
+       def on_finish(self):
+               """
+                       Collect some data about this request
+               """
+               # Log something
+               log.debug("Analytics for %s:", self)
+               log.debug("  User-Agent: %s", self.user_agent)
+               log.debug("  Referrer  : %s", self.referrer)
+
+               # Do nothing if this request should be ignored
+               if self._ignore_analytics():
+                       return
+
+               with self.db.transaction():
+                       # Log unique visits
+                       self.backend.analytics.log_unique_visit(
+                               address=self.current_address,
+                               referrer=self.referrer,
+                               country_code=self.current_country_code,
+                               user_agent=self.user_agent,
+                               host=self.request.host,
+                               uri=self.request.uri,
+
+                               # UTMs
+                               source=self.get_argument("utm_source", None),
+                               medium=self.get_argument("utm_medium", None),
+                               campaign=self.get_argument("utm_campaign", None),
+                               content=self.get_argument("utm_content", None),
+                               term=self.get_argument("utm_term", None),
+
+                               # Search queries
+                               q=self.get_argument("q", None),
+                       )
+
+       def _ignore_analytics(self):
+               """
+                       Checks if this request should be ignored
+
+                       Returns True for anything that is not a GET request and
+                       for requests from one of the ignored user agents,
+                       otherwise False.
+               """
+               ignored_user_agents = (
+                       "LWP::Simple",
+                       "check_http",
+               )
+
+               # Only log GET requests
+               if self.request.method != "GET":
+                       return True
+
+               # The User-Agent header is optional, so this might be None
+               user_agent = self.user_agent or ""
+
+               # Ignore everything from matching user agents
+               return user_agent.startswith(ignored_user_agents)
+
+
 class APIHandler(BaseHandler):
        def check_xsrf_cookie(self):
                """
index 5d7dfcae1fb865bbaa391fca3f7c2d0ee0d4d1ae..d5e0d0da5f537e55163ed4053be61c856a983718 100644 (file)
@@ -8,7 +8,7 @@ import tornado.web
 from . import base
 from . import ui_modules
 
-class IndexHandler(base.BaseHandler):
+class IndexHandler(base.AnalyticsMixin, base.BaseHandler):
        def get(self):
                latest_post = None
 
@@ -29,7 +29,7 @@ class IndexHandler(base.BaseHandler):
                self.render("blog/index.html", q=q, posts=posts, latest_post=latest_post)
 
 
-class FeedHandler(base.BaseHandler):
+class FeedHandler(base.AnalyticsMixin, base.BaseHandler):
        def get(self):
                posts = self.backend.blog.get_newest(limit=10)
                if not posts:
@@ -46,7 +46,7 @@ class FeedHandler(base.BaseHandler):
                        now=datetime.datetime.now())
 
 
-class PostHandler(base.BaseHandler):
+class PostHandler(base.AnalyticsMixin, base.BaseHandler):
        def get(self, slug):
                post = self.backend.blog.get_by_slug(slug)
                if not post:
@@ -117,7 +117,7 @@ class DraftsHandler(base.BaseHandler):
                self.render("blog/drafts.html", drafts=drafts)
 
 
-class YearHandler(base.BaseHandler):
+class YearHandler(base.AnalyticsMixin, base.BaseHandler):
        def get(self, year):
                posts = self.backend.blog.get_by_year(year)
                if not posts:
index 1570a3d45aef7051e928eca520806ba93936c7b0..39f6547601be8450f9460d8ecd56664c9062a868 100644 (file)
@@ -6,7 +6,7 @@ import tornado.web
 from . import base
 from . import ui_modules
 
-class PageHandler(base.BaseHandler):
+class PageHandler(base.AnalyticsMixin, base.BaseHandler):
        @property
        def action(self):
                return self.get_argument("action", None)
@@ -104,7 +104,7 @@ class FilesHandler(base.BaseHandler):
                self.render("docs/files/index.html", path=path, files=files)
 
 
-class FileHandler(base.BaseHandler):
+class FileHandler(base.AnalyticsMixin, base.BaseHandler):
        @property
        def action(self):
                return self.get_argument("action", None)
@@ -357,7 +357,7 @@ class DeleteFileHandler(base.BaseHandler):
                self.redirect("/docs%s/_files" % file.path)
 
 
-class SearchHandler(base.BaseHandler):
+class SearchHandler(base.AnalyticsMixin, base.BaseHandler):
        @base.ratelimit(minutes=5, requests=25)
        def get(self):
                q = self.get_argument("q")
@@ -369,19 +369,19 @@ class SearchHandler(base.BaseHandler):
                self.render("docs/search-results.html", q=q, pages=pages)
 
 
-class RecentChangesHandler(base.BaseHandler):
+class RecentChangesHandler(base.AnalyticsMixin, base.BaseHandler):
        def get(self):
                recent_changes = self.backend.wiki.get_recent_changes(self.current_user, limit=50)
 
                self.render("docs/recent-changes.html", recent_changes=recent_changes)
 
 
-class TreeHandler(base.BaseHandler):
+class TreeHandler(base.AnalyticsMixin, base.BaseHandler):
        def get(self):
                self.render("docs/tree.html", pages=self.backend.wiki)
 
 
-class WatchlistHandler(base.BaseHandler):
+class WatchlistHandler(base.AnalyticsMixin, base.BaseHandler):
        @tornado.web.authenticated
        def get(self):
                pages = self.backend.wiki.get_watchlist(self.current_user)
index bae215745af82879887c0c510f22a932b4f9e427..fb243708785b0efb89c84454b4d5a84148f8cd5b 100644 (file)
@@ -12,7 +12,7 @@ SKUS = {
 }
 DEFAULT_SKU = "IPFIRE-DONATION"
 
-class DonateHandler(base.BaseHandler):
+class DonateHandler(base.AnalyticsMixin, base.BaseHandler):
        def get(self):
                if self.current_user:
                        country = self.current_user.country_code
index a0fb2f0260d8fd2d7f9a32506817fd072f61dc08..de1c79a18844e6ce82f945f880195753fe612d31 100644 (file)
@@ -5,7 +5,7 @@ import tornado.web
 
 from . import base
 
-class IndexHandler(base.BaseHandler):
+class IndexHandler(base.AnalyticsMixin, base.BaseHandler):
        def get(self):
                release = self.backend.releases.get_latest()
                if not release:
@@ -15,7 +15,7 @@ class IndexHandler(base.BaseHandler):
                self.redirect("/downloads/%s" % release.slug)
 
 
-class MirrorsHandler(base.BaseHandler):
+class MirrorsHandler(base.AnalyticsMixin, base.BaseHandler):
        def get(self):
                mirrors = self.backend.mirrors.get_by_countries()
                if not mirrors:
@@ -24,7 +24,7 @@ class MirrorsHandler(base.BaseHandler):
                self.render("downloads/mirrors.html", mirrors=mirrors)
 
 
-class ReleaseHandler(base.BaseHandler):
+class ReleaseHandler(base.AnalyticsMixin, base.BaseHandler):
        def get(self, slug):
                release = self.backend.releases.get_by_sname(slug)
                if not release:
@@ -33,12 +33,12 @@ class ReleaseHandler(base.BaseHandler):
                self.render("downloads/release.html", release=release)
 
 
-class ThankYouHandler(base.BaseHandler):
+class ThankYouHandler(base.AnalyticsMixin, base.BaseHandler):
        def get(self):
                self.render("downloads/thank-you.html")
 
 
-class FileHandler(base.BaseHandler):
+class FileHandler(base.AnalyticsMixin, base.BaseHandler):
        def prepare(self):
                self.set_header("Pragma", "no-cache")
 
index 8b0f8e8e759287d86ac8b2865611b1d702c14909..fb33a92c1d576330ff8d302856fc2feee3a764d9 100644 (file)
@@ -45,7 +45,7 @@ class ProfileSendHandler(BaseHandler):
                self.finish("Your profile was successfully saved to the database.")
 
 
-class IndexHandler(BaseHandler):
+class IndexHandler(base.AnalyticsMixin, BaseHandler):
        def get(self):
                data = {
                        "when"           : self.when,
@@ -71,14 +71,14 @@ class IndexHandler(BaseHandler):
                self.render("fireinfo/index.html", **data)
 
 
-class DriverDetail(BaseHandler):
+class DriverDetail(base.AnalyticsMixin, BaseHandler):
        def get(self, driver):
                devices = self.fireinfo.get_devices_by_driver(driver, when=self.when)
 
                self.render("fireinfo/driver.html", driver=driver, devices=devices)
 
 
-class ProfileHandler(BaseHandler):
+class ProfileHandler(base.AnalyticsMixin, BaseHandler):
        def get(self, profile_id):
                profile = self.fireinfo.get_profile(profile_id, when=self.when)
 
@@ -88,7 +88,7 @@ class ProfileHandler(BaseHandler):
                self.render("fireinfo/profile.html", profile=profile)
 
 
-class RandomProfileHandler(BaseHandler):
+class RandomProfileHandler(base.AnalyticsMixin, BaseHandler):
        def get(self):
                profile = self.fireinfo.get_random_profile(when=self.when)
                if not profile:
@@ -97,7 +97,7 @@ class RandomProfileHandler(BaseHandler):
                self.redirect("/profile/%s" % profile.profile_id)
 
 
-class ReleasesHandler(BaseHandler):
+class ReleasesHandler(base.AnalyticsMixin, BaseHandler):
        def get(self):
                data = {
                        "releases" : self.fireinfo.get_releases_map(when=self.when),
@@ -107,19 +107,19 @@ class ReleasesHandler(BaseHandler):
                return self.render("fireinfo/releases.html", **data)
 
 
-class ProcessorsHandler(BaseHandler):
+class ProcessorsHandler(base.AnalyticsMixin, BaseHandler):
        def get(self):
                return self.render("fireinfo/processors.html", when=self.when)
 
 
-class VendorsHandler(BaseHandler):
+class VendorsHandler(base.AnalyticsMixin, BaseHandler):
        def get(self):
                vendors = self.fireinfo.get_vendor_list(when=self.when)
 
                self.render("fireinfo/vendors.html", vendors=vendors)
 
 
-class VendorHandler(BaseHandler):
+class VendorHandler(base.AnalyticsMixin, BaseHandler):
        def get(self, subsystem, vendor_id):
                devices = self.fireinfo.get_devices_by_vendor(subsystem, vendor_id, when=self.when)
                if not devices:
index 739c4b62b2e5d1c04fae37bd561c088b8cc453a2..4372dc8f9c63defa5d0aa3a110c05a2052fbec91 100644 (file)
@@ -2,7 +2,7 @@
 
 from . import base
 
-class IndexHandler(base.BaseHandler):
+class IndexHandler(base.AnalyticsMixin, base.BaseHandler):
        """
                This handler displays the welcome page.
        """
@@ -13,7 +13,7 @@ class IndexHandler(base.BaseHandler):
                return self.render("index.html", latest_release=latest_release)
 
 
-class StaticHandler(base.BaseHandler):
+class StaticHandler(base.AnalyticsMixin, base.BaseHandler):
        def initialize(self, template):
                self._template = template
 
index faa6d40661b7eef165278f0a0f260b128a141b02..c3ab4927fc6a1132d3904ce7ccd803d6ef05ac10 100644 (file)
@@ -5,7 +5,7 @@ import tornado.web
 
 from . import base
 
-class ImageHandler(base.BaseHandler):
+class ImageHandler(base.AnalyticsMixin, base.BaseHandler):
        def write_error(self, status_code, **kwargs):
                """
                        Select a random image from the errors directory
index de7a9cce59f66d30d1ef936078f87482e448bda3..c466c447d82786ddb29b07014ff41cb3fe288912 100644 (file)
@@ -6,14 +6,14 @@ from .. import util
 
 from . import base
 
-class IndexHandler(base.BaseHandler):
+class IndexHandler(base.AnalyticsMixin, base.BaseHandler):
        def get(self):
                self.render("location/index.html",
                        address=self.current_address,
                )
 
 
-class LookupHandler(base.BaseHandler):
+class LookupHandler(base.AnalyticsMixin, base.BaseHandler):
        async def get(self, address):
                # Lookup address
                address = util.Address(self.backend, address)
index a15b72f4ffa72cb8892f8d220b465731fe6a7785..e3c491835559833231f6f9f755911b34a1be2e30 100644 (file)
@@ -5,7 +5,7 @@ import tornado.web
 from . import base
 from . import ui_modules
 
-class CreateHandler(base.BaseHandler):
+class CreateHandler(base.AnalyticsMixin, base.BaseHandler):
        MODES = ("paste", "upload")
 
        def get(self):
@@ -64,7 +64,7 @@ class CreateHandler(base.BaseHandler):
                return 2 * (1024 ** 2)
 
 
-class RawHandler(base.BaseHandler):
+class RawHandler(base.AnalyticsMixin, base.BaseHandler):
        def get(self, uid):
                entry = self.backend.nopaste.get(uid)
                if not entry:
@@ -84,7 +84,7 @@ class RawHandler(base.BaseHandler):
                self.finish(content)
 
 
-class ViewHandler(base.BaseHandler):
+class ViewHandler(base.AnalyticsMixin, base.BaseHandler):
        def get(self, uid):
                entry = self.backend.nopaste.get(uid)
                if not entry:
index 003caebbbeea280b76c345d452874d434fa729be..204c608fd27e3540e13a8b120c7143cd5d62c317 100644 (file)
@@ -16,7 +16,7 @@ from . import ui_modules
 COLOUR_LIGHT = (237,232,232)
 COLOUR_DARK  = (49,53,60)
 
-class IndexHandler(base.BaseHandler):
+class IndexHandler(base.AnalyticsMixin, base.BaseHandler):
        @tornado.web.authenticated
        def get(self):
                results = None
@@ -31,7 +31,7 @@ class IndexHandler(base.BaseHandler):
                self.render("users/index.html", q=q, results=results)
 
 
-class ShowHandler(base.BaseHandler):
+class ShowHandler(base.AnalyticsMixin, base.BaseHandler):
        @tornado.web.authenticated
        async def get(self, uid):
                account = self.backend.accounts.get_by_uid(uid)