wiki: Compress images better when using JPEG format

[ipfire.org.git] / src / backend / wiki.py
diff --git a/src/backend/wiki.py b/src/backend/wiki.py

index 00dd64be5fb559dc5de9e3e83e10b800ce58c369..53fe6a97ead02389297d73e95b7fca2ffe15996f 100644 (file)
--- a/src/backend/wiki.py
+++ b/src/backend/wiki.py
@@ -1,25 +1,19 @@
  #!/usr/bin/python3
  
+import PIL
+import PIL.ImageFilter
+import difflib
+import io
  import logging
-import markdown2
  import os.path
  import re
+import tornado.gen
+import urllib.parse
  
  from . import misc
+from . import util
  from .decorators import *
  
-# Used to automatically link some things
-link_patterns = (
-       # Find bug reports
-       (re.compile(r"(?:#(\d+))", re.I), r"https://bugzilla.ipfire.org/show_bug.cgi?id=\1"),
-
-       # Email Addresses
-       (re.compile(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)"), r"mailto:\1"),
-
-       # CVE Numbers
-       (re.compile(r"(?:CVE)[\s\-](\d{4}\-\d+)"), r"https://cve.mitre.org/cgi-bin/cvename.cgi?name=\1"),
-)
-
  class Wiki(misc.Object):
         def _get_pages(self, query, *args):
                 res = self.db.query(query, *args)
@@ -38,7 +32,7 @@ class Wiki(misc.Object):
                 if doc:
                         return doc.title
  
-               return default
+               return default or os.path.basename(page)
  
         def get_page(self, page, revision=None):
                 page = Page.sanitise_page_name(page)
@@ -51,38 +45,141 @@ class Wiki(misc.Object):
                         return self._get_page("SELECT * FROM wiki WHERE page = %s \
                                 ORDER BY timestamp DESC LIMIT 1", page)
  
-       def get_recent_changes(self, limit=None):
-               return self._get_pages("SELECT * FROM wiki \
+       def get_recent_changes(self, account, limit=None):
+               """
+                       Yields the entries changed within the last four weeks,
+                       newest first, skipping those the account may not access.
+
+                       Stops after "limit" entries have been yielded. If limit
+                       is None, all matching entries are yielded.
+               """
+               pages = self._get_pages("SELECT * FROM wiki \
                         WHERE timestamp >= NOW() - INTERVAL '4 weeks' \
-                       ORDER BY timestamp DESC LIMIT %s", limit)
+                       ORDER BY timestamp DESC")
+
+               for page in pages:
+                       if not page.check_acl(account):
+                               continue
+
+                       yield page
  
-       def create_page(self, page, author, content, changes=None):
+                       # Without a limit there is nothing to count down
+                       # (decrementing None would raise a TypeError)
+                       if limit is None:
+                               continue
+
+                       limit -= 1
+                       if not limit:
+                               break
+
+       def create_page(self, page, author, content, changes=None, address=None):
+               """
+                       Stores a new revision of a page and notifies its watchers.
+
+                       The page name is sanitised first. Empty content is stored
+                       as NULL. The author is excluded from the watcher emails.
+
+                       Returns whatever _get_page() returns for the inserted row.
+               """
                 page = Page.sanitise_page_name(page)
  
-               return self._get_page("INSERT INTO wiki(page, author_uid, markdown, changes) \
-                       VALUES(%s, %s, %s, %s) RETURNING *", page, author.uid, content, changes)
+               # Write page to the database
+               page = self._get_page("INSERT INTO wiki(page, author_uid, markdown, changes, address) \
+                       VALUES(%s, %s, %s, %s, %s) RETURNING *", page, author.uid, content or None, changes, address)
+
+               # Send email to all watchers
+               page._send_watcher_emails(excludes=[author])
+
+               return page
  
-       def delete_page(self, page, author):
+       def delete_page(self, page, author, **kwargs):
+               """
+                       Marks a page as deleted by storing a new revision
+                       without any content.
+
+                       Any extra keyword arguments are passed on to create_page().
+               """
                 # Do nothing if the page does not exist
                 if not self.get_page(page):
                         return
  
                 # Just creates a blank last version of the page
-               self.create_page(page, author, None)
+               self.create_page(page, author=author, content=None, **kwargs)
  
-       @staticmethod
-       def _split_url(url):
+       def make_breadcrumbs(self, url):
+               """
+                       Returns a list of (url, title) tuples, one for each
+                       ancestor of the given URL, ordered from the top level
+                       downwards. The URL itself is not included.
+
+                       The title falls back to the last path component.
+               """
+               # Split and strip all empty elements (double slashes)
                 parts = list(e for e in url.split("/") if e)
  
-               num_parts = len(parts)
-               for i in range(num_parts):
-                       yield "/".join(parts[:i])
+               ret = []
+               for part in ("/".join(parts[:i]) for i in range(1, len(parts))):
+                       ret.append(("/%s" % part, self.get_page_title(part, os.path.basename(part))))
  
-       def make_breadcrumbs(self, url):
-               for part in self._split_url(url):
-                       title = self.get_page_title(part, os.path.basename(part))
+               return ret
+
+       def search(self, query, account=None, limit=None):
+               """
+                       Runs a full-text search against the wiki search index and
+                       yields the matches, best ranked first, skipping those the
+                       account may not access.
+
+                       Stops after "limit" matches have been yielded. If limit
+                       is None, all matches are yielded.
+               """
+               query = util.parse_search_query(query)
+
+               res = self._get_pages("SELECT wiki.* FROM wiki_search_index search_index \
+                       LEFT JOIN wiki ON search_index.wiki_id = wiki.id \
+                       WHERE search_index.document @@ to_tsquery('english', %s) \
+                               ORDER BY ts_rank(search_index.document, to_tsquery('english', %s)) DESC",
+                       query, query)
+
+               for page in res:
+                       # Skip any pages the user doesn't have permission for
+                       if not page.check_acl(account):
+                               continue
+
+                       # Return any other pages
+                       yield page
+
+                       # Without a limit there is nothing to count down
+                       # (decrementing None would raise a TypeError)
+                       if limit is None:
+                               continue
+
+                       limit -= 1
+                       if not limit:
+                               break
+
+       def refresh(self):
+               """
+                       Refreshes the materialized view that backs the search.
+
+                       Call this whenever a page has been changed.
+               """
+               statement = "REFRESH MATERIALIZED VIEW wiki_search_index"
+
+               self.db.execute(statement)
+
+       # ACL
+
+       def check_acl(self, page, account):
+               """
+                       Checks whether the account may access the given page.
+
+                       Only the ACL with the longest path that prefixes the page
+                       is considered. Without any matching ACL, access is granted.
+               """
+               acls = self.db.query("SELECT * FROM wiki_acls \
+                       WHERE %s ILIKE (path || '%%') ORDER BY LENGTH(path) DESC LIMIT 1", page)
+
+               for acl in acls:
+                       # Anonymous users never pass an ACL
+                       if not account:
+                               return False
+
+                       # Permission is granted if the user is in any of the listed groups
+                       return any(group in account.groups for group in acl.groups)
+
+               # Nothing restricts this page
+               return True
+
+       # Files
+
+       def _get_files(self, query, *args):
+               """
+                       Runs the query and yields a File object for each row
+               """
+               for entry in self.db.query(query, *args):
+                       yield File(self.backend, entry.id, data=entry)
+
+       def _get_file(self, query, *args):
+               """
+                       Runs the query and returns a File object for the
+                       result, or None if there was no result
+               """
+               row = self.db.get(query, *args)
+               if not row:
+                       return None
+
+               return File(self.backend, row.id, data=row)
  
-                       yield ("/%s" % part, title)
+       def get_files(self, path):
+               """
+                       Returns a list of all files in path that have not
+                       been deleted, sorted by filename
+               """
+               return list(self._get_files("SELECT * FROM wiki_files \
+                       WHERE path = %s AND deleted_at IS NULL ORDER BY filename", path))
+
+       def get_file_by_path(self, path):
+               """
+                       Looks up a file that has not been deleted by its
+                       full path (directory and filename)
+               """
+               directory, name = os.path.split(path)
+
+               return self._get_file("SELECT * FROM wiki_files \
+                       WHERE path = %s AND filename = %s AND deleted_at IS NULL", directory, name)
+
+       def upload(self, path, filename, data, mimetype, author, address):
+               """
+                       Stores the data as a new blob and creates a file entry
+                       that references it. Returns the new file.
+               """
+               # The blob has to exist before the file can reference it
+               stored = self.db.get("INSERT INTO wiki_blobs(data) VALUES(%s) RETURNING id", data)
+
+               # Collect all values for the file entry
+               values = (path, filename, author.uid, address, mimetype, stored.id, len(data))
+
+               entry = self._get_file("INSERT INTO wiki_files(path, filename, author_uid, address, \
+                       mimetype, blob_id, size) VALUES(%s,  %s, %s, %s, %s, %s, %s) RETURNING *", *values)
+
+               return entry
+
+       def find_image(self, path, filename):
+               """
+                       Searches for an image called filename, first in path
+                       and then in the parent of path.
+
+                       Returns the first match, or None if nothing was found.
+               """
+               candidates = (path, os.path.dirname(path))
+
+               for directory in candidates:
+                       found = self.get_file_by_path(os.path.join(directory, filename))
+                       if not found:
+                               continue
+
+                       # Only images are of interest
+                       if found.is_image():
+                               return found
+
+               return None
  
  
  class Page(misc.Object):
@@ -90,6 +187,13 @@ class Page(misc.Object):
                 self.id = id
                 self.data = data
  
+       def __repr__(self):
+               # Shows the class name, the page and the timestamp of this revision
+               name = self.__class__.__name__
+
+               return "<%s %s %s>" % (name, self.page, self.timestamp)
+
+       def __eq__(self, other):
+               """
+                       Two objects of this class are equal if they have the same ID
+               """
+               if isinstance(other, self.__class__):
+                       return self.id == other.id
+
+               # Let Python try the reflected comparison for any other types
+               # instead of implicitly returning None
+               # NOTE(review): defining __eq__ without __hash__ makes instances
+               # unhashable - confirm that no caller puts them into sets or dicts
+               return NotImplemented
+
         def __lt__(self, other):
                 if isinstance(other, self.__class__):
                         if self.page == other.page:
@@ -119,13 +223,17 @@ class Page(misc.Object):
         def url(self):
                 return self.page
  
+       @property
+       def full_url(self):
+               """
+                       The URL of this page including scheme and hostname
+               """
+               path = self.url
+
+               return "https://wiki.ipfire.org%s" % path
+
         @property
         def page(self):
+               """
+                       The "page" field of the data this object was created with
+               """
                 return self.data.page
  
         @property
         def title(self):
-               return self._title or self.page[1:]
+               """
+                       The title of the page. Falls back to the last path
+                       component of the page name if _title is empty.
+               """
+               return self._title or os.path.basename(self.page[1:])
  
         @property
         def _title(self):
@@ -147,16 +255,87 @@ class Page(misc.Object):
         def _render(self, text):
                 logging.debug("Rendering %s" % self)
  
-               return markdown2.markdown(text, link_patterns=link_patterns,
-                       extras=["footnotes", "link-patterns", "wiki-tables"])
+               # Link images
+               replacements = []
+               for match in re.finditer(r"!\[(.*?)\]\((.*?)\)", text):
+                       alt_text, url = match.groups()
+
+                       # Skip any absolute and external URLs
+                       if url.startswith("/") or url.startswith("https://") or url.startswith("http://"):
+                               continue
+
+                       # Try to split query string
+                       url, delimiter, qs = url.partition("?")
+
+                       # Parse query arguments
+                       args = urllib.parse.parse_qs(qs)
+
+                       # Find image
+                       file = self.backend.wiki.find_image(self.page, url)
+                       if not file:
+                               continue
+
+                       # Scale down the image if not already done
+                       if not "s" in args:
+                               args["s"] = "768"
+
+                       # Format URL
+                       url = "%s?%s" % (file.url, urllib.parse.urlencode(args))
+
+                       replacements.append((match.span(), file, alt_text, url))
+
+               # Apply all replacements
+               for (start, end), file, alt_text, url in reversed(replacements):
+                       text = text[:start] + "[![%s](%s)](%s?action=detail)" % (alt_text, url, file.url) + text[end:]
+
+               # Add wiki links
+               patterns = (
+                       (r"\[\[([\w\d\/\-\.]+)(?:\|(.+?))\]\]", r"\1", r"\2", None, True),
+                       (r"\[\[([\w\d\/\-\.]+)\]\]", r"\1", r"\1", self.backend.wiki.get_page_title, True),
+
+                       # External links
+                       (r"\[\[((?:ftp|git|https?|rsync|sftp|ssh|webcal)\:\/\/.+?)(?:\|(.+?))\]\]",
+                               r"\1", r"\2", None, False),
+
+                       # Mail
+                       (r"\[\[([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)\]\]",
+                               r"\1", r"\1", None, False),
+               )
+
+               for pattern, link, title, repl, internal in patterns:
+                       replacements = []
+
+                       for match in re.finditer(pattern, text):
+                               l = match.expand(link)
+                               t = match.expand(title)
+
+                               if internal:
+                                       # Allow relative links
+                                       if not l.startswith("/"):
+                                               l = os.path.join(self.page, l)
+
+                                       # Normalise links
+                                       l = os.path.normpath(l)
+
+                               if callable(repl):
+                                       t = repl(l) or t
+
+                               replacements.append((match.span(), t or l, l))
+
+                       # Apply all replacements
+                       for (start, end), t, l in reversed(replacements):
+                               text = text[:start] + "[%s](%s)" % (t, l) + text[end:]
+
+               # Borrow this from the blog
+               return self.backend.blog._render_text(text, lang="markdown")
  
         @property
         def markdown(self):
-               return self.data.markdown
+               """
+                       The Markdown source of this revision, or an empty
+                       string if there is none
+               """
+               return self.data.markdown or ""
  
         @property
         def html(self):
-               return self.data.html or self._render(self.markdown)
+               """
+                       The rendered Markdown source. This is rendered again
+                       on every access.
+               """
+               return self._render(self.markdown)
  
         @property
         def timestamp(self):
@@ -170,12 +349,31 @@ class Page(misc.Object):
                 return self.backend.wiki.make_breadcrumbs(self.page)
  
         def get_latest_revision(self):
-               return self.backend.wiki.get_page(self.page)
+               """
+                       Returns the first entry of get_revisions(), or None
+                       if there are no revisions
+               """
+               revisions = self.get_revisions()
+
+               # Return first object
+               for rev in revisions:
+                       return rev
+
+       def get_revisions(self):
+               """
+                       Returns all revisions of this page, newest first
+               """
+               wiki = self.backend.wiki
+
+               return wiki._get_pages("SELECT * FROM wiki \
+                       WHERE page = %s ORDER BY timestamp DESC", self.page)
+
+       @lazy_property
+       def previous_revision(self):
+               """
+                       The most recent revision of this page that is older
+                       than this one
+               """
+               query = "SELECT * FROM wiki \
+                       WHERE page = %s AND timestamp < %s ORDER BY timestamp DESC \
+                       LIMIT 1"
+
+               return self.backend.wiki._get_page(query, self.page, self.timestamp)
  
         @property
         def changes(self):
+               """
+                       The "changes" field of the data this object was created with
+               """
                 return self.data.changes
  
+       # ACL
+
+       def check_acl(self, account):
+               """
+                       Checks whether the account may access this page
+               """
+               wiki = self.backend.wiki
+
+               return wiki.check_acl(self.page, account)
+
         # Sidebar
  
         @lazy_property
@@ -183,8 +381,173 @@ class Page(misc.Object):
                 parts = self.page.split("/")
  
                 while parts:
-                       sidebar = self.backend.wiki.get_page(os.path.join(*parts, "sidebar"))
+                       sidebar = self.backend.wiki.get_page("%s/sidebar" % os.path.join(*parts))
                         if sidebar:
                                 return sidebar
  
                         parts.pop()
+
+       # Watchers
+
+       @lazy_property
+       def diff(self):
+               """
+                       A unified diff between the Markdown source of the
+                       previous revision and this one, or None if there is
+                       no previous revision
+               """
+               previous = self.previous_revision
+               if not previous:
+                       return None
+
+               old_lines = previous.markdown.splitlines()
+               new_lines = self.markdown.splitlines()
+
+               return "\n".join(difflib.unified_diff(old_lines, new_lines))
+
+       @property
+       def watchers(self):
+               """
+                       Yields the accounts of everyone watching this page
+               """
+               rows = self.db.query("SELECT uid FROM wiki_watchlist \
+                       WHERE page = %s", self.page)
+
+               for row in rows:
+                       watcher = self.backend.accounts.get_by_uid(row.uid)
+
+                       # Only return accounts that could be found
+                       if watcher:
+                               yield watcher
+
+       def is_watched_by(self, account):
+               """
+                       Returns True if the account is on the watchlist of this page
+               """
+               entry = self.db.get("SELECT 1 FROM wiki_watchlist \
+                       WHERE page = %s AND uid = %s", self.page, account.uid)
+
+               return bool(entry)
+
+       def add_watcher(self, account):
+               """
+                       Puts the account on the watchlist of this page unless
+                       it is already on it
+               """
+               if not self.is_watched_by(account):
+                       self.db.execute("INSERT INTO wiki_watchlist(page, uid) \
+                       VALUES(%s, %s)", self.page, account.uid)
+
+       def remove_watcher(self, account):
+               """
+                       Takes the account off the watchlist of this page
+               """
+               values = (self.page, account.uid)
+
+               self.db.execute("DELETE FROM wiki_watchlist \
+                       WHERE page = %s AND uid = %s", *values)
+
+       def _send_watcher_emails(self, excludes=None):
+               """
+                       Sends the "page changed" message to everyone watching
+                       this page, except for the accounts listed in excludes.
+
+                       Nothing is sent if there is no previous revision.
+               """
+               # Do not share one list object between calls
+               if excludes is None:
+                       excludes = []
+
+               # Nothing to do if there was no previous revision
+               if not self.previous_revision:
+                       return
+
+               for watcher in self.watchers:
+                       # Skip everyone who is excluded
+                       if watcher in excludes:
+                               logging.debug("Excluding %s", watcher)
+                               continue
+
+                       logging.debug("Sending watcher email to %s", watcher)
+
+                       # Compose message
+                       self.backend.messages.send_template("wiki/messages/page-changed",
+                               recipients=[watcher], page=self, priority=-10)
+
+
+class File(misc.Object):
+       """
+               A file that has been uploaded to the wiki. The metadata comes
+               from the row this object was created with; the content is
+               loaded from wiki_blobs on demand.
+       """
+       def init(self, id, data):
+               self.id   = id
+               self.data = data
+
+       @property
+       def url(self):
+               # The path and the filename joined together
+               return os.path.join(self.path, self.filename)
+
+       @property
+       def path(self):
+               return self.data.path
+
+       @property
+       def filename(self):
+               return self.data.filename
+
+       @property
+       def mimetype(self):
+               return self.data.mimetype
+
+       @property
+       def size(self):
+               return self.data.size
+
+       @lazy_property
+       def author(self):
+               # Returns None if no author has been stored
+               if self.data.author_uid:
+                       return self.backend.accounts.get_by_uid(self.data.author_uid)
+
+       @property
+       def created_at(self):
+               return self.data.created_at
+
+       def is_pdf(self):
+               return self.mimetype in ("application/pdf", "application/x-pdf")
+
+       def is_image(self):
+               return self.mimetype.startswith("image/")
+
+       @lazy_property
+       def blob(self):
+               """
+                       The content of the file as bytes, or None if the blob
+                       could not be found
+               """
+               res = self.db.get("SELECT data FROM wiki_blobs \
+                       WHERE id = %s", self.data.blob_id)
+
+               if res:
+                       return bytes(res.data)
+
+       def get_thumbnail(self, size):
+               """
+                       Returns a thumbnail of this image which fits into a
+                       square of the given size. Thumbnails are generated
+                       only once and then fetched from the cache.
+               """
+               cache_key = "-".join((self.path, util.normalize(self.filename), self.created_at.isoformat(), "%spx" % size))
+
+               # Try to fetch the data from the cache
+               thumbnail = self.memcache.get(cache_key)
+               if thumbnail:
+                       return thumbnail
+
+               # Generate the thumbnail
+               thumbnail = self._generate_thumbnail(size)
+
+               # Put it into the cache for forever
+               self.memcache.set(cache_key, thumbnail)
+
+               return thumbnail
+
+       def _generate_thumbnail(self, size, **args):
+               """
+                       Scales the image down to fit into a square of the given
+                       size and returns it as bytes in its original format.
+
+                       JPEG images are converted to RGB, slightly blurred and
+                       saved with a reduced quality to make them smaller.
+
+                       Any extra keyword arguments are passed on when saving.
+               """
+               image = PIL.Image.open(io.BytesIO(self.blob))
+
+               # Remember the format of the source file, because images that
+               # are returned by convert() and filter() no longer carry it
+               image_format = image.format
+
+               # Remove any alpha-channels
+               if image_format == "JPEG" and not image.mode == "RGB":
+                       # alpha_composite() only accepts RGBA images
+                       if image.mode == "RGBA":
+                               # Make a white background
+                               background = PIL.Image.new("RGBA", image.size, (255,255,255))
+
+                               # Flatten both images together
+                               image = PIL.Image.alpha_composite(background, image)
+
+                       # Remove the alpha channel
+                       image = image.convert("RGB")
+
+               # Resize the image to the desired resolution
+               image.thumbnail((size, size), PIL.Image.LANCZOS)
+
+               if image_format == "JPEG":
+                       # Apply a gaussian blur to make compression easier
+                       image = image.filter(PIL.ImageFilter.GaussianBlur(radius=0.05))
+
+                       # Arguments to optimise the compression
+                       args.update({
+                               "subsampling" : "4:2:0",
+                               "quality"     : 70,
+                       })
+
+               with io.BytesIO() as f:
+                       # If writing out the image does not work with optimization,
+                       # we try to write it out without any optimization.
+                       try:
+                               image.save(f, image_format, optimize=True, **args)
+                       except Exception:
+                               # Drop anything that has been written by the failed attempt
+                               f.seek(0)
+                               f.truncate()
+
+                               image.save(f, image_format, **args)
+
+                       return f.getvalue()