wiki: Use non-greedy matching when searching for images

[ipfire.org.git] / src / backend / wiki.py
diff --git a/src/backend/wiki.py b/src/backend/wiki.py

index c963b9a1ca85b644dd1d2a16fbf1708603576ed7..8075d0507416caecdb4a72211a8a5e5ac510ce5b 100644 (file)
--- a/src/backend/wiki.py
+++ b/src/backend/wiki.py
@@ -1,10 +1,13 @@
  #!/usr/bin/python3
  
  import PIL
+import difflib
  import io
  import logging
  import os.path
  import re
+import tornado.gen
+import urllib.parse
  
  from . import misc
  from . import util
@@ -41,17 +44,33 @@ class Wiki(misc.Object):
                         return self._get_page("SELECT * FROM wiki WHERE page = %s \
                                 ORDER BY timestamp DESC LIMIT 1", page)
  
-       def get_recent_changes(self, limit=None):
-               return self._get_pages("SELECT * FROM wiki \
+       def get_recent_changes(self, account, limit=None):
+               pages = self._get_pages("SELECT * FROM wiki \
                         WHERE timestamp >= NOW() - INTERVAL '4 weeks' \
-                       ORDER BY timestamp DESC LIMIT %s", limit)
+                       ORDER BY timestamp DESC")
+
+               for page in pages:
+                       if not page.check_acl(account):
+                               continue
+
+                       yield page
+
+                       limit -= 1
+                       if not limit:
+                               break
  
         def create_page(self, page, author, content, changes=None, address=None):
                 page = Page.sanitise_page_name(page)
  
-               return self._get_page("INSERT INTO wiki(page, author_uid, markdown, changes, address) \
+               # Write page to the database
+               page = self._get_page("INSERT INTO wiki(page, author_uid, markdown, changes, address) \
                         VALUES(%s, %s, %s, %s, %s) RETURNING *", page, author.uid, content or None, changes, address)
  
+               # Send email to all watchers
+               page._send_watcher_emails(excludes=[author])
+
+               return page
+
         def delete_page(self, page, author, **kwargs):
                 # Do nothing if the page does not exist
                 if not self.get_page(page):
@@ -70,16 +89,26 @@ class Wiki(misc.Object):
  
                 return ret
  
-       def search(self, query, limit=None):
+       def search(self, query, account=None, limit=None):
                 query = util.parse_search_query(query)
  
                 res = self._get_pages("SELECT wiki.* FROM wiki_search_index search_index \
                         LEFT JOIN wiki ON search_index.wiki_id = wiki.id \
                         WHERE search_index.document @@ to_tsquery('english', %s) \
-                               ORDER BY ts_rank(search_index.document, to_tsquery('english', %s)) DESC \
-                       LIMIT %s", query, query, limit)
+                               ORDER BY ts_rank(search_index.document, to_tsquery('english', %s)) DESC",
+                       query, query)
+
+               for page in res:
+                       # Skip any pages the user doesn't have permission for
+                       if not page.check_acl(account):
+                               continue
+
+                       # Return any other pages
+                       yield page
  
-               return list(res)
+                       limit -= 1
+                       if not limit:
+                               break
  
         def refresh(self):
                 """
@@ -87,6 +116,28 @@ class Wiki(misc.Object):
                 """
                 self.db.execute("REFRESH MATERIALIZED VIEW wiki_search_index")
  
+       # ACL
+
+       def check_acl(self, page, account):
+               res = self.db.query("SELECT * FROM wiki_acls \
+                       WHERE %s ILIKE (path || '%%') ORDER BY LENGTH(path) DESC LIMIT 1", page)
+
+               for row in res:
+                       # Access not permitted when user is not logged in
+                       if not account:
+                               return False
+
+                       # If user is in a matching group, we grant permission
+                       for group in row.groups:
+                               if group in account.groups:
+                                       return True
+
+                       # Otherwise access is not permitted
+                       return False
+
+               # If no ACLs are found, we permit access
+               return True
+
         # Files
  
         def _get_files(self, query, *args):
@@ -122,12 +173,26 @@ class Wiki(misc.Object):
                         mimetype, blob_id, size) VALUES(%s,  %s, %s, %s, %s, %s, %s) RETURNING *", path,
                         filename, author.uid, address, mimetype, blob.id, len(data))
  
+       def find_image(self, path, filename):
+               for p in (path, os.path.dirname(path)):
+                       file = self.get_file_by_path(os.path.join(p, filename))
+
+                       if file and file.is_image():
+                               return file
+
  
  class Page(misc.Object):
         def init(self, id, data=None):
                 self.id = id
                 self.data = data
  
+       def __repr__(self):
+               return "<%s %s %s>" % (self.__class__.__name__, self.page, self.timestamp)
+
+       def __eq__(self, other):
+               if isinstance(other, self.__class__):
+                       return self.id == other.id
+
         def __lt__(self, other):
                 if isinstance(other, self.__class__):
                         if self.page == other.page:
@@ -157,13 +222,17 @@ class Page(misc.Object):
         def url(self):
                 return self.page
  
+       @property
+       def full_url(self):
+               return "https://wiki.ipfire.org%s" % self.url
+
         @property
         def page(self):
                 return self.data.page
  
         @property
         def title(self):
-               return self._title or self.page[1:]
+               return self._title or os.path.basename(self.page[1:])
  
         @property
         def _title(self):
@@ -185,20 +254,70 @@ class Page(misc.Object):
         def _render(self, text):
                 logging.debug("Rendering %s" % self)
  
+               # Link images
+               replacements = []
+               for match in re.finditer(r"!\[(.*?)\]\((.*?)\)", text):
+                       alt_text, url = match.groups()
+
+                       # Skip any absolute and external URLs
+                       if url.startswith("/") or url.startswith("https://") or url.startswith("http://"):
+                               continue
+
+                       # Try to split query string
+                       url, delimiter, qs = url.partition("?")
+
+                       # Parse query arguments
+                       args = urllib.parse.parse_qs(qs)
+
+                       # Find image
+                       file = self.backend.wiki.find_image(self.page, url)
+                       if not file:
+                               continue
+
+                       # Scale down the image if not already done
+                       if not "s" in args:
+                               args["s"] = "768"
+
+                       # Format URL
+                       url = "%s?%s" % (file.url, urllib.parse.urlencode(args))
+
+                       replacements.append((match.span(), file, alt_text, url))
+
+               # Apply all replacements
+               for (start, end), file, alt_text, url in reversed(replacements):
+                       text = text[:start] + "[![%s](%s)](%s?action=detail)" % (alt_text, url, file.url) + text[end:]
+
+               # Add wiki links
                 patterns = (
-                       (r"\[\[([\w\d\/]+)(?:\|([\w\d\s]+))\]\]", r"/\1", r"\2", None, None),
-                       (r"\[\[([\w\d\/\-]+)\]\]", r"/\1", r"\1", self.backend.wiki.get_page_title, r"\1"),
+                       (r"\[\[([\w\d\/\-\.]+)(?:\|(.+?))\]\]", r"\1", r"\2", None, True),
+                       (r"\[\[([\w\d\/\-\.]+)\]\]", r"\1", r"\1", self.backend.wiki.get_page_title, True),
+
+                       # External links
+                       (r"\[\[((?:ftp|git|https?|rsync|sftp|ssh|webcal)\:\/\/.+?)(?:\|(.+?))\]\]",
+                               r"\1", r"\2", None, False),
+
+                       # Mail
+                       (r"\[\[([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)\]\]",
+                               r"\1", r"\1", None, False),
                 )
  
-               for pattern, link, title, repl, args in patterns:
+               for pattern, link, title, repl, internal in patterns:
                         replacements = []
  
                         for match in re.finditer(pattern, text):
                                 l = match.expand(link)
                                 t = match.expand(title)
  
+                               if internal:
+                                       # Allow relative links
+                                       if not l.startswith("/"):
+                                               l = os.path.join(self.page, l)
+
+                                       # Normalise links
+                                       l = os.path.normpath(l)
+
                                 if callable(repl):
-                                       t = repl(match.expand(args)) or t
+                                       t = repl(l) or t
  
                                 replacements.append((match.span(), t or l, l))
  
@@ -211,11 +330,11 @@ class Page(misc.Object):
  
         @property
         def markdown(self):
-               return self.data.markdown
+               return self.data.markdown or ""
  
         @property
         def html(self):
-               return self.data.html or self._render(self.markdown)
+               return self._render(self.markdown)
  
         @property
         def timestamp(self):
@@ -239,10 +358,21 @@ class Page(misc.Object):
                 return self.backend.wiki._get_pages("SELECT * FROM wiki \
                         WHERE page = %s ORDER BY timestamp DESC", self.page)
  
+       @lazy_property
+       def previous_revision(self):
+               return self.backend.wiki._get_page("SELECT * FROM wiki \
+                       WHERE page = %s AND timestamp < %s ORDER BY timestamp DESC \
+                       LIMIT 1", self.page, self.timestamp)
+
         @property
         def changes(self):
                 return self.data.changes
  
+       # ACL
+
+       def check_acl(self, account):
+               return self.backend.wiki.check_acl(self.page, account)
+
         # Sidebar
  
         @lazy_property
@@ -256,6 +386,69 @@ class Page(misc.Object):
  
                         parts.pop()
  
+       # Watchers
+
+       @lazy_property
+       def diff(self):
+               if self.previous_revision:
+                       diff = difflib.unified_diff(
+                               self.previous_revision.markdown.splitlines(),
+                               self.markdown.splitlines(),
+                       )
+
+                       return "\n".join(diff)
+
+       @property
+       def watchers(self):
+               res = self.db.query("SELECT uid FROM wiki_watchlist \
+                       WHERE page = %s", self.page)
+
+               for row in res:
+                       # Search for account by UID and skip if none was found
+                       account = self.backend.accounts.get_by_uid(row.uid)
+                       if not account:
+                               continue
+
+                       # Return the account
+                       yield account
+
+       def is_watched_by(self, account):
+               res = self.db.get("SELECT 1 FROM wiki_watchlist \
+                       WHERE page = %s AND uid = %s", self.page, account.uid)
+
+               if res:
+                       return True
+
+               return False
+
+       def add_watcher(self, account):
+               if self.is_watched_by(account):
+                       return
+
+               self.db.execute("INSERT INTO wiki_watchlist(page, uid) \
+                       VALUES(%s, %s)", self.page, account.uid)
+
+       def remove_watcher(self, account):
+               self.db.execute("DELETE FROM wiki_watchlist \
+                       WHERE page = %s AND uid = %s", self.page, account.uid)
+
+       def _send_watcher_emails(self, excludes=[]):
+               # Nothing to do if there was no previous revision
+               if not self.previous_revision:
+                       return
+
+               for watcher in self.watchers:
+                       # Skip everyone who is excluded
+                       if watcher in excludes:
+                               logging.debug("Excluding %s" % watcher)
+                               continue
+
+                       logging.debug("Sending watcher email to %s" % watcher)
+
+                       # Compose message
+                       self.backend.messages.send_template("wiki/messages/page-changed",
+                               recipients=[watcher], page=self, priority=-10)
+
  
  class File(misc.Object):
         def init(self, id, data):
@@ -282,6 +475,18 @@ class File(misc.Object):
         def size(self):
                 return self.data.size
  
+       @lazy_property
+       def author(self):
+               if self.data.author_uid:
+                       return self.backend.accounts.get_by_uid(self.data.author_uid)
+
+       @property
+       def created_at(self):
+               return self.data.created_at
+
+       def is_pdf(self):
+               return self.mimetype in ("application/pdf", "application/x-pdf")
+
         def is_image(self):
                 return self.mimetype.startswith("image/")
  
@@ -294,6 +499,22 @@ class File(misc.Object):
                         return bytes(res.data)
  
         def get_thumbnail(self, size):
+               cache_key = "-".join((self.path, util.normalize(self.filename), self.created_at.isoformat(), "%spx" % size))
+
+               # Try to fetch the data from the cache
+               thumbnail = self.memcache.get(cache_key)
+               if thumbnail:
+                       return thumbnail
+
+               # Generate the thumbnail
+               thumbnail = self._generate_thumbnail(size)
+
+               # Put it into the cache for forever
+               self.memcache.set(cache_key, thumbnail)
+
+               return thumbnail
+
+       def _generate_thumbnail(self, size):
                 image = PIL.Image.open(io.BytesIO(self.blob))
  
                 # Resize the image to the desired resolution