src/backend/wiki.py

   1 #!/usr/bin/python3
   2
   3 import PIL
   4 import difflib
   5 import io
   6 import logging
   7 import os.path
   8 import re
   9 import urllib.parse
  10
  11 from . import misc
  12 from . import util
  13 from .decorators import *
  14
  15 class Wiki(misc.Object):
  16         def _get_pages(self, query, *args):
  17                 res = self.db.query(query, *args)
  18
  19                 for row in res:
  20                         yield Page(self.backend, row.id, data=row)
  21
  22         def _get_page(self, query, *args):
  23                 res = self.db.get(query, *args)
  24
  25                 if res:
  26                         return Page(self.backend, res.id, data=res)
  27
  28         def get_page_title(self, page, default=None):
  29                 doc = self.get_page(page)
  30                 if doc:
  31                         return doc.title
  32
  33                 return default or os.path.basename(page)
  34
  35         def get_page(self, page, revision=None):
  36                 page = Page.sanitise_page_name(page)
  37                 assert page
  38
  39                 if revision:
  40                         return self._get_page("SELECT * FROM wiki WHERE page = %s \
  41                                 AND timestamp = %s", page, revision)
  42                 else:
  43                         return self._get_page("SELECT * FROM wiki WHERE page = %s \
  44                                 ORDER BY timestamp DESC LIMIT 1", page)
  45
  46         def get_recent_changes(self, account, limit=None):
  47                 pages = self._get_pages("SELECT * FROM wiki \
  48                         WHERE timestamp >= NOW() - INTERVAL '4 weeks' \
  49                         ORDER BY timestamp DESC")
  50
  51                 for page in pages:
  52                         if not page.check_acl(account):
  53                                 continue
  54
  55                         yield page
  56
  57                         limit -= 1
  58                         if not limit:
  59                                 break
  60
  61         def create_page(self, page, author, content, changes=None, address=None):
  62                 page = Page.sanitise_page_name(page)
  63
  64                 # Write page to the database
  65                 page = self._get_page("INSERT INTO wiki(page, author_uid, markdown, changes, address) \
  66                         VALUES(%s, %s, %s, %s, %s) RETURNING *", page, author.uid, content or None, changes, address)
  67
  68                 # Send email to all watchers
  69                 page._send_watcher_emails(excludes=[author])
  70
  71                 return page
  72
  73         def delete_page(self, page, author, **kwargs):
  74                 # Do nothing if the page does not exist
  75                 if not self.get_page(page):
  76                         return
  77
  78                 # Just creates a blank last version of the page
  79                 self.create_page(page, author=author, content=None, **kwargs)
  80
  81         def make_breadcrumbs(self, url):
  82                 # Split and strip all empty elements (double slashes)
  83                 parts = list(e for e in url.split("/") if e)
  84
  85                 ret = []
  86                 for part in ("/".join(parts[:i]) for i in range(1, len(parts))):
  87                         ret.append(("/%s" % part, self.get_page_title(part, os.path.basename(part))))
  88
  89                 return ret
  90
  91         def search(self, query, account=None, limit=None):
  92                 query = util.parse_search_query(query)
  93
  94                 res = self._get_pages("SELECT wiki.* FROM wiki_search_index search_index \
  95                         LEFT JOIN wiki ON search_index.wiki_id = wiki.id \
  96                         WHERE search_index.document @@ to_tsquery('english', %s) \
  97                                 ORDER BY ts_rank(search_index.document, to_tsquery('english', %s)) DESC",
  98                         query, query)
  99
 100                 for page in res:
 101                         # Skip any pages the user doesn't have permission for
 102                         if not page.check_acl(account):
 103                                 continue
 104
 105                         # Return any other pages
 106                         yield page
 107
 108                         limit -= 1
 109                         if not limit:
 110                                 break
 111
 112         def refresh(self):
 113                 """
 114                         Needs to be called after a page has been changed
 115                 """
 116                 self.db.execute("REFRESH MATERIALIZED VIEW wiki_search_index")
 117
 118         # ACL
 119
 120         def check_acl(self, page, account):
 121                 res = self.db.query("SELECT * FROM wiki_acls \
 122                         WHERE %s ILIKE (path || '%%') ORDER BY LENGTH(path) DESC LIMIT 1", page)
 123
 124                 for row in res:
 125                         # Access not permitted when user is not logged in
 126                         if not account:
 127                                 return False
 128
 129                         # If user is in a matching group, we grant permission
 130                         for group in row.groups:
 131                                 if group in account.groups:
 132                                         return True
 133
 134                         # Otherwise access is not permitted
 135                         return False
 136
 137                 # If no ACLs are found, we permit access
 138                 return True
 139
 140         # Files
 141
 142         def _get_files(self, query, *args):
 143                 res = self.db.query(query, *args)
 144
 145                 for row in res:
 146                         yield File(self.backend, row.id, data=row)
 147
 148         def _get_file(self, query, *args):
 149                 res = self.db.get(query, *args)
 150
 151                 if res:
 152                         return File(self.backend, res.id, data=res)
 153
 154         def get_files(self, path):
 155                 files = self._get_files("SELECT * FROM wiki_files \
 156                         WHERE path = %s AND deleted_at IS NULL ORDER BY filename", path)
 157
 158                 return list(files)
 159
 160         def get_file_by_path(self, path):
 161                 path, filename = os.path.dirname(path), os.path.basename(path)
 162
 163                 return self._get_file("SELECT * FROM wiki_files \
 164                         WHERE path = %s AND filename = %s AND deleted_at IS NULL", path, filename)
 165
 166         def upload(self, path, filename, data, mimetype, author, address):
 167                 # Upload the blob first
 168                 blob = self.db.get("INSERT INTO wiki_blobs(data) VALUES(%s) RETURNING id", data)
 169
 170                 # Create entry for file
 171                 return self._get_file("INSERT INTO wiki_files(path, filename, author_uid, address, \
 172                         mimetype, blob_id, size) VALUES(%s,  %s, %s, %s, %s, %s, %s) RETURNING *", path,
 173                         filename, author.uid, address, mimetype, blob.id, len(data))
 174
 175         def find_image(self, path, filename):
 176                 for p in (path, os.path.dirname(path)):
 177                         file = self.get_file_by_path(os.path.join(p, filename))
 178
 179                         if file and file.is_image():
 180                                 return file
 181
 182
 183 class Page(misc.Object):
 184         def init(self, id, data=None):
 185                 self.id = id
 186                 self.data = data
 187
 188         def __repr__(self):
 189                 return "<%s %s %s>" % (self.__class__.__name__, self.page, self.timestamp)
 190
 191         def __eq__(self, other):
 192                 if isinstance(other, self.__class__):
 193                         return self.id == other.id
 194
 195         def __lt__(self, other):
 196                 if isinstance(other, self.__class__):
 197                         if self.page == other.page:
 198                                 return self.timestamp < other.timestamp
 199
 200                         return self.page < other.page
 201
 202         @staticmethod
 203         def sanitise_page_name(page):
 204                 if not page:
 205                         return "/"
 206
 207                 # Make sure that the page name does NOT end with a /
 208                 if page.endswith("/"):
 209                         page = page[:-1]
 210
 211                 # Make sure the page name starts with a /
 212                 if not page.startswith("/"):
 213                         page = "/%s" % page
 214
 215                 # Remove any double slashes
 216                 page = page.replace("//", "/")
 217
 218                 return page
 219
 220         @property
 221         def url(self):
 222                 return self.page
 223
 224         @property
 225         def full_url(self):
 226                 return "https://wiki.ipfire.org%s" % self.url
 227
 228         @property
 229         def page(self):
 230                 return self.data.page
 231
 232         @property
 233         def title(self):
 234                 return self._title or os.path.basename(self.page[1:])
 235
 236         @property
 237         def _title(self):
 238                 if not self.markdown:
 239                         return
 240
 241                 # Find first H1 headline in markdown
 242                 markdown = self.markdown.splitlines()
 243
 244                 m = re.match(r"^# (.*)( #)?$", markdown[0])
 245                 if m:
 246                         return m.group(1)
 247
 248         @lazy_property
 249         def author(self):
 250                 if self.data.author_uid:
 251                         return self.backend.accounts.get_by_uid(self.data.author_uid)
 252
 253         def _render(self, text):
 254                 logging.debug("Rendering %s" % self)
 255
 256                 # Link images
 257                 replacements = []
 258                 for match in re.finditer(r"!\[(.*)\]\((.*)\)", text):
 259                         alt_text, url = match.groups()
 260
 261                         # Skip any absolute and external URLs
 262                         if url.startswith("/") or url.startswith("https://") or url.startswith("http://"):
 263                                 continue
 264
 265                         # Try to split query string
 266                         url, delimiter, qs = url.partition("?")
 267
 268                         # Parse query arguments
 269                         args = urllib.parse.parse_qs(qs)
 270
 271                         # Find image
 272                         file = self.backend.wiki.find_image(self.page, url)
 273                         if not file:
 274                                 continue
 275
 276                         # Scale down the image if not already done
 277                         if not "s" in args:
 278                                 args["s"] = "768"
 279
 280                         # Format URL
 281                         url = "%s?%s" % (file.url, urllib.parse.urlencode(args))
 282
 283                         replacements.append((match.span(), file, alt_text, url))
 284
 285                 # Apply all replacements
 286                 for (start, end), file, alt_text, url in reversed(replacements):
 287                         text = text[:start] + "[![%s](%s)](%s?action=detail)" % (alt_text, url, file.url) + text[end:]
 288
 289                 # Add wiki links
 290                 patterns = (
 291                         (r"\[\[([\w\d\/]+)(?:\|([\w\d\s]+))\]\]", r"/\1", r"\2", None, None),
 292                         (r"\[\[([\w\d\/\-]+)\]\]", r"/\1", r"\1", self.backend.wiki.get_page_title, r"\1"),
 293                 )
 294
 295                 for pattern, link, title, repl, args in patterns:
 296                         replacements = []
 297
 298                         for match in re.finditer(pattern, text):
 299                                 l = match.expand(link)
 300                                 t = match.expand(title)
 301
 302                                 if callable(repl):
 303                                         t = repl(match.expand(args)) or t
 304
 305                                 replacements.append((match.span(), t or l, l))
 306
 307                         # Apply all replacements
 308                         for (start, end), t, l in reversed(replacements):
 309                                 text = text[:start] + "[%s](%s)" % (t, l) + text[end:]
 310
 311                 # Borrow this from the blog
 312                 return self.backend.blog._render_text(text, lang="markdown")
 313
 314         @property
 315         def markdown(self):
 316                 return self.data.markdown or ""
 317
 318         @property
 319         def html(self):
 320                 return self.data.html or self._render(self.markdown)
 321
 322         @property
 323         def timestamp(self):
 324                 return self.data.timestamp
 325
 326         def was_deleted(self):
 327                 return self.markdown is None
 328
 329         @lazy_property
 330         def breadcrumbs(self):
 331                 return self.backend.wiki.make_breadcrumbs(self.page)
 332
 333         def get_latest_revision(self):
 334                 revisions = self.get_revisions()
 335
 336                 # Return first object
 337                 for rev in revisions:
 338                         return rev
 339
 340         def get_revisions(self):
 341                 return self.backend.wiki._get_pages("SELECT * FROM wiki \
 342                         WHERE page = %s ORDER BY timestamp DESC", self.page)
 343
 344         @lazy_property
 345         def previous_revision(self):
 346                 return self.backend.wiki._get_page("SELECT * FROM wiki \
 347                         WHERE page = %s AND timestamp < %s ORDER BY timestamp DESC \
 348                         LIMIT 1", self.page, self.timestamp)
 349
 350         @property
 351         def changes(self):
 352                 return self.data.changes
 353
 354         # ACL
 355
 356         def check_acl(self, account):
 357                 return self.backend.wiki.check_acl(self.page, account)
 358
 359         # Sidebar
 360
 361         @lazy_property
 362         def sidebar(self):
 363                 parts = self.page.split("/")
 364
 365                 while parts:
 366                         sidebar = self.backend.wiki.get_page("%s/sidebar" % os.path.join(*parts))
 367                         if sidebar:
 368                                 return sidebar
 369
 370                         parts.pop()
 371
 372         # Watchers
 373
 374         @lazy_property
 375         def diff(self):
 376                 if self.previous_revision:
 377                         diff = difflib.unified_diff(
 378                                 self.previous_revision.markdown.splitlines(),
 379                                 self.markdown.splitlines(),
 380                         )
 381
 382                         return "\n".join(diff)
 383
 384         @property
 385         def watchers(self):
 386                 res = self.db.query("SELECT uid FROM wiki_watchlist \
 387                         WHERE page = %s", self.page)
 388
 389                 for row in res:
 390                         # Search for account by UID and skip if none was found
 391                         account = self.backend.accounts.get_by_uid(row.uid)
 392                         if not account:
 393                                 continue
 394
 395                         # Return the account
 396                         yield account
 397
 398         def is_watched_by(self, account):
 399                 res = self.db.get("SELECT 1 FROM wiki_watchlist \
 400                         WHERE page = %s AND uid = %s", self.page, account.uid)
 401
 402                 if res:
 403                         return True
 404
 405                 return False
 406
 407         def add_watcher(self, account):
 408                 if self.is_watched_by(account):
 409                         return
 410
 411                 self.db.execute("INSERT INTO wiki_watchlist(page, uid) \
 412                         VALUES(%s, %s)", self.page, account.uid)
 413
 414         def remove_watcher(self, account):
 415                 self.db.execute("DELETE FROM wiki_watchlist \
 416                         WHERE page = %s AND uid = %s", self.page, account.uid)
 417
 418         def _send_watcher_emails(self, excludes=[]):
 419                 # Nothing to do if there was no previous revision
 420                 if not self.previous_revision:
 421                         return
 422
 423                 for watcher in self.watchers:
 424                         # Skip everyone who is excluded
 425                         if watcher in excludes:
 426                                 logging.debug("Excluding %s" % watcher)
 427                                 continue
 428
 429                         logging.debug("Sending watcher email to %s" % watcher)
 430
 431                         # Compose message
 432                         self.backend.messages.send_template("wiki/messages/page-changed",
 433                                 recipients=[watcher], page=self, priority=-10)
 434
 435
 436 class File(misc.Object):
 437         def init(self, id, data):
 438                 self.id   = id
 439                 self.data = data
 440
 441         @property
 442         def url(self):
 443                 return os.path.join(self.path, self.filename)
 444
 445         @property
 446         def path(self):
 447                 return self.data.path
 448
 449         @property
 450         def filename(self):
 451                 return self.data.filename
 452
 453         @property
 454         def mimetype(self):
 455                 return self.data.mimetype
 456
 457         @property
 458         def size(self):
 459                 return self.data.size
 460
 461         @lazy_property
 462         def author(self):
 463                 if self.data.author_uid:
 464                         return self.backend.accounts.get_by_uid(self.data.author_uid)
 465
 466         @property
 467         def created_at(self):
 468                 return self.data.created_at
 469
 470         def is_pdf(self):
 471                 return self.mimetype in ("application/pdf", "application/x-pdf")
 472
 473         def is_image(self):
 474                 return self.mimetype.startswith("image/")
 475
 476         @lazy_property
 477         def blob(self):
 478                 res = self.db.get("SELECT data FROM wiki_blobs \
 479                         WHERE id = %s", self.data.blob_id)
 480
 481                 if res:
 482                         return bytes(res.data)
 483
 484         def get_thumbnail(self, size):
 485                 cache_key = "-".join((self.path, util.normalize(self.filename), self.created_at.isoformat(), "%spx" % size))
 486
 487                 # Try to fetch the data from the cache
 488                 thumbnail = self.memcache.get(cache_key)
 489                 if thumbnail:
 490                         return thumbnail
 491
 492                 # Generate the thumbnail
 493                 thumbnail = self._generate_thumbnail(size)
 494
 495                 # Put it into the cache for forever
 496                 self.memcache.set(cache_key, thumbnail)
 497
 498                 return thumbnail
 499
 500         def _generate_thumbnail(self, size):
 501                 image = PIL.Image.open(io.BytesIO(self.blob))
 502
 503                 # Resize the image to the desired resolution
 504                 image.thumbnail((size, size), PIL.Image.ANTIALIAS)
 505
 506                 with io.BytesIO() as f:
 507                         # If writing out the image does not work with optimization,
 508                         # we try to write it out without any optimization.
 509                         try:
 510                                 image.save(f, image.format, optimize=True, quality=98)
 511                         except:
 512                                 image.save(f, image.format, quality=98)
 513
 514                         return f.getvalue()