bc779fdc907655ea227fa635540b3141313f23fe
[ipfire.org.git] / src / backend / wiki.py
1 #!/usr/bin/python3
2
3 import difflib
4 import logging
5 import os.path
6 import re
7 import tornado.gen
8 import urllib.parse
9
10 from . import misc
11 from . import util
12 from .decorators import *
13
# Interwiki targets, keyed by the prefix used in [[prefix>name]] links.
# Every value is a tuple of:
#   - the URL template ("name" is the raw name, "url" the URL-quoted one)
#   - a template for the default link text (or None to use the name)
#   - the CSS class of an icon shown in front of the link (or None)
INTERWIKIS = {
        "google" : (
                "https://www.google.com/search?q=%(url)s",
                None,
                "fab fa-google",
        ),
        "rfc" : (
                "https://tools.ietf.org/html/rfc%(name)s",
                "RFC %s",
                None,
        ),
        "wp" : (
                "https://en.wikipedia.org/wiki/%(name)s",
                None,
                "fab fa-wikipedia-w",
        ),
}
19
class Wiki(misc.Object):
        """
                Access to wiki pages, their ACLs, watchlists and uploaded files
        """

        def _get_pages(self, query, *args):
                """
                        Runs the query and yields a Page for every returned row
                """
                res = self.db.query(query, *args)

                for row in res:
                        yield Page(self.backend, row.id, data=row)

        def _get_page(self, query, *args):
                """
                        Runs the query and returns a Page, or None if there was no result
                """
                res = self.db.get(query, *args)

                if res:
                        return Page(self.backend, res.id, data=res)

        def get_page_title(self, page, default=None):
                """
                        Returns the title of the given page.

                        The title is taken from the cache if possible. If the page
                        does not exist, default is used when given and the basename
                        of the page otherwise.
                """
                cache_key = "wiki:title:%s" % page

                # Try to retrieve title from cache
                title = self.memcache.get(cache_key)
                if title:
                        return title

                # If the title has not been in the cache, we will
                # have to look it up
                doc = self.get_page(page)
                if doc:
                        title = doc.title
                else:
                        title = default or os.path.basename(page)

                # Save in cache for forever
                self.memcache.set(cache_key, title)

                return title

        def get_page(self, page, revision=None):
                """
                        Returns the revision of the page with the given timestamp
                        or the latest revision if no revision was given
                """
                page = Page.sanitise_page_name(page)
                assert page

                if revision:
                        return self._get_page("SELECT * FROM wiki WHERE page = %s \
                                AND timestamp = %s", page, revision)

                return self._get_page("SELECT * FROM wiki WHERE page = %s \
                        ORDER BY timestamp DESC LIMIT 1", page)

        def get_recent_changes(self, account, limit=None):
                """
                        Yields the latest revisions the account is allowed to access
                        (newest first) and stops after limit revisions.

                        All matching revisions are returned if limit is None or zero.
                """
                pages = self._get_pages("SELECT * FROM wiki \
                        ORDER BY timestamp DESC")

                found = 0
                for page in pages:
                        if not page.check_acl(account):
                                continue

                        yield page

                        # Only count when a limit was given. Decrementing the
                        # default (None) would raise a TypeError
                        found += 1
                        if limit and found >= limit:
                                break

        def create_page(self, page, author, content, changes=None, address=None):
                """
                        Stores a new revision of the page, updates the cached title
                        and sends emails to everyone who watches the page (except
                        the author). Returns the new revision.

                        Empty content is stored as NULL.
                """
                name = Page.sanitise_page_name(page)

                # Write page to the database
                page = self._get_page("INSERT INTO wiki(page, author_uid, markdown, changes, address) \
                        VALUES(%s, %s, %s, %s, %s) RETURNING *", name, author.uid, content or None, changes, address)

                # Update cache
                self.memcache.set("wiki:title:%s" % page.page, page.title)

                # Send email to all watchers
                page._send_watcher_emails(excludes=[author])

                return page

        def delete_page(self, page, author, **kwargs):
                """
                        Deletes a page by creating a new revision without any content.

                        Any extra keyword arguments are passed on to create_page().
                """
                # Do nothing if the page does not exist
                if not self.get_page(page):
                        return

                # Just creates a blank last version of the page
                self.create_page(page, author=author, content=None, **kwargs)

        def make_breadcrumbs(self, url):
                """
                        Returns a list of (path, title) tuples for all
                        parents of url, starting with the top-most one
                """
                # Split and strip all empty elements (double slashes)
                parts = [e for e in url.split("/") if e]

                ret = []
                for i in range(1, len(parts)):
                        part = "/".join(parts[:i])

                        ret.append(("/%s" % part, self.get_page_title(part, os.path.basename(part))))

                return ret

        def search(self, query, account=None, limit=None):
                """
                        Searches the search index for query and returns a list of
                        at most limit pages the account is allowed to access
                """
                query = util.parse_search_query(query)

                res = self._get_pages("SELECT wiki.* FROM wiki_search_index search_index \
                        LEFT JOIN wiki ON search_index.wiki_id = wiki.id \
                        WHERE search_index.document @@ to_tsquery('english', %s) \
                                ORDER BY ts_rank(search_index.document, to_tsquery('english', %s)) DESC",
                        query, query)

                pages = []
                for page in res:
                        # Skip any pages the user doesn't have permission for
                        if not page.check_acl(account):
                                continue

                        # Return any other pages
                        pages.append(page)

                        # Break when we have found enough pages
                        if limit and len(pages) >= limit:
                                break

                return pages

        def refresh(self):
                """
                        Needs to be called after a page has been changed
                """
                self.db.execute("REFRESH MATERIALIZED VIEW wiki_search_index")

        def get_watchlist(self, account):
                """
                        Returns a sorted list of all pages on the watchlist of account
                """
                pages = self._get_pages(
                        "WITH pages AS (SELECT * FROM wiki_current \
                                        LEFT JOIN wiki ON wiki_current.id = wiki.id) \
                        SELECT * FROM wiki_watchlist watchlist \
                                LEFT JOIN pages ON watchlist.page = pages.page \
                                WHERE watchlist.uid = %s",
                        account.uid,
                )

                return sorted(pages)

        # ACL

        def check_acl(self, page, account):
                """
                        Returns True if account is allowed to access page.

                        Only the ACL with the longest path that page starts with
                        (compared case-insensitively) is considered. Access is
                        granted if there is no such ACL.
                """
                res = self.db.query("SELECT * FROM wiki_acls \
                        WHERE %s ILIKE (path || '%%') ORDER BY LENGTH(path) DESC LIMIT 1", page)

                for row in res:
                        # Access not permitted when user is not logged in
                        if not account:
                                return False

                        # We grant permission if the user is in a matching group
                        # and deny access otherwise
                        return any(group in account.groups for group in row.groups)

                # If no ACLs are found, we permit access
                return True

        # Files

        def _get_files(self, query, *args):
                """
                        Runs the query and yields a File for every returned row
                """
                res = self.db.query(query, *args)

                for row in res:
                        yield File(self.backend, row.id, data=row)

        def _get_file(self, query, *args):
                """
                        Runs the query and returns a File, or None if there was no result
                """
                res = self.db.get(query, *args)

                if res:
                        return File(self.backend, res.id, data=res)

        def get_files(self, path):
                """
                        Returns a list of all files in path that have not
                        been deleted, ordered by filename
                """
                files = self._get_files("SELECT * FROM wiki_files \
                        WHERE path = %s AND deleted_at IS NULL ORDER BY filename", path)

                return list(files)

        def get_file_by_path(self, path):
                """
                        Returns the file at path if it exists and has not been deleted
                """
                path, filename = os.path.dirname(path), os.path.basename(path)

                return self._get_file("SELECT * FROM wiki_files \
                        WHERE path = %s AND filename = %s AND deleted_at IS NULL", path, filename)

        def upload(self, path, filename, data, mimetype, author, address):
                """
                        Stores data as a new blob and creates the
                        file entry for it which is then returned
                """
                # Upload the blob first
                blob = self.db.get("INSERT INTO wiki_blobs(data) VALUES(%s) RETURNING id", data)

                # Create entry for file
                return self._get_file("INSERT INTO wiki_files(path, filename, author_uid, address, \
                        mimetype, blob_id, size) VALUES(%s,  %s, %s, %s, %s, %s, %s) RETURNING *", path,
                        filename, author.uid, address, mimetype, blob.id, len(data))

        def find_image(self, path, filename):
                """
                        Searches for an image called filename in path and after
                        that in the directory above. Returns None if nothing was found.
                """
                for p in (path, os.path.dirname(path)):
                        file = self.get_file_by_path(os.path.join(p, filename))

                        if file and file.is_image():
                                return file

        def render(self, path, text):
                """
                        Renders text using a WikiRenderer created for path
                """
                r = WikiRenderer(self.backend, path)

                return r.render(text)
220
221
class Page(misc.Object):
        """
                One revision of a wiki page
        """

        def init(self, id, data=None):
                self.id = id
                self.data = data

        def __repr__(self):
                return "<%s %s %s>" % (self.__class__.__name__, self.page, self.timestamp)

        def __eq__(self, other):
                if isinstance(other, self.__class__):
                        return self.id == other.id

                # Let Python try the reflected operation for any other types
                return NotImplemented

        def __hash__(self):
                # Must agree with __eq__ which compares the ID
                return hash(self.id)

        def __lt__(self, other):
                # Sort by page name first and by timestamp after that
                if isinstance(other, self.__class__):
                        if self.page == other.page:
                                return self.timestamp < other.timestamp

                        return self.page < other.page

                return NotImplemented

        @staticmethod
        def sanitise_page_name(page):
                """
                        Returns page with exactly one leading slash, no trailing
                        slash and no repeated slashes. Returns "/" for empty input.
                """
                if not page:
                        return "/"

                # Make sure the page name starts with a / and collapse any
                # runs of slashes. This has to happen before the trailing
                # slash is removed, or "foo//" would end up as "/foo/".
                page = re.sub(r"/+", "/", "/%s" % page)

                # Make sure that the page name does NOT end with a /
                # (except for the root page)
                if len(page) > 1 and page.endswith("/"):
                        page = page[:-1]

                return page

        @property
        def url(self):
                return self.page

        @property
        def full_url(self):
                return "https://wiki.ipfire.org%s" % self.url

        @property
        def page(self):
                return self.data.page

        @property
        def title(self):
                """
                        The headline of the page or, if there is
                        none, the last element of the page name
                """
                return self._title or os.path.basename(self.page[1:])

        @property
        def _title(self):
                """
                        The H1 headline if the first line of the markdown is one
                """
                if not self.markdown:
                        return

                # Find first H1 headline in markdown
                markdown = self.markdown.splitlines()

                # The title is matched non-greedily so that an
                # optional closing " #" is not part of the title
                m = re.match(r"^# (.*?)( #)?$", markdown[0])
                if m:
                        return m.group(1)

        @lazy_property
        def author(self):
                if self.data.author_uid:
                        return self.backend.accounts.get_by_uid(self.data.author_uid)

        @property
        def markdown(self):
                """
                        The markdown of this revision (never None)
                """
                return self.data.markdown or ""

        @property
        def html(self):
                return self.backend.wiki.render(self.page, self.markdown)

        @property
        def timestamp(self):
                return self.data.timestamp

        def was_deleted(self):
                """
                        Returns True if this revision has no content
                """
                # Check the raw value since the markdown property
                # converts None into an empty string
                return self.data.markdown is None

        @lazy_property
        def breadcrumbs(self):
                return self.backend.wiki.make_breadcrumbs(self.page)

        def get_latest_revision(self):
                """
                        Returns the newest revision of this page
                """
                revisions = self.get_revisions()

                # Return first object
                for rev in revisions:
                        return rev

        def get_revisions(self):
                """
                        Yields all revisions of this page, newest first
                """
                return self.backend.wiki._get_pages("SELECT * FROM wiki \
                        WHERE page = %s ORDER BY timestamp DESC", self.page)

        @lazy_property
        def previous_revision(self):
                """
                        The revision right before this one (if any)
                """
                return self.backend.wiki._get_page("SELECT * FROM wiki \
                        WHERE page = %s AND timestamp < %s ORDER BY timestamp DESC \
                        LIMIT 1", self.page, self.timestamp)

        @property
        def changes(self):
                return self.data.changes

        # ACL

        def check_acl(self, account):
                return self.backend.wiki.check_acl(self.page, account)

        # Sidebar

        @lazy_property
        def sidebar(self):
                """
                        The closest page called "sidebar", searched from
                        below this page upwards to the top level
                """
                parts = self.page.split("/")

                while parts:
                        sidebar = self.backend.wiki.get_page("%s/sidebar" % os.path.join(*parts))
                        if sidebar:
                                return sidebar

                        parts.pop()

        # Diff

        @lazy_property
        def diff(self):
                """
                        Unified diff between the previous and this revision,
                        or None if there is no previous revision
                """
                if self.previous_revision:
                        diff = difflib.unified_diff(
                                self.previous_revision.markdown.splitlines(),
                                self.markdown.splitlines(),
                        )

                        return "\n".join(diff)

        # Watchers

        @property
        def watchers(self):
                """
                        Yields the accounts of everyone who watches this page
                """
                res = self.db.query("SELECT uid FROM wiki_watchlist \
                        WHERE page = %s", self.page)

                for row in res:
                        # Search for account by UID and skip if none was found
                        account = self.backend.accounts.get_by_uid(row.uid)
                        if not account:
                                continue

                        # Return the account
                        yield account

        def is_watched_by(self, account):
                res = self.db.get("SELECT 1 FROM wiki_watchlist \
                        WHERE page = %s AND uid = %s", self.page, account.uid)

                return bool(res)

        def add_watcher(self, account):
                # Nothing to do if the account is watching already
                if self.is_watched_by(account):
                        return

                self.db.execute("INSERT INTO wiki_watchlist(page, uid) \
                        VALUES(%s, %s)", self.page, account.uid)

        def remove_watcher(self, account):
                self.db.execute("DELETE FROM wiki_watchlist \
                        WHERE page = %s AND uid = %s", self.page, account.uid)

        def _send_watcher_emails(self, excludes=None):
                """
                        Sends an email to all watchers of this page who have
                        permission to access it and are not listed in excludes
                """
                # Avoid a mutable default argument
                if excludes is None:
                        excludes = []

                # Nothing to do if there was no previous revision
                if not self.previous_revision:
                        return

                for watcher in self.watchers:
                        # Skip everyone who is excluded
                        if watcher in excludes:
                                logging.debug("Excluding %s", watcher)
                                continue

                        # Check permissions
                        if not self.backend.wiki.check_acl(self.page, watcher):
                                logging.debug("Watcher %s does not have permissions", watcher)
                                continue

                        logging.debug("Sending watcher email to %s", watcher)

                        # Compose message
                        self.backend.messages.send_template("wiki/messages/page-changed",
                                recipients=[watcher], page=self, priority=-10)
417
418
class File(misc.Object):
        """
                A file that has been uploaded to the wiki
        """

        def init(self, id, data):
                self.id, self.data = id, data

        @property
        def url(self):
                # The URL is the path of the file followed by its name
                directory, name = self.path, self.filename

                return os.path.join(directory, name)

        @property
        def path(self):
                return self.data.path

        @property
        def filename(self):
                return self.data.filename

        @property
        def mimetype(self):
                return self.data.mimetype

        @property
        def size(self):
                return self.data.size

        @lazy_property
        def author(self):
                uid = self.data.author_uid
                if not uid:
                        return None

                return self.backend.accounts.get_by_uid(uid)

        @property
        def created_at(self):
                return self.data.created_at

        def is_pdf(self):
                """
                        Returns True if the MIME type is one of the PDF types
                """
                pdf_types = ("application/pdf", "application/x-pdf")

                return self.mimetype in pdf_types

        def is_image(self):
                """
                        Returns True if the MIME type is any image type
                """
                return self.mimetype.startswith("image/")

        @lazy_property
        def blob(self):
                """
                        The content of the file as bytes (None if the blob is missing)
                """
                row = self.db.get("SELECT data FROM wiki_blobs \
                        WHERE id = %s", self.data.blob_id)

                if not row:
                        return None

                return bytes(row.data)

        def get_thumbnail(self, size):
                """
                        Returns a thumbnail of the requested size which is
                        generated from the blob unless it has been cached
                """
                key_parts = [
                        self.path,
                        util.normalize(self.filename),
                        self.created_at.isoformat(),
                        "%spx" % size,
                ]
                cache_key = "-".join(key_parts)

                # Use what is in the cache if there is anything
                cached = self.memcache.get(cache_key)
                if cached:
                        return cached

                # Otherwise generate the thumbnail and cache it for forever
                thumbnail = util.generate_thumbnail(self.blob, size)
                self.memcache.set(cache_key, thumbnail)

                return thumbnail
482
483
class WikiRenderer(misc.Object):
        """
                Renders the markdown of a wiki page.

                Images and the [[...]] link syntax are rewritten before
                the text is passed on to the markdown renderer of the blog.
        """

        # Wiki links
        wiki_link = re.compile(r"\[\[([\w\d\/\-\.]+)(?:\|(.+?))?\]\]")

        # External links
        external_link = re.compile(r"\[\[((?:ftp|git|https?|rsync|sftp|ssh|webcal)\:\/\/.+?)(?:\|(.+?))?\]\]")

        # Interwiki links e.g. [[wp>IPFire]]
        interwiki_link = re.compile(r"\[\[(\w+)>(.+?)(?:\|(.+?))?\]\]")

        # Mail link
        email_link = re.compile(r"\[\[([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)(?:\|(.+?))?\]\]")

        # Markdown images e.g. ![alt text](url)
        image_link = re.compile(r"!\[(.*?)\]\((.*?)\)")

        def init(self, path):
                # Path of the page that is being rendered
                self.path = path

        def _render_wiki_link(self, m):
                """
                        Replaces a wiki link by an HTML link. The link text is
                        the alias if there is one and the title of the page otherwise.
                """
                path, alias = m.groups()

                # Relative links (one-level down)
                if path.startswith("./"):
                        path = os.path.join(self.path, path)

                # All other relative links (nothing to do for absolute links)
                elif not path.startswith("/"):
                        p = os.path.dirname(self.path)
                        path = os.path.join(p, path)

                # Normalise links
                path = os.path.normpath(path)

                return """<a href="%s">%s</a>""" % (
                        path,
                        alias or self.backend.wiki.get_page_title(path),
                )

        def _render_external_link(self, m):
                """
                        Replaces an external link by an HTML link
                """
                url, alias = m.groups()

                return """<a class="link-external" href="%s">%s</a>""" % (url, alias or url)

        def _render_interwiki_link(self, m):
                """
                        Replaces an interwiki link by an HTML link (with an icon
                        in front if one is configured). Links to unknown
                        interwikis are left as they are.
                """
                wiki, name, alias = m.groups()

                # Retrieve URL
                try:
                        url, repl, icon = INTERWIKIS[wiki]
                except KeyError:
                        logging.warning("Invalid interwiki: %s", wiki)

                        # Keep the original text because None is
                        # not a valid replacement string
                        return m.group(0)

                # Expand URL
                url = url % {
                        "name" : name,
                        "url"  : urllib.parse.quote(name),
                }

                # Fall back to the default alias of the interwiki
                if not alias and repl:
                        alias = repl % name

                # Put everything together
                s = []

                if icon:
                        s.append("<span class=\"%s\"></span>" % icon)

                s.append("""<a class="link-external" href="%s">%s</a>""" % (url, alias or name))

                return " ".join(s)

        def _render_email_link(self, m):
                """
                        Replaces an email link by a mailto: link
                """
                address, alias = m.groups()

                return """<a class="link-external" href="mailto:%s">%s</a>""" \
                        % (address, alias or address)

        def render(self, text):
                """
                        Returns the rendered text.

                        Relative images that are found in the wiki are scaled down
                        (unless the "s" query argument is set) and linked to their
                        detail page. After that, all [[...]] links are replaced
                        and the result is rendered as markdown.
                """
                logging.debug("Rendering %s", self.path)

                # Link images
                replacements = []
                for match in self.image_link.finditer(text):
                        alt_text, url = match.groups()

                        # Skip any absolute and external URLs
                        if url.startswith(("/", "https://", "http://")):
                                continue

                        # Try to split query string
                        url, delimiter, qs = url.partition("?")

                        # Parse query arguments
                        args = urllib.parse.parse_qs(qs)

                        # Find image
                        file = self.backend.wiki.find_image(self.path, url)
                        if not file:
                                continue

                        # Scale down the image if not already done
                        if "s" not in args:
                                args["s"] = "768"

                        # Format URL (parse_qs() returns a list of values for each
                        # argument which would be sent as "['...']" without doseq)
                        url = "%s?%s" % (file.url, urllib.parse.urlencode(args, doseq=True))

                        replacements.append((match.span(), file, alt_text, url))

                # Apply all replacements (backwards so that the
                # positions of the remaining matches stay valid)
                for (start, end), file, alt_text, url in reversed(replacements):
                        text = text[:start] + "[![%s](%s)](%s?action=detail)" % (alt_text, url, file.url) + text[end:]

                # Handle wiki links
                text = self.wiki_link.sub(self._render_wiki_link, text)

                # Handle interwiki links
                text = self.interwiki_link.sub(self._render_interwiki_link, text)

                # Handle external links
                text = self.external_link.sub(self._render_external_link, text)

                # Handle email links
                text = self.email_link.sub(self._render_email_link, text)

                # Borrow this from the blog
                return self.backend.blog._render_text(text, lang="markdown")