]> git.ipfire.org Git - ipfire.org.git/blob - src/backend/wiki.py
wiki: Migrate from memcache to redis
[ipfire.org.git] / src / backend / wiki.py
1 #!/usr/bin/python3
2
3 import difflib
4 import logging
5 import os.path
6 import re
7 import urllib.parse
8
9 from . import misc
10 from . import util
11 from .decorators import *
12
class Wiki(misc.Object):
    """
    Access to wiki pages, their access control lists and uploaded files.

    Everything is read from and written to the database that is
    available as self.db.
    """

    def _get_pages(self, query, *args):
        """
        Runs the given SQL query and yields a Page for every returned row.
        """
        res = self.db.query(query, *args)

        for row in res:
            yield Page(self.backend, row.id, data=row)

    def _get_page(self, query, *args):
        """
        Runs the given SQL query and returns the result as a Page
        or None if nothing was found.
        """
        res = self.db.get(query, *args)

        if res:
            return Page(self.backend, res.id, data=res)

    def __iter__(self):
        """
        Iterates over the current revision of every page that has
        not been deleted, ordered by page name.
        """
        return self._get_pages(
            "SELECT wiki.* FROM wiki_current current \
                LEFT JOIN wiki ON current.id = wiki.id \
                WHERE current.deleted IS FALSE \
                ORDER BY page",
        )

    def make_path(self, page, path):
        """
        Resolves the link target "path" as found on "page" and
        returns it as a normalised path.
        """
        # Nothing to do for absolute links
        if path.startswith("/"):
            pass

        # Relative links (one-level down)
        elif path.startswith("./"):
            path = os.path.join(page, path)

        # All other relative links are resolved against the parent of the page
        else:
            parent = os.path.dirname(page)
            path = os.path.join(parent, path)

        # Normalise links
        return os.path.normpath(path)

    def page_exists(self, path):
        """
        Returns True if a page exists at path and its latest
        revision has not been deleted.
        """
        page = self.get_page(path)

        # Page must have been found and not deleted
        return bool(page and not page.was_deleted())

    def get_page_title(self, page, default=None):
        """
        Returns the title of the page.

        If the page does not exist, default is returned, or, when no
        default has been given, the last component of the path.
        """
        doc = self.get_page(page)
        if doc:
            return doc.title

        return default or os.path.basename(page)

    def get_page(self, page, revision=None):
        """
        Fetches the latest revision of the page, or the revision with
        exactly the given timestamp if revision is set.

        Returns None if nothing was found or if any component of the
        path starts with an underscore.
        """
        page = Page.sanitise_page_name(page)

        # Split the path into parts
        parts = page.split("/")

        # Check if this is an action
        if any(part.startswith("_") for part in parts):
            return

        if revision:
            return self._get_page("SELECT * FROM wiki WHERE page = %s \
                AND timestamp = %s", page, revision)

        return self._get_page("SELECT * FROM wiki WHERE page = %s \
            ORDER BY timestamp DESC LIMIT 1", page)

    def get_recent_changes(self, account, limit=None):
        """
        Yields all revisions that account is permitted to see,
        newest first.

        At most limit revisions are returned. If limit is None (or zero),
        there is no upper bound.
        """
        pages = self._get_pages("SELECT * FROM wiki \
            ORDER BY timestamp DESC")

        found = 0
        for page in pages:
            # Skip any pages the user doesn't have permission for
            if not page.check_acl(account):
                continue

            yield page

            # Break when we have returned enough pages
            found += 1
            if limit and found >= limit:
                break

    def create_page(self, page, author, content, changes=None, address=None):
        """
        Stores a new revision of the page and notifies everybody
        who is watching it (except the author).

        Empty content is stored as NULL.

        Returns the newly created revision.
        """
        page = Page.sanitise_page_name(page)

        # Write page to the database
        page = self._get_page("INSERT INTO wiki(page, author_uid, markdown, changes, address) \
            VALUES(%s, %s, %s, %s, %s) RETURNING *", page, author.uid, content or None, changes, address)

        # Send email to all watchers
        page._send_watcher_emails(excludes=[author])

        return page

    def delete_page(self, page, author, **kwargs):
        """
        Deletes the page by storing a new revision without any content.

        Any extra keyword arguments are passed on to create_page().
        """
        # Do nothing if the page does not exist
        if not self.get_page(page):
            return

        # Just creates a blank last version of the page
        self.create_page(page, author=author, content=None, **kwargs)

    def make_breadcrumbs(self, url):
        """
        Returns a list of (path, title) tuples for every parent of url,
        starting with the topmost one. The page itself is not included.
        """
        # Split and strip all empty elements (double slashes)
        parts = [e for e in url.split("/") if e]

        ret = []
        for i in range(1, len(parts)):
            part = "/".join(parts[:i])

            ret.append(("/%s" % part, self.get_page_title(part, os.path.basename(part))))

        return ret

    def search(self, query, account=None, limit=None):
        """
        Performs a full-text search for query and returns a list of the
        matching pages that account is permitted to see, ordered by rank.

        At most limit pages are returned if limit is set.
        """
        res = self._get_pages("SELECT wiki.* FROM wiki_search_index search_index \
            LEFT JOIN wiki ON search_index.wiki_id = wiki.id \
            WHERE search_index.document @@ websearch_to_tsquery('english', %s) \
            ORDER BY ts_rank(search_index.document, websearch_to_tsquery('english', %s)) DESC",
            query, query)

        pages = []
        for page in res:
            # Skip any pages the user doesn't have permission for
            if not page.check_acl(account):
                continue

            # Return any other pages
            pages.append(page)

            # Break when we have found enough pages
            if limit and len(pages) >= limit:
                break

        return pages

    def refresh(self):
        """
        Needs to be called after a page has been changed
        """
        self.db.execute("REFRESH MATERIALIZED VIEW wiki_search_index")

    def get_watchlist(self, account):
        """
        Returns a sorted list of the current revisions of all pages
        that are on the watchlist of account.
        """
        pages = self._get_pages("""
            WITH pages AS (
                SELECT
                    *
                FROM
                    wiki_current
                LEFT JOIN
                    wiki ON wiki_current.id = wiki.id
            )

            SELECT
                *
            FROM
                wiki_watchlist watchlist
            JOIN
                pages ON watchlist.page = pages.page
            WHERE
                watchlist.uid = %s
            """, account.uid,
        )

        return sorted(pages)

    # ACL

    def check_acl(self, page, account):
        """
        Returns True if account may access page.

        Only the ACL with the longest path that matches the beginning of
        the page name (case-insensitively) is considered. Access is
        granted if no ACL matches at all.
        """
        res = self.db.query("SELECT * FROM wiki_acls \
            WHERE %s ILIKE (path || '%%') ORDER BY LENGTH(path) DESC LIMIT 1", page)

        # The query returns at most one row
        for row in res:
            # Access not permitted when user is not logged in
            if not account:
                return False

            # If user is in a matching group, we grant permission
            for group in row.groups:
                if account.is_member_of_group(group):
                    return True

            # Otherwise access is not permitted
            return False

        # If no ACLs are found, we permit access
        return True

    # Files

    def _get_files(self, query, *args):
        """
        Runs the given SQL query and yields a File for every returned row.
        """
        res = self.db.query(query, *args)

        for row in res:
            yield File(self.backend, row.id, data=row)

    def _get_file(self, query, *args):
        """
        Runs the given SQL query and returns the result as a File
        or None if nothing was found.
        """
        res = self.db.get(query, *args)

        if res:
            return File(self.backend, res.id, data=res)

    def get_files(self, path):
        """
        Returns a list of all files in path that have not been deleted,
        ordered by filename.
        """
        files = self._get_files("SELECT * FROM wiki_files \
            WHERE path = %s AND deleted_at IS NULL ORDER BY filename", path)

        return list(files)

    def get_file_by_path(self, path, revision=None):
        """
        Fetches the file at path (directory and filename combined).

        If revision is set, the newest version created at or before
        that time is returned. Otherwise the version that has not been
        deleted is returned.
        """
        path, filename = os.path.dirname(path), os.path.basename(path)

        if revision:
            # Fetch a specific revision
            return self._get_file("SELECT * FROM wiki_files \
                WHERE path = %s AND filename = %s AND created_at <= %s \
                ORDER BY created_at DESC LIMIT 1", path, filename, revision)

        # Fetch latest version
        return self._get_file("SELECT * FROM wiki_files \
            WHERE path = %s AND filename = %s AND deleted_at IS NULL",
            path, filename)

    def get_file_by_path_and_filename(self, path, filename):
        """
        Fetches the file called filename in path if it has not been deleted.
        """
        return self._get_file("SELECT * FROM wiki_files \
            WHERE path = %s AND filename = %s AND deleted_at IS NULL",
            path, filename)

    def upload(self, path, filename, data, mimetype, author, address):
        """
        Stores data as a new file called filename in path.

        An existing file with the same path and filename is marked
        as deleted first.

        Returns the newly created File.
        """
        # Replace any existing files
        file = self.get_file_by_path_and_filename(path, filename)
        if file:
            file.delete(author)

        # Upload the blob first (an existing blob with the same digest is reused)
        blob = self.db.get("INSERT INTO wiki_blobs(data) VALUES(%s) \
            ON CONFLICT (digest(data, %s)) DO UPDATE SET data = EXCLUDED.data \
            RETURNING id", data, "MD5")

        # Create entry for file
        return self._get_file("INSERT INTO wiki_files(path, filename, author_uid, address, \
            mimetype, blob_id, size) VALUES(%s, %s, %s, %s, %s, %s, %s) RETURNING *", path,
            filename, author.uid, address, mimetype, blob.id, len(data))

    def render(self, path, text):
        """
        Renders the markdown in text to HTML. Links and images are
        resolved relative to path.
        """
        r = WikiRenderer(self.backend, path)

        return r.render(text)
260
261
class Page(misc.Object):
    """
    One revision of a wiki page.

    data holds the database row of this revision.
    """

    def init(self, id, data=None):
        self.id = id
        self.data = data

    def __repr__(self):
        return "<%s %s %s>" % (self.__class__.__name__, self.page, self.timestamp)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.id == other.id

        return NotImplemented

    def __hash__(self):
        # Must be consistent with __eq__() which compares the ID
        return hash(self.id)

    def __lt__(self, other):
        # Sort by page name first and by timestamp second
        if isinstance(other, self.__class__):
            if self.page == other.page:
                return self.timestamp < other.timestamp

            return self.page < other.page

        return NotImplemented

    @staticmethod
    def sanitise_page_name(page):
        """
        Brings a page name into its canonical form, which starts with
        a slash and does not end with one. Returns "/" for an empty name.
        """
        if not page:
            return "/"

        # Make sure that the page name does NOT end with a /
        if page.endswith("/"):
            page = page[:-1]

        # Make sure the page name starts with a /
        if not page.startswith("/"):
            page = "/%s" % page

        # Remove any double slashes
        page = page.replace("//", "/")

        return page

    @property
    def url(self):
        return "/docs%s" % self.page

    @property
    def full_url(self):
        return "https://www.ipfire.org%s" % self.url

    @property
    def page(self):
        return self.data.page

    @property
    def title(self):
        """
        The headline of the page or, if there is none,
        the last component of the page name.
        """
        return self._title or os.path.basename(self.page[1:])

    @property
    def _title(self):
        """
        The text of the headline in the first line of the markdown
        or None if the page is empty or does not start with a headline.
        """
        if not self.markdown:
            return

        # Find first H1 headline in markdown
        markdown = self.markdown.splitlines()

        # The title is matched non-greedily so that an optional
        # closing " #" is not returned as a part of it
        m = re.match(r"^#\s*(.*?)( #)?$", markdown[0])
        if m:
            return m.group(1)

    @lazy_property
    def author(self):
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

    @property
    def markdown(self):
        return self.data.markdown or ""

    @property
    def html(self):
        """
        The page rendered as HTML without the headline.
        """
        lines = []

        # Strip off the first line if it contains a heading (as it will be shown separately)
        for i, line in enumerate(self.markdown.splitlines()):
            if i == 0 and line.startswith("#"):
                continue

            lines.append(line)

        return self.backend.wiki.render(self.page, "\n".join(lines))

    @property
    def timestamp(self):
        return self.data.timestamp

    def was_deleted(self):
        # Deleted pages are stored as a revision without any content
        return not self.markdown

    @lazy_property
    def breadcrumbs(self):
        return self.backend.wiki.make_breadcrumbs(self.page)

    def is_latest_revision(self):
        return self.get_latest_revision() == self

    def get_latest_revision(self):
        """
        Returns the newest revision of this page.
        """
        revisions = self.get_revisions()

        # Return first object
        for rev in revisions:
            return rev

    def get_revisions(self):
        """
        Returns an iterator over all revisions of this page, newest first.
        """
        return self.backend.wiki._get_pages("SELECT * FROM wiki \
            WHERE page = %s ORDER BY timestamp DESC", self.page)

    @lazy_property
    def previous_revision(self):
        """
        The revision just before this one, or None if this is the first one.
        """
        return self.backend.wiki._get_page("SELECT * FROM wiki \
            WHERE page = %s AND timestamp < %s ORDER BY timestamp DESC \
            LIMIT 1", self.page, self.timestamp)

    @property
    def changes(self):
        return self.data.changes

    @lazy_property
    def diff(self):
        """
        A unified diff between the previous revision and this one,
        or None if there is no previous revision.
        """
        if self.previous_revision:
            diff = difflib.unified_diff(
                self.previous_revision.markdown.splitlines(),
                self.markdown.splitlines(),
            )

            return "\n".join(diff)

    # ACL

    def check_acl(self, account):
        return self.backend.wiki.check_acl(self.page, account)

    # Watchers

    @property
    def watchers(self):
        """
        Yields the accounts of everybody who is watching this page.
        """
        res = self.db.query("SELECT uid FROM wiki_watchlist \
            WHERE page = %s", self.page)

        for row in res:
            # Search for account by UID and skip if none was found
            account = self.backend.accounts.get_by_uid(row.uid)
            if not account:
                continue

            # Return the account
            yield account

    def is_watched_by(self, account):
        res = self.db.get("SELECT 1 FROM wiki_watchlist \
            WHERE page = %s AND uid = %s", self.page, account.uid)

        if res:
            return True

        return False

    def add_watcher(self, account):
        # Nothing to do if the account is watching this page already
        if self.is_watched_by(account):
            return

        self.db.execute("INSERT INTO wiki_watchlist(page, uid) \
            VALUES(%s, %s)", self.page, account.uid)

    def remove_watcher(self, account):
        self.db.execute("DELETE FROM wiki_watchlist \
            WHERE page = %s AND uid = %s", self.page, account.uid)

    def _send_watcher_emails(self, excludes=None):
        """
        Sends an email about this change to everybody who is watching
        the page, except for the accounts listed in excludes and those
        who are not permitted to access the page.
        """
        if excludes is None:
            excludes = ()

        # Nothing to do if there was no previous revision
        if not self.previous_revision:
            return

        for watcher in self.watchers:
            # Skip everyone who is excluded
            if watcher in excludes:
                logging.debug("Excluding %s", watcher)
                continue

            # Check permissions
            if not self.backend.wiki.check_acl(self.page, watcher):
                logging.debug("Watcher %s does not have permissions", watcher)
                continue

            logging.debug("Sending watcher email to %s", watcher)

            # Compose message
            self.backend.messages.send_template("wiki/messages/page-changed",
                account=watcher, page=self, priority=-10)

    def restore(self, author, address, comment=None):
        """
        Restores this revision by storing its content as a new revision.

        Returns the newly created revision.
        """
        changes = "Restore to revision from %s" % self.timestamp.isoformat()

        # Append comment
        if comment:
            changes = "%s: %s" % (changes, comment)

        return self.backend.wiki.create_page(self.page,
            author, self.markdown, changes=changes, address=address)
470
471
class File(misc.Object):
    """
    One version of a file that has been uploaded to the wiki.

    data holds the database row of this version. The content is
    stored separately and is available as blob.
    """

    def init(self, id, data):
        self.id = id
        self.data = data

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.id == other.id

        return NotImplemented

    def __hash__(self):
        # Must be consistent with __eq__() which compares the ID
        return hash(self.id)

    @property
    def url(self):
        return os.path.join(self.path, self.filename)

    @property
    def path(self):
        return self.data.path

    @property
    def filename(self):
        return self.data.filename

    @property
    def mimetype(self):
        return self.data.mimetype

    @property
    def size(self):
        return self.data.size

    @lazy_property
    def author(self):
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

    @property
    def created_at(self):
        return self.data.created_at

    def delete(self, author=None):
        """
        Marks this file as deleted (by author if given).
        The database row and the blob are kept.
        """
        self.db.execute("UPDATE wiki_files SET deleted_at = NOW(), deleted_by = %s \
            WHERE id = %s", author.uid if author else None, self.id)

    @property
    def deleted_at(self):
        return self.data.deleted_at

    def get_latest_revision(self):
        """
        Returns the newest version of this file.
        """
        revisions = self.get_revisions()

        # Return first object
        for rev in revisions:
            return rev

    def get_revisions(self):
        """
        Returns a list of all versions of this file, newest first.
        """
        revisions = self.backend.wiki._get_files("SELECT * FROM wiki_files \
            WHERE path = %s AND filename = %s ORDER BY created_at DESC", self.path, self.filename)

        return list(revisions)

    def is_pdf(self):
        return self.mimetype in ("application/pdf", "application/x-pdf")

    def is_image(self):
        return self.mimetype.startswith("image/")

    def is_vector_image(self):
        return self.mimetype in ("image/svg+xml",)

    def is_bitmap_image(self):
        return self.is_image() and not self.is_vector_image()

    @lazy_property
    def blob(self):
        """
        The content of the file as bytes or None if it could not be found.
        """
        res = self.db.get("SELECT data FROM wiki_blobs \
            WHERE id = %s", self.data.blob_id)

        if res:
            return bytes(res.data)

    async def get_thumbnail(self, size):
        """
        Returns a thumbnail of this image in the given size.

        The thumbnail is fetched from the cache if possible and
        generated and put into the cache otherwise.

        Must only be called for bitmap images.
        """
        assert self.is_bitmap_image()

        # The key contains the creation time so that a new version
        # of the file does not get the thumbnail of an older one
        cache_key = "-".join((
            self.path,
            util.normalize(self.filename),
            self.created_at.isoformat(),
            "%spx" % size,
        ))

        # Try to fetch the data from the cache
        thumbnail = await self.backend.cache.get(cache_key)
        if thumbnail:
            return thumbnail

        # Generate the thumbnail
        thumbnail = util.generate_thumbnail(self.blob, size)

        # Put it into the cache for forever
        await self.backend.cache.set(cache_key, thumbnail)

        return thumbnail
573
574
class WikiRenderer(misc.Object):
    """
    Renders the markdown of a wiki page to HTML and rewrites all links
    and images in the result so that they work on the page at path.
    """

    # Links that start with any of these are external
    schemas = (
        "ftp://",
        "git://",
        "http://",
        "https://",
        "rsync://",
        "sftp://",
        "ssh://",
        "webcal://",
    )

    # Links
    links = re.compile(r"<a href=\"(.*?)\">(.*?)</a>")

    # Images
    images = re.compile(r"<img alt(?:=\"(.*?)\")? src=\"(.*?)\" (?:title=\"(.*?)\" )?/>")

    def init(self, path):
        self.path = path

    def _render_link(self, m):
        """
        Returns the replacement for a link that was matched by self.links.
        """
        url, text = m.groups()

        # External Links
        for schema in self.schemas:
            if url.startswith(schema):
                return """<a class="link-external" href="%s">%s</a>""" % \
                    (url, text or url)

        # Emails
        if "@" in url:
            # Strip mailto:
            if url.startswith("mailto:"):
                url = url[7:]

            return """<a class="link-external" href="mailto:%s">%s</a>""" % \
                (url, text or url)

        # Everything else must be an internal link
        path = self.backend.wiki.make_path(self.path, url)

        return """<a href="/docs%s">%s</a>""" % \
            (path, text or self.backend.wiki.get_page_title(path))

    def _render_image(self, m):
        """
        Returns the replacement for an image that was matched by self.images.

        Images that have been uploaded to the wiki are scaled down
        unless a size has been requested with the "s" query argument.
        """
        alt_text, url, caption = m.groups()

        html = """
            <div class="columns is-centered">
                <div class="column is-8">
                    <figure class="image">
                        <img src="%s" alt="%s">
                        <figcaption class="figure-caption">%s</figcaption>
                    </figure>
                </div>
            </div>
        """

        # The alternative text and the caption are both optional
        alt_text = alt_text or ""
        caption = caption or ""

        # External URLs are used as they are
        if url.startswith(("https://", "http://")):
            return html % (url, alt_text, caption)

        # Skip any absolute URLs
        if url.startswith("/"):
            return html % ("/docs%s" % url, alt_text, caption)

        # Try to split query string
        url, delimiter, qs = url.partition("?")

        # Parse query arguments
        args = urllib.parse.parse_qs(qs)

        # Build absolute path
        url = self.backend.wiki.make_path(self.path, url)

        # Find image
        file = self.backend.wiki.get_file_by_path(url)
        if not file or not file.is_image():
            return "<!-- Could not find image %s in %s -->" % (url, self.path)

        # Scale down the image if not already done
        if "s" not in args:
            args["s"] = "920"

        # Append arguments to the URL
        # (parse_qs() returns a list for each argument which needs doseq)
        if args:
            url = "%s?%s" % (url, urllib.parse.urlencode(args, doseq=True))

        return html % ("/docs%s" % url, alt_text, caption)

    def render(self, text):
        """
        Renders the markdown in text and returns the resulting HTML.
        """
        logging.debug("Rendering %s", self.path)

        # Borrow this from the blog
        text = self.backend.blog._render_text(text, lang="markdown")

        # Postprocess links
        text = self.links.sub(self._render_link, text)

        # Postprocess images to <figure>
        text = self.images.sub(self._render_image, text)

        return text