]> git.ipfire.org Git - ipfire.org.git/blob - src/backend/wiki.py
Merge remote-tracking branch 'origin/new-design' into new-design
[ipfire.org.git] / src / backend / wiki.py
1 #!/usr/bin/python3
2
import difflib
import hashlib
import logging
import os.path
import re
import urllib.parse

import markdown
import markdown.extensions
import markdown.preprocessors
import markdown.treeprocessors

from . import misc
from . import util
from .decorators import *
16
class Wiki(misc.Object):
    """
    Access to wiki pages, their revisions, ACLs, watchlists and uploaded files
    """

    def _get_pages(self, query, *args):
        """
        Runs the query and yields a Page for every row that is returned
        """
        res = self.db.query(query, *args)

        for row in res:
            yield Page(self.backend, row.id, data=row)

    def _get_page(self, query, *args):
        """
        Runs the query and returns a single Page, or None if nothing matched
        """
        res = self.db.get(query, *args)

        if res:
            return Page(self.backend, res.id, data=res)

    def __iter__(self):
        """
        Iterates over all rows of wiki_current that are not marked
        as deleted, ordered by page name
        """
        return self._get_pages("""
            SELECT
                wiki.*
            FROM
                wiki_current current
            LEFT JOIN
                wiki ON current.id = wiki.id
            WHERE
                current.deleted IS FALSE
            ORDER BY page
            """,
        )

    def make_path(self, page, path):
        """
        Resolves the link target "path" as seen from "page"
        and returns the normalised result
        """
        # Nothing to do for absolute links
        if path.startswith("/"):
            pass

        # Relative links (one-level down)
        elif path.startswith("./"):
            path = os.path.join(page, path)

        # All other relative links
        else:
            p = os.path.dirname(page)
            path = os.path.join(p, path)

        # Normalise links
        return os.path.normpath(path)

    def page_exists(self, path):
        """
        Returns a true value if the page exists and its latest
        revision is not empty
        """
        page = self.get_page(path)

        # Page must have been found and not deleted
        return page and not page.was_deleted()

    def get_page_title(self, page, default=None):
        """
        Returns the title of the page.

        If the page does not exist, "default" is returned and if that has
        not been set either, the last component of the page name is used.
        """
        doc = self.get_page(page)
        if doc:
            return doc.title

        return default or os.path.basename(page)

    def get_page(self, page, revision=None):
        """
        Fetches the page with the given name.

        Without "revision" the row with the most recent timestamp is
        returned, otherwise the row that has exactly this timestamp.
        None is returned if nothing was found or if any component of
        the name starts with an underscore.
        """
        page = Page.sanitise_page_name(page)

        # Split the path into parts
        parts = page.split("/")

        # Check if this is an action
        if any(part.startswith("_") for part in parts):
            return

        if revision:
            return self._get_page("SELECT * FROM wiki WHERE page = %s \
                AND timestamp = %s", page, revision)
        else:
            return self._get_page("SELECT * FROM wiki WHERE page = %s \
                ORDER BY timestamp DESC LIMIT 1", page)

    def get_recent_changes(self, account, limit=None):
        """
        Yields all revisions, newest first, that the account may access.

        Stops after "limit" pages have been yielded. When no limit has
        been given, all accessible revisions are returned.
        """
        pages = self._get_pages("SELECT * FROM wiki \
            ORDER BY timestamp DESC")

        remaining = limit

        for page in pages:
            if not page.check_acl(account):
                continue

            yield page

            # Only count down when a limit has been requested
            # (subtracting from None would raise a TypeError)
            if remaining is not None:
                remaining -= 1
                if not remaining:
                    break

    def create_page(self, page, author, content, changes=None, address=None):
        """
        Stores a new revision of the page and returns it.

        Empty content is written as NULL. After the row has been
        inserted, the linked files are stored and all watchers except
        for the author are notified.
        """
        page = Page.sanitise_page_name(page)

        # Write page to the database
        page = self._get_page("""
            INSERT INTO
                wiki
            (
                page,
                author_uid,
                markdown,
                changes,
                address
            ) VALUES (
                %s, %s, %s, %s, %s
            )
            RETURNING *
            """, page, author.uid, content or None, changes, address,
        )

        # Store any linked files
        page._store_linked_files()

        # Send email to all watchers
        page._send_watcher_emails(excludes=[author])

        return page

    def delete_page(self, page, author, **kwargs):
        """
        Deletes the page by creating a new revision without any content.

        Any further keyword arguments are passed on to create_page().
        """
        # Do nothing if the page does not exist
        if not self.get_page(page):
            return

        # Just creates a blank last version of the page
        self.create_page(page, author=author, content=None, **kwargs)

    def make_breadcrumbs(self, url):
        """
        Returns a list of (path, title) tuples for every parent of
        the given URL. The URL itself is not included.
        """
        # Split and strip all empty elements (double slashes)
        parts = [e for e in url.split("/") if e]

        ret = []
        for i in range(1, len(parts)):
            part = "/".join(parts[:i])

            ret.append(("/%s" % part, self.get_page_title(part, os.path.basename(part))))

        return ret

    def search(self, query, account=None, limit=None):
        """
        Runs a full-text search and returns a list of matching pages
        that the account has access to, best matches first
        """
        res = self._get_pages("""
            SELECT
                wiki.*
            FROM
                wiki_search_index search_index
            LEFT JOIN
                wiki ON search_index.wiki_id = wiki.id
            WHERE
                search_index.document @@ websearch_to_tsquery('english', %s)
            ORDER BY
                ts_rank(search_index.document, websearch_to_tsquery('english', %s)) DESC
            """, query, query,
        )

        pages = []
        for page in res:
            # Skip any pages the user doesn't have permission for
            if not page.check_acl(account):
                continue

            # Return any other pages
            pages.append(page)

            # Break when we have found enough pages
            if limit and len(pages) >= limit:
                break

        return pages

    def refresh(self):
        """
        Needs to be called after a page has been changed
        """
        self.db.execute("REFRESH MATERIALIZED VIEW wiki_search_index")

    def get_watchlist(self, account):
        """
        Returns a sorted list of all pages that are on the
        watchlist of the given account
        """
        pages = self._get_pages("""
            WITH pages AS (
                SELECT
                    *
                FROM
                    wiki_current
                LEFT JOIN
                    wiki ON wiki_current.id = wiki.id
            )

            SELECT
                *
            FROM
                wiki_watchlist watchlist
            JOIN
                pages ON watchlist.page = pages.page
            WHERE
                watchlist.uid = %s
            """, account.uid,
        )

        return sorted(pages)

    # ACL

    def check_acl(self, page, account):
        """
        Returns True if the account may access the page.

        Only the ACL with the longest path that is a prefix of the page
        name is evaluated. Pages without any matching ACL are open to
        everybody, including users who are not logged in.
        """
        res = self.db.query("""
            SELECT
                *
            FROM
                wiki_acls
            WHERE
                %s ILIKE (path || '%%')
            ORDER BY
                LENGTH(path) DESC
            LIMIT 1
            """, page,
        )

        for row in res:
            # Access not permitted when user is not logged in
            if not account:
                return False

            # If user is in a matching group, we grant permission
            for group in row.groups:
                if account.is_member_of_group(group):
                    return True

            # Otherwise access is not permitted
            return False

        # If no ACLs are found, we permit access
        return True

    # Files

    def _get_files(self, query, *args):
        """
        Runs the query and yields a File for every row that is returned
        """
        res = self.db.query(query, *args)

        for row in res:
            yield File(self.backend, row.id, data=row)

    def _get_file(self, query, *args):
        """
        Runs the query and returns a single File, or None if nothing matched
        """
        res = self.db.get(query, *args)

        if res:
            return File(self.backend, res.id, data=res)

    def get_files(self, path):
        """
        Returns a list of all files in path that have not been deleted,
        ordered by filename
        """
        files = self._get_files("""
            SELECT
                *
            FROM
                wiki_files
            WHERE
                path = %s
            AND
                deleted_at IS NULL
            ORDER BY filename
            """, path,
        )

        return list(files)

    def get_file_by_path(self, path, revision=None):
        """
        Fetches a file by its full path (directory and filename).

        With "revision", the newest file that has been created at or
        before that time is returned. Otherwise the file which has not
        been deleted is returned.
        """
        path, filename = os.path.dirname(path), os.path.basename(path)

        if revision:
            # Fetch a specific revision
            return self._get_file("""
                SELECT
                    *
                FROM
                    wiki_files
                WHERE
                    path = %s
                AND
                    filename = %s
                AND
                    created_at <= %s
                ORDER BY
                    created_at DESC
                LIMIT 1
                """, path, filename, revision,
            )

        # Fetch latest version
        return self.get_file_by_path_and_filename(path, filename)

    def get_file_by_path_and_filename(self, path, filename):
        """
        Fetches the file with the given path and filename
        which has not been deleted
        """
        return self._get_file("""
            SELECT
                *
            FROM
                wiki_files
            WHERE
                path = %s
            AND
                filename = %s
            AND
                deleted_at IS NULL
            """, path, filename,
        )

    def upload(self, path, filename, data, mimetype, author, address):
        """
        Stores an uploaded file and returns the new File object.

        Any existing file with the same path and filename is deleted
        first. The payload is written to wiki_blobs and referenced
        from the newly created row in wiki_files.
        """
        # Replace any existing files
        file = self.get_file_by_path_and_filename(path, filename)
        if file:
            file.delete(author)

        # Upload the blob first
        blob = self.db.get("""
            INSERT INTO
                wiki_blobs(data)
            VALUES
                (%s)
            ON CONFLICT
                (digest(data, %s))
            DO UPDATE
                SET data = EXCLUDED.data
            RETURNING id
            """, data, "MD5",
        )

        # Create entry for file
        return self._get_file("""
            INSERT INTO
                wiki_files
            (
                path,
                filename,
                author_uid,
                address,
                mimetype,
                blob_id,
                size
            ) VALUES (
                %s, %s, %s, %s, %s, %s, %s
            )
            RETURNING *
            """, path, filename, author.uid, address, mimetype, blob.id, len(data),
        )

    def render(self, path, text, **kwargs):
        """
        Returns a WikiRenderer for text which is rendered as page "path"
        """
        return WikiRenderer(self.backend, path, text, **kwargs)
368
369
class Page(misc.Object):
    """
    A single revision of a wiki page
    """

    def init(self, id, data=None):
        self.id = id
        self.data = data

    def __repr__(self):
        return "<%s %s %s>" % (self.__class__.__name__, self.page, self.timestamp)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.id == other.id

        return NotImplemented

    def __hash__(self):
        # Defining __eq__ would otherwise make pages unhashable.
        # Equal pages share the same ID and therefore the same hash.
        return hash(self.id)

    def __lt__(self, other):
        # Sort by page name first and then by the time of the revision
        if isinstance(other, self.__class__):
            if self.page == other.page:
                return self.timestamp < other.timestamp

            return self.page < other.page

        return NotImplemented

    @staticmethod
    def sanitise_page_name(page):
        """
        Brings a page name into the form "/a/b/c"
        """
        if not page:
            return "/"

        # Make sure that the page name does NOT end with a /
        if page.endswith("/"):
            page = page[:-1]

        # Make sure the page name starts with a /
        if not page.startswith("/"):
            page = "/%s" % page

        # Remove any double slashes
        page = page.replace("//", "/")

        return page

    @property
    def url(self):
        return "/docs%s" % self.page

    @property
    def full_url(self):
        return "https://www.ipfire.org%s" % self.url

    @property
    def page(self):
        return self.data.page

    @property
    def title(self):
        """
        The headline of the page, or the last component of the
        page name if there is no headline
        """
        return self._title or os.path.basename(self.page[1:])

    @property
    def _title(self):
        """
        The text of the headline in the first line of the markdown,
        or None if the first line is not a headline
        """
        if not self.markdown:
            return

        # Find first H1 headline in markdown
        markdown = self.markdown.splitlines()

        # The title is matched lazily so that an optional closing sequence
        # ("# Title #") and trailing whitespace are not part of it
        m = re.match(r"^#\s*(.*?)(?:\s+#+)?\s*$", markdown[0])
        if m:
            return m.group(1)

    @lazy_property
    def author(self):
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

    @property
    def markdown(self):
        return self.data.markdown or ""

    @property
    def html(self):
        """
        The rendered content of this revision without the headline
        """
        lines = self.markdown.splitlines()

        # Strip off the first line if it contains a heading (as it will be shown separately)
        if lines and lines[0].startswith("#"):
            lines = lines[1:]

        renderer = self.backend.wiki.render(self.page, "\n".join(lines), revision=self.timestamp)

        return renderer.html

    # Linked Files

    @property
    def files(self):
        """
        The files that are linked by this revision (as reported by the renderer)
        """
        renderer = self.backend.wiki.render(self.page, self.markdown, revision=self.timestamp)

        return renderer.files

    def _store_linked_files(self):
        self.db.executemany("INSERT INTO wiki_linked_files(page_id, path) \
            VALUES(%s, %s)", ((self.id, file) for file in self.files))

    @property
    def timestamp(self):
        return self.data.timestamp

    def was_deleted(self):
        # Deleted pages are stored as a revision without any content
        return not self.markdown

    @lazy_property
    def breadcrumbs(self):
        return self.backend.wiki.make_breadcrumbs(self.page)

    def is_latest_revision(self):
        return self.get_latest_revision() == self

    def get_latest_revision(self):
        """
        Returns the newest revision of this page
        """
        revisions = self.get_revisions()

        # Return first object
        for rev in revisions:
            return rev

    def get_revisions(self):
        """
        Returns all revisions of this page, newest first
        """
        return self.backend.wiki._get_pages("SELECT * FROM wiki \
            WHERE page = %s ORDER BY timestamp DESC", self.page)

    @lazy_property
    def previous_revision(self):
        """
        The revision right before this one, or None if this is the first
        """
        return self.backend.wiki._get_page("SELECT * FROM wiki \
            WHERE page = %s AND timestamp < %s ORDER BY timestamp DESC \
            LIMIT 1", self.page, self.timestamp)

    @property
    def changes(self):
        return self.data.changes

    # ACL

    def check_acl(self, account):
        return self.backend.wiki.check_acl(self.page, account)

    # Watchers

    @lazy_property
    def diff(self):
        """
        A unified diff against the previous revision,
        or None if there is no previous revision
        """
        if self.previous_revision:
            diff = difflib.unified_diff(
                self.previous_revision.markdown.splitlines(),
                self.markdown.splitlines(),
            )

            return "\n".join(diff)

    @property
    def watchers(self):
        """
        Yields the accounts of everyone who is watching this page
        """
        res = self.db.query("SELECT uid FROM wiki_watchlist \
            WHERE page = %s", self.page)

        for row in res:
            # Search for account by UID and skip if none was found
            account = self.backend.accounts.get_by_uid(row.uid)
            if not account:
                continue

            # Return the account
            yield account

    def is_watched_by(self, account):
        res = self.db.get("SELECT 1 FROM wiki_watchlist \
            WHERE page = %s AND uid = %s", self.page, account.uid)

        if res:
            return True

        return False

    def add_watcher(self, account):
        # Nothing to do if the account is watching this page already
        if self.is_watched_by(account):
            return

        self.db.execute("INSERT INTO wiki_watchlist(page, uid) \
            VALUES(%s, %s)", self.page, account.uid)

    def remove_watcher(self, account):
        self.db.execute("DELETE FROM wiki_watchlist \
            WHERE page = %s AND uid = %s", self.page, account.uid)

    def _send_watcher_emails(self, excludes=None):
        """
        Sends a notification about this revision to all watchers who
        have access to the page and who are not listed in "excludes"
        """
        # Do not use a mutable object as default argument
        if excludes is None:
            excludes = []

        # Nothing to do if there was no previous revision
        if not self.previous_revision:
            return

        for watcher in self.watchers:
            # Skip everyone who is excluded
            if watcher in excludes:
                logging.debug("Excluding %s", watcher)
                continue

            # Check permissions
            if not self.backend.wiki.check_acl(self.page, watcher):
                logging.debug("Watcher %s does not have permissions", watcher)
                continue

            logging.debug("Sending watcher email to %s", watcher)

            # Compose message
            self.backend.messages.send_template("wiki/messages/page-changed",
                account=watcher, page=self, priority=-10)

    def restore(self, author, address, comment=None):
        """
        Creates a new revision with the content of this one and returns it
        """
        changes = "Restore to revision from %s" % self.timestamp.isoformat()

        # Append comment
        if comment:
            changes = "%s: %s" % (changes, comment)

        return self.backend.wiki.create_page(self.page,
            author, self.markdown, changes=changes, address=address)
592
593
class File(misc.Object):
    """
    A file that has been uploaded to the wiki
    """

    def init(self, id, data):
        self.id = id
        self.data = data

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, os.path.join(self.path, self.filename))

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.id == other.id

        return NotImplemented

    def __hash__(self):
        # Defining __eq__ would otherwise make files unhashable.
        # Equal files share the same ID and therefore the same hash.
        return hash(self.id)

    @property
    def url(self):
        return "/docs%s" % os.path.join(self.path, self.filename)

    @property
    def path(self):
        return self.data.path

    @property
    def filename(self):
        return self.data.filename

    @property
    def mimetype(self):
        return self.data.mimetype

    @property
    def size(self):
        return self.data.size

    @lazy_property
    def author(self):
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

    @property
    def created_at(self):
        return self.data.created_at

    # Files can be accessed like pages which call this "timestamp"
    timestamp = created_at

    def delete(self, author=None):
        """
        Marks this file as deleted.

        Raises RuntimeError if the file cannot be deleted.
        """
        if not self.can_be_deleted():
            raise RuntimeError("Cannot delete %s" % self)

        self.db.execute("UPDATE wiki_files SET deleted_at = NOW(), deleted_by = %s \
            WHERE id = %s", author.uid if author else None, self.id)

    def can_be_deleted(self):
        # Cannot be deleted if still in use
        if self.pages:
            return False

        # Can be deleted
        return True

    @property
    def deleted_at(self):
        return self.data.deleted_at

    def get_latest_revision(self):
        """
        Returns the newest file with the same path and filename
        """
        revisions = self.get_revisions()

        # Return first object
        for rev in revisions:
            return rev

    def get_revisions(self):
        """
        Returns a list of all files with the same path and filename, newest first
        """
        revisions = self.backend.wiki._get_files("SELECT * FROM wiki_files \
            WHERE path = %s AND filename = %s ORDER BY created_at DESC", self.path, self.filename)

        return list(revisions)

    def is_pdf(self):
        return self.mimetype in ("application/pdf", "application/x-pdf")

    def is_image(self):
        return self.mimetype.startswith("image/")

    def is_vector_image(self):
        return self.mimetype in ("image/svg+xml",)

    def is_bitmap_image(self):
        return self.is_image() and not self.is_vector_image()

    @lazy_property
    def blob(self):
        """
        The content of the file as bytes, or None if the blob could not be found
        """
        res = self.db.get("SELECT data FROM wiki_blobs \
            WHERE id = %s", self.data.blob_id)

        if res:
            return bytes(res.data)

    async def get_thumbnail(self, size):
        """
        Returns a thumbnail of this image in the requested size.

        Thumbnails are fetched from the cache if possible and
        are otherwise generated and put into the cache.
        """
        assert self.is_bitmap_image()

        # The key changes with every upload as it contains the creation time
        cache_key = "-".join((
            self.path,
            util.normalize(self.filename),
            self.created_at.isoformat(),
            "%spx" % size,
        ))

        # Try to fetch the data from the cache
        thumbnail = await self.backend.cache.get(cache_key)
        if thumbnail:
            return thumbnail

        # Generate the thumbnail
        thumbnail = util.generate_thumbnail(self.blob, size)

        # Put it into the cache for forever
        await self.backend.cache.set(cache_key, thumbnail)

        return thumbnail

    @property
    def pages(self):
        """
        Returns a list of all pages this file is linked by
        """
        pages = self.backend.wiki._get_pages("""
            SELECT
                wiki.*
            FROM
                wiki_linked_files
            JOIN
                wiki_current ON wiki_linked_files.page_id = wiki_current.id
            LEFT JOIN
                wiki ON wiki_linked_files.page_id = wiki.id
            WHERE
                wiki_linked_files.path = %s
            ORDER BY
                wiki.page
            """, os.path.join(self.path, self.filename),
        )

        return list(pages)
733
734
class WikiRenderer(misc.Object):
    """
    Renders markdown to HTML and rewrites links and images for the wiki
    """

    # URLs that start with any of these are external links
    schemas = (
        "ftp://",
        "git://",
        "http://",
        "https://",
        "rsync://",
        "sftp://",
        "ssh://",
        "webcal://",
    )

    # Links
    _links = re.compile(r"<a href=\"(.*?)\">(.*?)</a>")

    # Images
    _images = re.compile(r"<img alt(?:=\"(.*?)\")? src=\"(.*?)\" (?:title=\"(.*?)\" )?/>")

    def init(self, path, text, revision=None):
        self.path = path
        self.text = text

        # Optionally, the revision of the rendered page
        self.revision = revision

        # Markdown Renderer
        self.renderer = markdown.Markdown(
            extensions=[
                LinkedFilesExtractorExtension(),
                PrettyLinksExtension(),
                "codehilite",
                "fenced_code",
                "footnotes",
                "nl2br",
                "sane_lists",
                "tables",
                "toc",
            ],
        )

        # Render!
        self.html = self._render()

    def _render_link(self, m):
        """
        Rewrites a rendered link.

        External links and email addresses are tagged with a CSS class.
        Everything else is resolved relative to the rendered page.
        """
        url, text = m.groups()

        # External Links
        if url.startswith(tuple(self.schemas)):
            return """<a class="link-external" href="%s">%s</a>""" % \
                (url, text or url)

        # Emails
        if "@" in url:
            # Strip mailto:
            if url.startswith("mailto:"):
                url = url[7:]

            return """<a class="link-external" href="mailto:%s">%s</a>""" % \
                (url, text or url)

        # Everything else must be an internal link
        path = self.backend.wiki.make_path(self.path, url)

        return """<a href="/docs%s">%s</a>""" % \
            (path, text or self.backend.wiki.get_page_title(path))

    def _render_image(self, m):
        """
        Rewrites a rendered image into a figure with a modal.

        External images are left untouched. Images that could not be
        found in the wiki are replaced by a HTML comment.
        """
        alt_text, url, caption = m.groups()

        # Skip any absolute and external URLs. The template below can only
        # point to files that are stored in the wiki.
        if url.startswith(("https://", "http://")):
            return m.group(0)

        # Compute a hash over the URL
        digest = hashlib.md5(url.encode()).hexdigest()

        template = """
            <div class="columns is-centered">
                <div class="column is-8">
                    <figure class="image modal-trigger" data-target="%(id)s">
                        <img src="/docs%(url)s?s=640&amp;%(args)s" alt="%(caption)s">

                        <figcaption class="figure-caption">%(caption)s</figcaption>
                    </figure>

                    <div class="modal is-large" id="%(id)s">
                        <div class="modal-background"></div>

                        <div class="modal-content">
                            <p class="image">
                                <img src="/docs%(url)s?s=1920&amp;%(args)s" alt="%(caption)s"
                                    loading="lazy">
                            </p>

                            <a class="button is-small" href="/docs%(url)s?action=detail">
                                <span class="icon">
                                    <i class="fa-solid fa-circle-info"></i>
                                </span>
                            </a>
                        </div>

                        <button class="modal-close is-large" aria-label="close"></button>
                    </div>
                </div>
            </div>
        """

        # Try to split query string
        url, delimiter, qs = url.partition("?")

        # Parse query arguments
        args = urllib.parse.parse_qs(qs)

        # Build absolute path
        url = self.backend.wiki.make_path(self.path, url)

        # Find image
        file = self.backend.wiki.get_file_by_path(url, revision=self.revision)
        if not file or not file.is_image():
            return "<!-- Could not find image %s in %s -->" % (url, self.path)

        # Remove any requested size
        args.pop("s", None)

        # Link the image that has been the current version at the time of the page edit
        args["revision"] = file.timestamp

        return template % {
            "caption" : caption or "",
            "id"      : digest,
            "url"     : url,
            # parse_qs returns a list of values for every key
            "args"    : urllib.parse.urlencode(args, doseq=True),
        }

    def _render(self):
        logging.debug("Rendering %s", self.path)

        # Render...
        text = self.renderer.convert(self.text)

        # Postprocess links
        text = self._links.sub(self._render_link, text)

        # Postprocess images to <figure>
        text = self._images.sub(self._render_image, text)

        return text

    @lazy_property
    def files(self):
        """
        A list of all linked files that have been part of the rendered markup
        """
        files = []

        # The list is not set on the renderer when the text was blank,
        # because Markdown won't run any tree processors then
        for url in getattr(self.renderer, "files", []):
            # Skip images and links without any URL
            if not url:
                continue

            # Skip external images
            if url.startswith(("https://", "http://")):
                continue

            # Make the URL absolute
            url = self.backend.wiki.make_path(self.path, url)

            # Check if this is a file (it could also just be a page)
            file = self.backend.wiki.get_file_by_path(url)
            if file:
                files.append(url)

        return files
914
915
class PrettyLinksExtension(markdown.extensions.Extension):
    """
    Registers the preprocessors that create links to Bugzilla and CVE
    """

    def extendMarkdown(self, md):
        preprocessors = (
            # Create links to Bugzilla
            ("bugzilla", BugzillaLinksPreprocessor),

            # Create links to CVE
            ("cve", CVELinksPreprocessor),
        )

        # Both are registered with the same priority
        for name, preprocessor in preprocessors:
            md.preprocessors.register(preprocessor(md), name, 10)
923
924
class BugzillaLinksPreprocessor(markdown.preprocessors.Preprocessor):
    """
    Turns a hash followed by at least five digits into a link to Bugzilla
    """
    regex = re.compile(r"(?:#(\d{5,}))", re.I)

    def run(self, lines):
        # Markdown link which keeps the reference as link text
        link = r"[#\1](https://bugzilla.ipfire.org/show_bug.cgi?id=\1)"

        for text in lines:
            yield self.regex.sub(link, text)
931
932
class CVELinksPreprocessor(markdown.preprocessors.Preprocessor):
    """
    Turns references like "CVE-2020-1234" or "CVE 2020-1234" into links
    """
    regex = re.compile(r"(?:CVE)[\s\-](\d{4}\-\d+)")

    def run(self, lines):
        # Markdown link which always shows the reference with a dash
        link = r"[CVE-\1](https://cve.mitre.org/cgi-bin/cvename.cgi?name=\1)"

        for text in lines:
            yield self.regex.sub(link, text)
939
940
class LinkedFilesExtractor(markdown.treeprocessors.Treeprocessor):
    """
    Finds all Linked Files

    The URLs of all images and links are collected in a list
    which is stored as "files" on the Markdown instance.
    """
    def run(self, root):
        self.md.files = []

        # Find all images and store the URLs
        for image in root.findall(".//img"):
            src = image.get("src")

            # Skip images without a source
            if src:
                self.md.files.append(src)

        # Find all links
        for link in root.findall(".//a"):
            href = link.get("href")

            # Skip anchors that do not link anywhere
            if href:
                self.md.files.append(href)
959
960
class LinkedFilesExtractorExtension(markdown.extensions.Extension):
    """
    Registers the LinkedFilesExtractor as a tree processor
    """

    def extendMarkdown(self, md):
        extractor = LinkedFilesExtractor(md)

        md.treeprocessors.register(extractor, "linked-files-extractor", 10)