]> git.ipfire.org Git - ipfire.org.git/blob - src/backend/wiki.py
docs: Deliver images with a better quality (and larger size)
[ipfire.org.git] / src / backend / wiki.py
1 #!/usr/bin/python3
2
3 import difflib
4 import hashlib
5 import logging
6 import markdown
7 import markdown.extensions
8 import markdown.preprocessors
9 import os.path
10 import re
11 import urllib.parse
12
13 from . import misc
14 from . import util
15 from .decorators import *
16
class Wiki(misc.Object):
    """
        Entry point for wiki pages, ACLs and uploaded files.

        All data is read from and written to the database through self.db.
        NOTE(review): self.db and self.backend are presumably provided by
        misc.Object - confirm there.
    """

    def _get_pages(self, query, *args):
        """
            Runs the query and yields one Page object per returned row
        """
        for row in self.db.query(query, *args):
            yield Page(self.backend, row.id, data=row)

    def _get_page(self, query, *args):
        """
            Runs the query and returns a single Page, or None if nothing matched
        """
        row = self.db.get(query, *args)
        if not row:
            return None

        return Page(self.backend, row.id, data=row)

    def __iter__(self):
        """
            Iterates over the current revision of all pages that are not deleted
        """
        return self._get_pages("""
            SELECT
                wiki.*
            FROM
                wiki_current current
            LEFT JOIN
                wiki ON current.id = wiki.id
            WHERE
                current.deleted IS FALSE
            ORDER BY page
            """,
        )

    def make_path(self, page, path):
        """
            Resolves a link target (path) found on the given page
            into a normalised path
        """
        # Absolute links are only normalised
        if path.startswith("/"):
            return os.path.normpath(path)

        # Links starting with ./ are resolved below the page itself
        if path.startswith("./"):
            return os.path.normpath(os.path.join(page, path))

        # All other relative links are resolved next to the page
        return os.path.normpath(os.path.join(os.path.dirname(page), path))

    def page_exists(self, path):
        """
            Returns True if the page was found and has not been deleted
        """
        page = self.get_page(path)

        return bool(page and not page.was_deleted())

    def get_page_title(self, page, default=None):
        """
            Returns the title of the page.

            If the page cannot be found, default is returned, or - if no
            default was given - the last component of the page name.
        """
        doc = self.get_page(page)
        if doc:
            return doc.title

        # The default used to be ignored; the fallback stays the same
        # for callers who do not pass one
        return default or os.path.basename(page)

    def get_page(self, page, revision=None):
        """
            Fetches a page by name.

            Returns the revision with the given timestamp, or the latest
            one if no revision was given. Returns None for paths that
            contain an action (any component starting with an underscore)
            or if nothing was found.
        """
        page = Page.sanitise_page_name(page)

        # Components starting with an underscore are actions and not pages
        if any(part.startswith("_") for part in page.split("/")):
            return None

        if revision:
            return self._get_page(
                "SELECT * FROM wiki WHERE page = %s "
                "AND timestamp = %s", page, revision)

        return self._get_page(
            "SELECT * FROM wiki WHERE page = %s "
            "ORDER BY timestamp DESC LIMIT 1", page)

    def get_recent_changes(self, account, limit=None):
        """
            Yields page revisions, newest first, that the account may access.

            Stops after limit pages. Without a limit (None), all
            accessible revisions are yielded.
        """
        pages = self._get_pages(
            "SELECT * FROM wiki "
            "ORDER BY timestamp DESC")

        count = 0
        for page in pages:
            if not page.check_acl(account):
                continue

            yield page

            # Only count when a limit was given; the default of None used to
            # raise a TypeError on "limit -= 1"
            count += 1
            if limit is not None and count == limit:
                break

    def create_page(self, page, author, content, changes=None, address=None):
        """
            Stores a new revision of the page and returns it.

            Empty content is stored as NULL. Afterwards, the linked files
            are recorded and the watchers (except the author) are notified.
        """
        page = Page.sanitise_page_name(page)

        # Write page to the database
        page = self._get_page("""
            INSERT INTO
                wiki
            (
                page,
                author_uid,
                markdown,
                changes,
                address
            ) VALUES (
                %s, %s, %s, %s, %s
            )
            RETURNING *
            """, page, author.uid, content or None, changes, address,
        )

        # Store any linked files
        page._store_linked_files()

        # Send email to all watchers
        page._send_watcher_emails(excludes=[author])

        return page

    def delete_page(self, page, author, **kwargs):
        """
            Deletes a page by creating a new revision without any content.

            Does nothing if the page does not exist.
        """
        if not self.get_page(page):
            return

        self.create_page(page, author=author, content=None, **kwargs)

    def make_breadcrumbs(self, url):
        """
            Returns a list of (path, title) tuples for all parents of url
        """
        # Split and strip all empty elements (double slashes)
        parts = [e for e in url.split("/") if e]

        breadcrumbs = []

        # The last element is the page itself and is left out
        for i in range(1, len(parts)):
            part = "/".join(parts[:i])

            breadcrumbs.append(
                ("/%s" % part, self.get_page_title(part, os.path.basename(part))),
            )

        return breadcrumbs

    def search(self, query, account=None, limit=None):
        """
            Searches the search index and returns a list of the best
            matching pages that the account has permission to access
        """
        res = self._get_pages("""
            SELECT
                wiki.*
            FROM
                wiki_search_index search_index
            LEFT JOIN
                wiki ON search_index.wiki_id = wiki.id
            WHERE
                search_index.document @@ websearch_to_tsquery('english', %s)
            ORDER BY
                ts_rank(search_index.document, websearch_to_tsquery('english', %s)) DESC
            """, query, query,
        )

        pages = []
        for page in res:
            # Skip any pages the user doesn't have permission for
            if not page.check_acl(account):
                continue

            pages.append(page)

            # Break when we have found enough pages
            if limit and len(pages) >= limit:
                break

        return pages

    def refresh(self):
        """
            Needs to be called after a page has been changed
        """
        self.db.execute("REFRESH MATERIALIZED VIEW wiki_search_index")

    def get_watchlist(self, account):
        """
            Returns a sorted list of all pages on the watchlist of the account
        """
        pages = self._get_pages("""
            WITH pages AS (
                SELECT
                    *
                FROM
                    wiki_current
                LEFT JOIN
                    wiki ON wiki_current.id = wiki.id
            )

            SELECT
                *
            FROM
                wiki_watchlist watchlist
            JOIN
                pages ON watchlist.page = pages.page
            WHERE
                watchlist.uid = %s
            """, account.uid,
        )

        return sorted(pages)

    # ACL

    def check_acl(self, page, account):
        """
            Returns True if the account may access the page.

            Only the ACL with the longest path that is a (case-insensitive)
            prefix of the page is evaluated. Without any matching ACL,
            access is permitted.
        """
        res = self.db.query("""
            SELECT
                *
            FROM
                wiki_acls
            WHERE
                %s ILIKE (path || '%%')
            ORDER BY
                LENGTH(path) DESC
            LIMIT 1
            """, page,
        )

        for row in res:
            # Access not permitted when user is not logged in
            if not account:
                return False

            # If user is in a matching group, we grant permission,
            # otherwise access is not permitted
            return any(account.is_member_of_group(group) for group in row.groups)

        # If no ACLs are found, we permit access
        return True

    # Files

    def _get_files(self, query, *args):
        """
            Runs the query and yields one File object per returned row
        """
        for row in self.db.query(query, *args):
            yield File(self.backend, row.id, data=row)

    def _get_file(self, query, *args):
        """
            Runs the query and returns a single File, or None if nothing matched
        """
        row = self.db.get(query, *args)
        if not row:
            return None

        return File(self.backend, row.id, data=row)

    def get_files(self, path):
        """
            Returns a list of all files in path that have not been deleted
        """
        files = self._get_files("""
            SELECT
                *
            FROM
                wiki_files
            WHERE
                path = %s
            AND
                deleted_at IS NULL
            ORDER BY filename
            """, path,
        )

        return list(files)

    def get_file_by_path(self, path, revision=None):
        """
            Fetches a file by its full path (directory and filename).

            If revision is given, the newest file that has been created at
            or before that time is returned (deleted files included).
            Otherwise the file that has not been deleted is returned.
        """
        path, filename = os.path.dirname(path), os.path.basename(path)

        # Fetch latest version
        if not revision:
            return self.get_file_by_path_and_filename(path, filename)

        # Fetch a specific revision
        return self._get_file("""
            SELECT
                *
            FROM
                wiki_files
            WHERE
                path = %s
            AND
                filename = %s
            AND
                created_at <= %s
            ORDER BY
                created_at DESC
            LIMIT 1
            """, path, filename, revision,
        )

    def get_file_by_path_and_filename(self, path, filename):
        """
            Fetches the file called filename in path that has not been deleted
        """
        return self._get_file("""
            SELECT
                *
            FROM
                wiki_files
            WHERE
                path = %s
            AND
                filename = %s
            AND
                deleted_at IS NULL
            """, path, filename,
        )

    def upload(self, path, filename, data, mimetype, author, address):
        """
            Stores an uploaded file and returns the new File object.

            An existing file with the same path and filename is deleted first.
        """
        # Replace any existing files
        file = self.get_file_by_path_and_filename(path, filename)
        if file:
            file.delete(author)

        # Upload the blob first (a blob with the same digest is reused)
        blob = self.db.get("""
            INSERT INTO
                wiki_blobs(data)
            VALUES
                (%s)
            ON CONFLICT
                (digest(data, %s))
            DO UPDATE
                SET data = EXCLUDED.data
            RETURNING id
            """, data, "MD5",
        )

        # Create entry for file
        return self._get_file("""
            INSERT INTO
                wiki_files
            (
                path,
                filename,
                author_uid,
                address,
                mimetype,
                blob_id,
                size
            ) VALUES (
                %s, %s, %s, %s, %s, %s, %s
            )
            RETURNING *
            """, path, filename, author.uid, address, mimetype, blob.id, len(data),
        )

    def render(self, path, text, **kwargs):
        """
            Returns a WikiRenderer for text which is rendered as page path
        """
        return WikiRenderer(self.backend, path, text, **kwargs)
368
369
class Page(misc.Object):
    """
        One revision of a wiki page, backed by a row of the wiki table
    """

    def init(self, id, data=None):
        # NOTE(review): presumably called by the constructor of
        # misc.Object - confirm there
        self.id = id
        self.data = data

    def __repr__(self):
        return "<%s %s %s>" % (self.__class__.__name__, self.page, self.timestamp)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.id == other.id

        return NotImplemented

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable,
        # so hash by what equality is based on
        return hash(self.id)

    def __lt__(self, other):
        # Sort by page name first and then by timestamp
        if isinstance(other, self.__class__):
            if self.page == other.page:
                return self.timestamp < other.timestamp

            return self.page < other.page

        return NotImplemented

    @staticmethod
    def sanitise_page_name(page):
        """
            Returns the page name with exactly one leading slash, no trailing
            slash and no repeated slashes. Empty names become "/".
        """
        if not page:
            return "/"

        # Remove any repeated slashes (a single replace of "//" would
        # leave "//" behind for three or more slashes in a row)
        page = re.sub(r"/{2,}", "/", page)

        # Make sure that the page name does NOT end with a /
        page = page.rstrip("/")

        # Make sure the page name starts with a /
        if not page.startswith("/"):
            page = "/%s" % page

        return page

    @property
    def url(self):
        return "/docs%s" % self.page

    @property
    def full_url(self):
        return "https://www.ipfire.org%s" % self.url

    @property
    def page(self):
        return self.data.page

    @property
    def title(self):
        # Fall back to the last component of the page name
        return self._title or os.path.basename(self.page[1:])

    @property
    def _title(self):
        """
            The headline from the first line of the markdown (if any)
        """
        if not self.markdown:
            return None

        first_line = self.markdown.splitlines()[0]

        # The headline text is matched non-greedily, because otherwise
        # it would always swallow the optional closing " #"
        m = re.match(r"^#\s*(.*?)( #)?$", first_line)
        if m:
            return m.group(1)

        return None

    @lazy_property
    def author(self):
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

    @property
    def markdown(self):
        return self.data.markdown or ""

    @property
    def html(self):
        """
            The rendered page without its headline
        """
        lines = self.markdown.splitlines()

        # Strip off the first line if it contains a heading (as it will be shown separately)
        if lines and lines[0].startswith("#"):
            lines = lines[1:]

        renderer = self.backend.wiki.render(self.page, "\n".join(lines), revision=self.timestamp)

        return renderer.html

    # Linked Files

    @property
    def files(self):
        renderer = self.backend.wiki.render(self.page, self.markdown, revision=self.timestamp)

        return renderer.files

    def _store_linked_files(self):
        """
            Stores the paths of all files that this revision links to
        """
        self.db.executemany(
            "INSERT INTO wiki_linked_files(page_id, path) "
            "VALUES(%s, %s)", ((self.id, file) for file in self.files))

    @property
    def timestamp(self):
        return self.data.timestamp

    def was_deleted(self):
        # Deleted pages are stored as a revision without any content
        return not self.markdown

    @lazy_property
    def breadcrumbs(self):
        return self.backend.wiki.make_breadcrumbs(self.page)

    def is_latest_revision(self):
        return self.get_latest_revision() == self

    def get_latest_revision(self):
        """
            Returns the newest revision of this page
        """
        # Revisions are ordered newest first
        return next(iter(self.get_revisions()), None)

    def get_revisions(self):
        """
            Iterates over all revisions of this page, newest first
        """
        return self.backend.wiki._get_pages(
            "SELECT * FROM wiki "
            "WHERE page = %s ORDER BY timestamp DESC", self.page)

    @lazy_property
    def previous_revision(self):
        return self.backend.wiki._get_page(
            "SELECT * FROM wiki "
            "WHERE page = %s AND timestamp < %s ORDER BY timestamp DESC "
            "LIMIT 1", self.page, self.timestamp)

    @property
    def changes(self):
        return self.data.changes

    # ACL

    def check_acl(self, account):
        return self.backend.wiki.check_acl(self.page, account)

    # Watchers

    @lazy_property
    def diff(self):
        """
            Unified diff against the previous revision,
            or None if there is no previous revision
        """
        if not self.previous_revision:
            return None

        diff = difflib.unified_diff(
            self.previous_revision.markdown.splitlines(),
            self.markdown.splitlines(),
        )

        return "\n".join(diff)

    @property
    def watchers(self):
        """
            Yields the accounts of everybody who is watching this page
        """
        res = self.db.query(
            "SELECT uid FROM wiki_watchlist "
            "WHERE page = %s", self.page)

        for row in res:
            # Search for account by UID and skip if none was found
            account = self.backend.accounts.get_by_uid(row.uid)
            if account:
                yield account

    def is_watched_by(self, account):
        res = self.db.get(
            "SELECT 1 FROM wiki_watchlist "
            "WHERE page = %s AND uid = %s", self.page, account.uid)

        return bool(res)

    def add_watcher(self, account):
        # Nothing to do if the account is watching already
        if self.is_watched_by(account):
            return

        self.db.execute(
            "INSERT INTO wiki_watchlist(page, uid) "
            "VALUES(%s, %s)", self.page, account.uid)

    def remove_watcher(self, account):
        self.db.execute(
            "DELETE FROM wiki_watchlist "
            "WHERE page = %s AND uid = %s", self.page, account.uid)

    def _send_watcher_emails(self, excludes=None):
        """
            Sends a "page changed" message to all watchers who are not
            listed in excludes and who have permission to access the page
        """
        # Avoid a mutable default argument
        if excludes is None:
            excludes = ()

        # Nothing to do if there was no previous revision
        if not self.previous_revision:
            return

        for watcher in self.watchers:
            # Skip everyone who is excluded
            if watcher in excludes:
                logging.debug("Excluding %s", watcher)
                continue

            # Check permissions
            if not self.backend.wiki.check_acl(self.page, watcher):
                logging.debug("Watcher %s does not have permissions", watcher)
                continue

            logging.debug("Sending watcher email to %s", watcher)

            # Compose message
            self.backend.messages.send_template("wiki/messages/page-changed",
                account=watcher, page=self, priority=-10)

    def restore(self, author, address, comment=None):
        """
            Creates a new revision with the content of this one and returns it
        """
        changes = "Restore to revision from %s" % self.timestamp.isoformat()

        # Append comment
        if comment:
            changes = "%s: %s" % (changes, comment)

        return self.backend.wiki.create_page(self.page,
            author, self.markdown, changes=changes, address=address)
592
593
class File(misc.Object):
    """
        A file that has been uploaded to the wiki,
        backed by a row of the wiki_files table
    """

    def init(self, id, data):
        # NOTE(review): presumably called by the constructor of
        # misc.Object - confirm there
        self.id = id
        self.data = data

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.id == other.id

        return NotImplemented

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable,
        # so hash by what equality is based on
        return hash(self.id)

    @property
    def url(self):
        return "/docs%s" % os.path.join(self.path, self.filename)

    @property
    def path(self):
        return self.data.path

    @property
    def filename(self):
        return self.data.filename

    @property
    def mimetype(self):
        return self.data.mimetype

    @property
    def size(self):
        return self.data.size

    @lazy_property
    def author(self):
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

    @property
    def created_at(self):
        return self.data.created_at

    # The time of creation is also available as timestamp
    timestamp = created_at

    def delete(self, author=None):
        """
            Marks the file as deleted.

            Raises RuntimeError if the file cannot be deleted.
        """
        if not self.can_be_deleted():
            raise RuntimeError("Cannot delete %s" % self)

        self.db.execute(
            "UPDATE wiki_files SET deleted_at = NOW(), deleted_by = %s "
            "WHERE id = %s", author.uid if author else None, self.id)

    def can_be_deleted(self):
        # Cannot be deleted if still in use by any page
        return not self.pages

    @property
    def deleted_at(self):
        return self.data.deleted_at

    def get_latest_revision(self):
        """
            Returns the newest revision of this file
        """
        # Revisions are ordered newest first
        return next(iter(self.get_revisions()), None)

    def get_revisions(self):
        """
            Returns a list of all files that have been stored
            using the same path and filename, newest first
        """
        revisions = self.backend.wiki._get_files(
            "SELECT * FROM wiki_files "
            "WHERE path = %s AND filename = %s ORDER BY created_at DESC",
            self.path, self.filename)

        return list(revisions)

    def is_pdf(self):
        return self.mimetype in ("application/pdf", "application/x-pdf")

    def is_image(self):
        return self.mimetype.startswith("image/")

    def is_vector_image(self):
        return self.mimetype in ("image/svg+xml",)

    def is_bitmap_image(self):
        return self.is_image() and not self.is_vector_image()

    @lazy_property
    def blob(self):
        """
            The content of the file as bytes
        """
        res = self.db.get(
            "SELECT data FROM wiki_blobs "
            "WHERE id = %s", self.data.blob_id)

        if res:
            return bytes(res.data)

    async def get_thumbnail(self, size, format=None):
        """
            Returns a thumbnail of this (bitmap) image.

            Thumbnails are fetched from the cache if available,
            and are generated and put into the cache otherwise.
        """
        assert self.is_bitmap_image()

        # The key covers everything that changes the outcome
        cache_key = ":".join((
            "wiki",
            "thumbnail",
            self.path,
            util.normalize(self.filename),
            self.created_at.isoformat(),
            format or "N/A",
            "%spx" % size,
        ))

        # Try to fetch the data from the cache
        thumbnail = await self.backend.cache.get(cache_key)

        if not thumbnail:
            # Generate the thumbnail
            thumbnail = util.generate_thumbnail(self.blob, size, format=format, quality=95)

            # Put it into the cache for forever
            await self.backend.cache.set(cache_key, thumbnail)

        return thumbnail

    @property
    def pages(self):
        """
            Returns a list of all pages this file is linked by
        """
        pages = self.backend.wiki._get_pages("""
            SELECT
                wiki.*
            FROM
                wiki_linked_files
            JOIN
                wiki_current ON wiki_linked_files.page_id = wiki_current.id
            LEFT JOIN
                wiki ON wiki_linked_files.page_id = wiki.id
            WHERE
                wiki_linked_files.path = %s
            ORDER BY
                wiki.page
            """, os.path.join(self.path, self.filename),
        )

        return list(pages)
736
737
class WikiRenderer(misc.Object):
    """
        Renders the markdown of a page to HTML.

        The HTML is available as self.html right after the object has
        been created. Links and images are rewritten in a second pass.
    """

    # URLs starting with any of these are treated as external links
    schemas = (
        "ftp://",
        "git://",
        "http://",
        "https://",
        "rsync://",
        "sftp://",
        "ssh://",
        "webcal://",
    )

    # Links
    _links = re.compile(r"<a href=\"(.*?)\">(.*?)</a>")

    # Images
    _images = re.compile(r"<img alt(?:=\"(.*?)\")? src=\"(.*?)\" (?:title=\"(.*?)\" )?/>")

    def init(self, path, text, revision=None):
        # NOTE(review): presumably called by the constructor of
        # misc.Object - confirm there
        self.path = path
        self.text = text

        # Optionally, the revision of the rendered page
        self.revision = revision

        # Markdown Renderer
        self.renderer = markdown.Markdown(
            extensions=[
                LinkedFilesExtractorExtension(),
                PrettyLinksExtension(),
                "codehilite",
                "fenced_code",
                "footnotes",
                "nl2br",
                "sane_lists",
                "tables",
                "toc",
            ],
        )

        # Render!
        self.html = self._render()

    def _render_link(self, m):
        """
            Returns the replacement for a link that has been matched by _links
        """
        url, text = m.groups()

        # External Links
        if url.startswith(self.schemas):
            return """<a class="link-external" href="%s">%s</a>""" % \
                (url, text or url)

        # Emails
        if "@" in url:
            # Strip mailto:
            if url.startswith("mailto:"):
                url = url[7:]

            return """<a class="link-external" href="mailto:%s">%s</a>""" % \
                (url, text or url)

        # Everything else must be an internal link
        path = self.backend.wiki.make_path(self.path, url)

        return """<a href="/docs%s">%s</a>""" % \
            (path, text or self.backend.wiki.get_page_title(path))

    def _render_image(self, m):
        """
            Returns the replacement for an image that has been matched by
            _images: a figure with a thumbnail and a modal with a larger version.

            External images are linked directly. Images that cannot be
            found are replaced by an HTML comment.
        """
        # The alt text is not used; the caption is shown in its place
        _alt_text, url, caption = m.groups()

        # Compute a hash over the URL which is used to link figure and modal
        h = hashlib.new("md5")
        h.update(url.encode())
        modal_id = h.hexdigest()

        html = """
            <div class="columns is-centered">
                <div class="column is-8">
                    <figure class="image modal-trigger" data-target="%(id)s">
                        <img src="%(thumbnail)s" alt="%(caption)s">

                        <figcaption class="figure-caption">%(caption)s</figcaption>
                    </figure>

                    <div class="modal is-large" id="%(id)s">
                        <div class="modal-background"></div>

                        <div class="modal-content">
                            <p class="image">
                                <img src="%(image)s" alt="%(caption)s"
                                    loading="lazy">
                            </p>

                            <a class="button is-small" href="%(detail)s">
                                <span class="icon">
                                    <i class="fa-solid fa-circle-info"></i>
                                </span>
                            </a>
                        </div>

                        <button class="modal-close is-large" aria-label="close"></button>
                    </div>
                </div>
            </div>
        """

        # Absolute and external URLs are used as they are. This branch used
        # to fail because it read the query arguments before they were parsed,
        # and the template would have prefixed the URL with /docs.
        if url.startswith(("https://", "http://")):
            return html % {
                "caption"   : caption or "",
                "id"        : modal_id,
                "thumbnail" : url,
                "image"     : url,
                "detail"    : url,
            }

        # Try to split query string
        url, _delimiter, qs = url.partition("?")

        # Parse query arguments
        args = urllib.parse.parse_qs(qs)

        # Build absolute path
        url = self.backend.wiki.make_path(self.path, url)

        # Find image
        file = self.backend.wiki.get_file_by_path(url, revision=self.revision)
        if not file or not file.is_image():
            return "<!-- Could not find image %s in %s -->" % (url, self.path)

        # Remove any requested size
        args.pop("s", None)

        # Link the image that has been the current version at the time of the page edit
        args["revision"] = file.timestamp

        # parse_qs returns a list of values for each key, so doseq is
        # required to write them back as individual arguments
        query = urllib.parse.urlencode(args, doseq=True)

        return html % {
            "caption"   : caption or "",
            "id"        : modal_id,
            "thumbnail" : "/docs%s?s=640&amp;%s" % (url, query),
            "image"     : "/docs%s?s=1920&amp;%s" % (url, query),
            "detail"    : "/docs%s?action=detail" % url,
        }

    def _render(self):
        logging.debug("Rendering %s", self.path)

        # Render...
        text = self.renderer.convert(self.text)

        # Postprocess links
        text = self._links.sub(self._render_link, text)

        # Postprocess images to <figure>
        text = self._images.sub(self._render_image, text)

        return text

    @lazy_property
    def files(self):
        """
            A list of all linked files that have been part of the rendered markup
        """
        files = []

        for url in self.renderer.files:
            # Skip anything without a URL
            if not url:
                continue

            # Skip external images
            if url.startswith(("https://", "http://")):
                continue

            # Make the URL absolute
            url = self.backend.wiki.make_path(self.path, url)

            # Check if this is a file (it could also just be a page)
            if self.backend.wiki.get_file_by_path(url):
                files.append(url)

        return files
917
918
class PrettyLinksExtension(markdown.extensions.Extension):
    """
        Registers the preprocessors that create links to Bugzilla and CVE
    """
    def extendMarkdown(self, md):
        preprocessors = (
            # Create links to Bugzilla
            ("bugzilla", BugzillaLinksPreprocessor),

            # Create links to CVE
            ("cve", CVELinksPreprocessor),
        )

        for name, preprocessor in preprocessors:
            md.preprocessors.register(preprocessor(md), name, 10)
926
927
class BugzillaLinksPreprocessor(markdown.preprocessors.Preprocessor):
    """
        Converts any # followed by at least five digits into a link to Bugzilla
    """
    regex = re.compile(r"(?:#(\d{5,}))", re.I)

    def run(self, lines):
        # Return a list (and not a generator) as preprocessors are
        # expected to pass on a list of lines
        return [
            self.regex.sub(r"[#\1](https://bugzilla.ipfire.org/show_bug.cgi?id=\1)", line)
            for line in lines
        ]
934
935
class CVELinksPreprocessor(markdown.preprocessors.Preprocessor):
    """
        Converts CVE numbers (CVE-YYYY-N or CVE YYYY-N) into a link to cve.mitre.org
    """
    regex = re.compile(r"(?:CVE)[\s\-](\d{4}\-\d+)")

    def run(self, lines):
        # Return a list (and not a generator) as preprocessors are
        # expected to pass on a list of lines
        return [
            self.regex.sub(r"[CVE-\1](https://cve.mitre.org/cgi-bin/cvename.cgi?name=\1)", line)
            for line in lines
        ]
942
943
class LinkedFilesExtractor(markdown.treeprocessors.Treeprocessor):
    """
        Finds all Linked Files

        The URLs of all images and links of the document are
        collected (images first) and stored as self.md.files.
    """
    def run(self, root):
        self.md.files = []

        # Find all images and links and store their URLs
        for xpath, attribute in ((".//img", "src"), (".//a", "href")):
            for element in root.findall(xpath):
                url = element.get(attribute)

                # Skip any elements that do not carry a URL so that
                # consumers of the list will only ever see strings
                if url is None:
                    continue

                self.md.files.append(url)
962
963
class LinkedFilesExtractorExtension(markdown.extensions.Extension):
    """
        Registers the LinkedFilesExtractor as a tree processor
    """
    def extendMarkdown(self, md):
        extractor = LinkedFilesExtractor(md)

        md.treeprocessors.register(extractor, "linked-files-extractor", 10)