git.ipfire.org Git - ipfire.org.git/blob - src/backend/wiki.py
docs: Fix URL computation
[ipfire.org.git] / src / backend / wiki.py
1 #!/usr/bin/python3
2
3 import difflib
4 import logging
5 import os.path
6 import re
7 import urllib.parse
8
9 from . import misc
10 from . import util
11 from .decorators import *
12
class Wiki(misc.Object):
    """
    Access to wiki pages, their ACLs and uploaded files.

    The database (self.db), cache (self.memcache) and backend
    (self.backend) handles used below are presumably provided by
    misc.Object - confirm there.
    """

    def _get_pages(self, query, *args):
        """
        Runs query with args and yields one Page per returned row.
        """
        for row in self.db.query(query, *args):
            yield Page(self.backend, row.id, data=row)

    def _get_page(self, query, *args):
        """
        Runs query with args and returns the result as a Page.

        Returns None if the query did not return anything.
        """
        res = self.db.get(query, *args)

        if res:
            return Page(self.backend, res.id, data=res)

    def __iter__(self):
        """
        Iterates over the current revision of all pages that
        are not marked as deleted, ordered by page name.
        """
        return self._get_pages(
            "SELECT wiki.* FROM wiki_current current "
            "LEFT JOIN wiki ON current.id = wiki.id "
            "WHERE current.deleted IS FALSE "
            "ORDER BY page",
        )

    def make_path(self, page, path):
        """
        Resolves the link target path as seen from page and
        returns the normalised result.
        """
        # Nothing to do for absolute links
        if path.startswith("/"):
            pass

        # Relative links (one-level down)
        elif path.startswith("./"):
            path = os.path.join(page, path)

        # All other relative links
        else:
            path = os.path.join(os.path.dirname(page), path)

        # Normalise links
        return os.path.normpath(path)

    def page_exists(self, path):
        """
        Returns True if a page exists at path and its latest
        revision has not been deleted.
        """
        page = self.get_page(path)

        # Page must have been found and not deleted
        return page is not None and not page.was_deleted()

    def get_page_title(self, page, default=None):
        """
        Returns the title of page.

        The title is looked up in the cache first and then in the
        database. If the page does not exist, default is used or,
        if no default was given, the last component of the page name.
        """
        # Use the sanitised page name so that the cache key matches
        # the one that create_page() updates
        page = Page.sanitise_page_name(page)

        key = "wiki:title:%s" % page

        # Try to retrieve title from cache
        title = self.memcache.get(key)
        if title:
            return title

        # If the title has not been in the cache, we will
        # have to look it up
        doc = self.get_page(page)
        if doc:
            title = doc.title
        else:
            title = default or os.path.basename(page)

        # Save in cache for forever
        self.memcache.set(key, title)

        return title

    def get_page(self, page, revision=None):
        """
        Returns the revision of page with the timestamp given as
        revision, or the latest revision if revision is not set.

        Returns None if nothing was found or if any component of the
        page name starts with an underscore.
        """
        page = Page.sanitise_page_name(page)

        # Split the path into parts
        parts = page.split("/")

        # Check if this is an action
        if any(part.startswith("_") for part in parts):
            return

        if revision:
            return self._get_page(
                "SELECT * FROM wiki WHERE page = %s "
                "AND timestamp = %s", page, revision)

        return self._get_page(
            "SELECT * FROM wiki WHERE page = %s "
            "ORDER BY timestamp DESC LIMIT 1", page)

    def get_recent_changes(self, account, limit=None):
        """
        Yields the page revisions that account is permitted to see,
        newest first.

        At most limit revisions are yielded. Without a (positive)
        limit, all permitted revisions are yielded.
        """
        pages = self._get_pages(
            "SELECT * FROM wiki ORDER BY timestamp DESC")

        # Treat a missing, zero or negative limit as "no limit"
        unlimited = not limit or limit < 0

        found = 0
        for page in pages:
            if not page.check_acl(account):
                continue

            yield page

            # Stop when we have returned enough pages
            found += 1
            if not unlimited and found >= limit:
                break

    def create_page(self, page, author, content, changes=None, address=None):
        """
        Stores a new revision of page and returns it as a Page.

        Empty content is stored as NULL. The title cache is updated and
        watcher emails are sent with the author being excluded.
        """
        name = Page.sanitise_page_name(page)

        # Write page to the database
        page = self._get_page(
            "INSERT INTO wiki(page, author_uid, markdown, changes, address) "
            "VALUES(%s, %s, %s, %s, %s) RETURNING *",
            name, author.uid, content or None, changes, address,
        )

        # Update cache
        self.memcache.set("wiki:title:%s" % page.page, page.title)

        # Send email to all watchers
        page._send_watcher_emails(excludes=[author])

        return page

    def delete_page(self, page, author, **kwargs):
        """
        Deletes page by creating a new revision without any content.

        Any further keyword arguments are passed on to create_page().
        """
        # Do nothing if the page does not exist
        if not self.get_page(page):
            return

        # Just creates a blank last version of the page
        self.create_page(page, author=author, content=None, **kwargs)

    def make_breadcrumbs(self, url):
        """
        Returns a list of (path, title) tuples for all parents of url,
        starting with the topmost one. url itself is not included.
        """
        # Split and strip all empty elements (double slashes)
        parts = [e for e in url.split("/") if e]

        ret = []
        for i in range(1, len(parts)):
            part = "/".join(parts[:i])

            ret.append(
                ("/%s" % part, self.get_page_title(part, os.path.basename(part)))
            )

        return ret

    def search(self, query, account=None, limit=None):
        """
        Runs a full-text search for query and returns a list of matching
        pages that account is permitted to see, best match first.

        At most limit pages are returned if limit is set.
        """
        res = self._get_pages(
            "SELECT wiki.* FROM wiki_search_index search_index "
            "LEFT JOIN wiki ON search_index.wiki_id = wiki.id "
            "WHERE search_index.document @@ websearch_to_tsquery('english', %s) "
            "ORDER BY ts_rank(search_index.document, "
            "websearch_to_tsquery('english', %s)) DESC",
            query, query)

        pages = []
        for page in res:
            # Skip any pages the user doesn't have permission for
            if not page.check_acl(account):
                continue

            # Return any other pages
            pages.append(page)

            # Break when we have found enough pages
            if limit and len(pages) >= limit:
                break

        return pages

    def refresh(self):
        """
        Needs to be called after a page has been changed
        """
        self.db.execute("REFRESH MATERIALIZED VIEW wiki_search_index")

    def get_watchlist(self, account):
        """
        Returns a sorted list of the current revisions of all pages
        that are on the watchlist of account.
        """
        pages = self._get_pages("""
            WITH pages AS (
                SELECT
                    *
                FROM
                    wiki_current
                LEFT JOIN
                    wiki ON wiki_current.id = wiki.id
            )

            SELECT
                *
            FROM
                wiki_watchlist watchlist
            JOIN
                pages ON watchlist.page = pages.page
            WHERE
                watchlist.uid = %s
            """, account.uid,
        )

        return sorted(pages)

    # ACL

    def check_acl(self, page, account):
        """
        Returns True if account is permitted to access page.

        Only the ACL with the longest path that page starts with
        (compared case-insensitively) is evaluated. Pages without
        any matching ACL are accessible for everybody.
        """
        res = self.db.query(
            "SELECT * FROM wiki_acls "
            "WHERE %s ILIKE (path || '%%') ORDER BY LENGTH(path) DESC LIMIT 1", page)

        for row in res:
            # Access not permitted when user is not logged in
            if not account:
                return False

            # If user is in a matching group, we grant permission
            # Otherwise access is not permitted
            return any(account.is_member_of_group(group) for group in row.groups)

        # If no ACLs are found, we permit access
        return True

    # Files

    def _get_files(self, query, *args):
        """
        Runs query with args and yields one File per returned row.
        """
        for row in self.db.query(query, *args):
            yield File(self.backend, row.id, data=row)

    def _get_file(self, query, *args):
        """
        Runs query with args and returns the result as a File.

        Returns None if the query did not return anything.
        """
        res = self.db.get(query, *args)

        if res:
            return File(self.backend, res.id, data=res)

    def get_files(self, path):
        """
        Returns a list of all files in path that have not been
        deleted, ordered by filename.
        """
        files = self._get_files(
            "SELECT * FROM wiki_files "
            "WHERE path = %s AND deleted_at IS NULL ORDER BY filename", path)

        return list(files)

    def get_file_by_path(self, path, revision=None):
        """
        Returns the file at path (directory and filename joined).

        If revision is set, the newest version of the file that was
        created at or before revision is returned; otherwise the
        version that has not been deleted. Returns None if there is
        no such file.
        """
        path, filename = os.path.dirname(path), os.path.basename(path)

        if revision:
            # Fetch a specific revision
            return self._get_file(
                "SELECT * FROM wiki_files "
                "WHERE path = %s AND filename = %s AND created_at <= %s "
                "ORDER BY created_at DESC LIMIT 1", path, filename, revision)

        # Fetch latest version
        return self.get_file_by_path_and_filename(path, filename)

    def get_file_by_path_and_filename(self, path, filename):
        """
        Returns the file called filename in path that has not been
        deleted, or None if there is no such file.
        """
        return self._get_file(
            "SELECT * FROM wiki_files "
            "WHERE path = %s AND filename = %s AND deleted_at IS NULL",
            path, filename)

    def upload(self, path, filename, data, mimetype, author, address):
        """
        Stores data as filename in path and returns the new File.

        An existing file of the same name is marked as deleted first.
        """
        # Replace any existing files
        file = self.get_file_by_path_and_filename(path, filename)
        if file:
            file.delete(author)

        # Upload the blob first
        blob = self.db.get(
            "INSERT INTO wiki_blobs(data) VALUES(%s) "
            "ON CONFLICT (digest(data, %s)) DO UPDATE SET data = EXCLUDED.data "
            "RETURNING id", data, "MD5")

        # Create entry for file
        return self._get_file(
            "INSERT INTO wiki_files(path, filename, author_uid, address, "
            "mimetype, blob_id, size) VALUES(%s, %s, %s, %s, %s, %s, %s) RETURNING *",
            path, filename, author.uid, address, mimetype, blob.id, len(data))

    def render(self, path, text):
        """
        Renders the markdown in text as seen from the page at path.
        """
        r = WikiRenderer(self.backend, path)

        return r.render(text)
273
274
class Page(misc.Object):
    """
    A single revision of a wiki page.

    data is the database row of the revision; self.db and self.backend
    are presumably provided by misc.Object - confirm there.
    """

    def init(self, id, data=None):
        # NOTE(review): presumably called by the misc.Object constructor
        self.id = id
        self.data = data

    def __repr__(self):
        return "<%s %s %s>" % (self.__class__.__name__, self.page, self.timestamp)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.id == other.id

        return NotImplemented

    def __hash__(self):
        # Must be defined alongside __eq__ to keep revisions hashable
        return hash(self.id)

    def __lt__(self, other):
        # Sort by page name first and by age second
        if isinstance(other, self.__class__):
            if self.page == other.page:
                return self.timestamp < other.timestamp

            return self.page < other.page

        return NotImplemented

    @staticmethod
    def sanitise_page_name(page):
        """
        Returns page with exactly one leading slash, without any
        repeated slashes and without a trailing slash.

        The root page (or an empty name) is returned as "/".
        """
        if not page:
            return "/"

        # Make sure the page name starts with a / and
        # collapse any repeated slashes
        page = re.sub(r"/{2,}", "/", "/%s" % page)

        # Make sure that the page name does NOT end with a /
        # (unless this is the root page)
        return page.rstrip("/") or "/"

    @property
    def url(self):
        return "/docs%s" % self.page

    @property
    def full_url(self):
        return "https://www.ipfire.org%s" % self.url

    @property
    def page(self):
        return self.data.page

    @property
    def title(self):
        """
        The headline of the page or, if there is none, the last
        component of the page name.
        """
        return self._title or os.path.basename(self.page[1:])

    @property
    def _title(self):
        """
        The headline taken from the first line of the markdown,
        or None if the first line is not a headline.
        """
        if not self.markdown:
            return

        # Find first H1 headline in markdown
        markdown = self.markdown.splitlines()

        # The title is matched non-greedily so that an
        # optional closing " #" is not included
        m = re.match(r"^#\s*(.*?)( #)?$", markdown[0])
        if m:
            return m.group(1)

    @lazy_property
    def author(self):
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

    @property
    def markdown(self):
        return self.data.markdown or ""

    @property
    def html(self):
        """
        The rendered page without its headline.
        """
        lines = self.markdown.splitlines()

        # Strip off the first line if it contains a heading (as it will be shown separately)
        if lines and lines[0].startswith("#"):
            lines = lines[1:]

        return self.backend.wiki.render(self.page, "\n".join(lines))

    @property
    def timestamp(self):
        return self.data.timestamp

    def was_deleted(self):
        """
        Returns True if this revision has no content.
        """
        return not self.markdown

    @lazy_property
    def breadcrumbs(self):
        return self.backend.wiki.make_breadcrumbs(self.page)

    def is_latest_revision(self):
        return self.get_latest_revision() == self

    def get_latest_revision(self):
        """
        Returns the newest revision of this page.
        """
        # Only fetch the newest revision instead of all of them
        return self.backend.wiki._get_page(
            "SELECT * FROM wiki "
            "WHERE page = %s ORDER BY timestamp DESC LIMIT 1", self.page)

    def get_revisions(self):
        """
        Returns an iterator over all revisions of this page, newest first.
        """
        return self.backend.wiki._get_pages(
            "SELECT * FROM wiki "
            "WHERE page = %s ORDER BY timestamp DESC", self.page)

    @lazy_property
    def previous_revision(self):
        """
        The revision right before this one, or None if there is none.
        """
        return self.backend.wiki._get_page(
            "SELECT * FROM wiki "
            "WHERE page = %s AND timestamp < %s ORDER BY timestamp DESC "
            "LIMIT 1", self.page, self.timestamp)

    @property
    def changes(self):
        return self.data.changes

    # ACL

    def check_acl(self, account):
        """
        Returns True if account is permitted to access this page.
        """
        return self.backend.wiki.check_acl(self.page, account)

    # Watchers

    @lazy_property
    def diff(self):
        """
        The unified diff between the previous revision and this one,
        or None if there is no previous revision.
        """
        if self.previous_revision:
            diff = difflib.unified_diff(
                self.previous_revision.markdown.splitlines(),
                self.markdown.splitlines(),
            )

            return "\n".join(diff)

    @property
    def watchers(self):
        """
        Yields the accounts of everyone who is watching this page.
        """
        res = self.db.query(
            "SELECT uid FROM wiki_watchlist WHERE page = %s", self.page)

        for row in res:
            # Search for account by UID and skip if none was found
            account = self.backend.accounts.get_by_uid(row.uid)
            if not account:
                continue

            # Return the account
            yield account

    def is_watched_by(self, account):
        """
        Returns True if account is watching this page.
        """
        res = self.db.get(
            "SELECT 1 FROM wiki_watchlist "
            "WHERE page = %s AND uid = %s", self.page, account.uid)

        return bool(res)

    def add_watcher(self, account):
        """
        Adds account to the watchers of this page (if not already watching).
        """
        if self.is_watched_by(account):
            return

        self.db.execute(
            "INSERT INTO wiki_watchlist(page, uid) "
            "VALUES(%s, %s)", self.page, account.uid)

    def remove_watcher(self, account):
        """
        Removes account from the watchers of this page.
        """
        self.db.execute(
            "DELETE FROM wiki_watchlist "
            "WHERE page = %s AND uid = %s", self.page, account.uid)

    def _send_watcher_emails(self, excludes=None):
        """
        Sends an email about this revision to all watchers who are
        permitted to access the page and who are not in excludes.

        Nothing is sent for the very first revision of a page.
        """
        # Avoid a mutable default argument
        if excludes is None:
            excludes = []

        # Nothing to do if there was no previous revision
        if not self.previous_revision:
            return

        for watcher in self.watchers:
            # Skip everyone who is excluded
            if watcher in excludes:
                logging.debug("Excluding %s", watcher)
                continue

            # Check permissions
            if not self.backend.wiki.check_acl(self.page, watcher):
                logging.debug("Watcher %s does not have permissions", watcher)
                continue

            logging.debug("Sending watcher email to %s", watcher)

            # Compose message
            self.backend.messages.send_template("wiki/messages/page-changed",
                account=watcher, page=self, priority=-10)

    def restore(self, author, address, comment=None):
        """
        Creates a new revision with the content of this revision
        and returns it.
        """
        changes = "Restore to revision from %s" % self.timestamp.isoformat()

        # Append comment
        if comment:
            changes = "%s: %s" % (changes, comment)

        return self.backend.wiki.create_page(self.page,
            author, self.markdown, changes=changes, address=address)
483
484
class File(misc.Object):
    """
    A single version of a file that has been uploaded to the wiki.

    data is the database row of the file; self.db, self.memcache and
    self.backend are presumably provided by misc.Object - confirm there.
    """

    def init(self, id, data):
        # NOTE(review): presumably called by the misc.Object constructor
        self.id = id
        self.data = data

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.id == other.id

        # Let Python try the reflected comparison (as Page does)
        return NotImplemented

    def __hash__(self):
        # Must be defined alongside __eq__ to keep files hashable
        return hash(self.id)

    @property
    def url(self):
        return os.path.join(self.path, self.filename)

    @property
    def path(self):
        return self.data.path

    @property
    def filename(self):
        return self.data.filename

    @property
    def mimetype(self):
        return self.data.mimetype

    @property
    def size(self):
        return self.data.size

    @lazy_property
    def author(self):
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

    @property
    def created_at(self):
        return self.data.created_at

    def delete(self, author=None):
        """
        Marks this file as deleted (by author, if given).
        """
        self.db.execute(
            "UPDATE wiki_files SET deleted_at = NOW(), deleted_by = %s "
            "WHERE id = %s", author.uid if author else None, self.id)

    @property
    def deleted_at(self):
        return self.data.deleted_at

    def get_latest_revision(self):
        """
        Returns the newest version of this file, or None if there is none.
        """
        revisions = self.get_revisions()

        # Return first object
        for rev in revisions:
            return rev

    def get_revisions(self):
        """
        Returns a list of all versions of this file, newest first.
        """
        revisions = self.backend.wiki._get_files(
            "SELECT * FROM wiki_files "
            "WHERE path = %s AND filename = %s ORDER BY created_at DESC",
            self.path, self.filename)

        return list(revisions)

    def is_pdf(self):
        return self.mimetype in ("application/pdf", "application/x-pdf")

    def is_image(self):
        # Files without any MIME type are never considered an image
        return bool(self.mimetype) and self.mimetype.startswith("image/")

    def is_vector_image(self):
        return self.mimetype in ("image/svg+xml",)

    def is_bitmap_image(self):
        return self.is_image() and not self.is_vector_image()

    @lazy_property
    def blob(self):
        """
        The content of the file as bytes, or None if the blob
        could not be found.
        """
        res = self.db.get(
            "SELECT data FROM wiki_blobs WHERE id = %s", self.data.blob_id)

        if res:
            return bytes(res.data)

    def get_thumbnail(self, size):
        """
        Returns a thumbnail of this (bitmap) image for the given size.

        The thumbnail is fetched from the cache if possible and
        generated and cached otherwise.
        """
        assert self.is_bitmap_image()

        cache_key = "-".join((
            self.path,
            util.normalize(self.filename),
            self.created_at.isoformat(),
            "%spx" % size,
        ))

        # Try to fetch the data from the cache
        thumbnail = self.memcache.get(cache_key)
        if thumbnail:
            return thumbnail

        # Generate the thumbnail
        thumbnail = util.generate_thumbnail(self.blob, size)

        # Put it into the cache for forever
        self.memcache.set(cache_key, thumbnail)

        return thumbnail
581
582
class WikiRenderer(misc.Object):
    """
    Renders the markdown of a wiki page into HTML and rewrites
    links and images so that they work from the page at path.
    """

    # URL schemas of links that are considered external
    schemas = (
        "ftp://",
        "git://",
        "http://",
        "https://",
        "rsync://",
        "sftp://",
        "ssh://",
        "webcal://",
    )

    # Links
    links = re.compile(r"<a href=\"(.*?)\">(.*?)</a>")

    # Images
    images = re.compile(r"<img alt(?:=\"(.*?)\")? src=\"(.*?)\" (?:title=\"(.*?)\" )?/>")

    def init(self, path):
        # NOTE(review): presumably called by the misc.Object constructor
        self.path = path

    def _render_link(self, m):
        """
        Returns the replacement for a link that was matched by self.links.
        """
        url, text = m.groups()

        # External Links
        if url.startswith(self.schemas):
            return """<a class="link-external" href="%s">%s</a>""" % \
                (url, text or url)

        # Emails
        if "@" in url:
            # Strip mailto:
            if url.startswith("mailto:"):
                url = url[7:]

            return """<a class="link-external" href="mailto:%s">%s</a>""" % \
                (url, text or url)

        # Everything else must be an internal link
        path = self.backend.wiki.make_path(self.path, url)

        return """<a href="/docs%s">%s</a>""" % \
            (path, text or self.backend.wiki.get_page_title(path))

    def _render_image(self, m):
        """
        Returns the replacement for an image that was matched by
        self.images: a figure with the image and its caption.
        """
        alt_text, url, caption = m.groups()

        # The alt text and the caption are optional in the pattern, so
        # never render None; the caption serves as the alt text if
        # there is no other
        alt_text = alt_text or caption or ""
        caption = caption or ""

        html = """
            <div class="columns is-centered">
                <div class="column is-8">
                    <figure class="image">
                        <img src="%s" alt="%s">
                        <figcaption class="figure-caption">%s</figcaption>
                    </figure>
                </div>
            </div>
        """

        # External URLs are used as they are
        if url.startswith(("https://", "http://")):
            return html % (url, alt_text, caption)

        # Absolute URLs only need to be prefixed
        if url.startswith("/"):
            return html % ("/docs%s" % url, alt_text, caption)

        # Try to split query string
        url, _, qs = url.partition("?")

        # Parse query arguments
        args = urllib.parse.parse_qs(qs)

        # Build absolute path
        url = self.backend.wiki.make_path(self.path, url)

        # Find image
        file = self.backend.wiki.get_file_by_path(url)
        if not file or not file.is_image():
            return "<!-- Could not find image %s in %s -->" % (url, self.path)

        # Scale down the image if not already done
        if "s" not in args:
            args["s"] = "920"

        # Append arguments to the URL (parse_qs() returns lists of
        # values which must be encoded with doseq)
        url = "%s?%s" % (url, urllib.parse.urlencode(args, doseq=True))

        return html % ("/docs%s" % url, alt_text, caption)

    def render(self, text):
        """
        Renders the markdown in text and returns the resulting HTML.
        """
        logging.debug("Rendering %s", self.path)

        # Borrow this from the blog
        text = self.backend.blog._render_text(text, lang="markdown")

        # Postprocess links
        text = self.links.sub(self._render_link, text)

        # Postprocess images to <figure>
        text = self.images.sub(self._render_image, text)

        return text