]> git.ipfire.org Git - ipfire.org.git/blob - src/backend/wiki.py
a390ce37d5622516481f6c3bcdb72202a47008aa
[ipfire.org.git] / src / backend / wiki.py
1 #!/usr/bin/python3
2
3 import PIL
4 import PIL.ImageFilter
5 import difflib
6 import io
7 import logging
8 import os.path
9 import re
10 import tornado.gen
11 import urllib.parse
12
13 from . import misc
14 from . import util
15 from .decorators import *
16
class Wiki(misc.Object):
    """
    Backend for the wiki: pages, search, access control and uploaded files.

    Everything is read from and written to the database through self.db.
    """

    def _get_pages(self, query, *args):
        """
        Runs the given query and yields a Page for every row returned.
        """
        for row in self.db.query(query, *args):
            yield Page(self.backend, row.id, data=row)

    def _get_page(self, query, *args):
        """
        Runs the given query and returns the result as a Page,
        or None if the query did not return anything.
        """
        res = self.db.get(query, *args)
        if res:
            return Page(self.backend, res.id, data=res)

        return None

    def get_page_title(self, page, default=None):
        """
        Returns the title of the given page.

        If the page does not exist, default is returned, or the last
        path component of the page name if no default was given.
        """
        doc = self.get_page(page)
        if doc:
            return doc.title

        return default or os.path.basename(page)

    def get_page(self, page, revision=None):
        """
        Fetches a page by its name.

        If revision (a timestamp) is given, exactly that revision is
        returned; otherwise the most recent one. Returns None if no
        matching row exists.
        """
        page = Page.sanitise_page_name(page)
        assert page

        if revision:
            return self._get_page(
                "SELECT * FROM wiki WHERE page = %s AND timestamp = %s",
                page, revision)

        return self._get_page(
            "SELECT * FROM wiki WHERE page = %s "
            "ORDER BY timestamp DESC LIMIT 1", page)

    def get_recent_changes(self, account, limit=None):
        """
        Yields all revisions of the last four weeks (newest first)
        that the given account is permitted to see.

        At most limit pages are yielded. A missing, zero or negative
        limit means that there is no limit.
        """
        pages = self._get_pages(
            "SELECT * FROM wiki "
            "WHERE timestamp >= NOW() - INTERVAL '4 weeks' "
            "ORDER BY timestamp DESC")

        # Number of pages we are still allowed to return (None = unlimited).
        # The limit is optional, so we must not do arithmetic on None.
        remaining = limit if limit and limit > 0 else None

        for page in pages:
            # Skip any pages the user doesn't have permission for
            if not page.check_acl(account):
                continue

            yield page

            if remaining is not None:
                remaining -= 1
                if not remaining:
                    break

    def create_page(self, page, author, content, changes=None, address=None):
        """
        Stores a new revision of the given page and notifies all watchers
        (except the author). Returns the new revision as a Page.

        Empty content is stored as NULL which marks the page as deleted.
        """
        page = Page.sanitise_page_name(page)

        # Write page to the database
        page = self._get_page(
            "INSERT INTO wiki(page, author_uid, markdown, changes, address) "
            "VALUES(%s, %s, %s, %s, %s) RETURNING *",
            page, author.uid, content or None, changes, address)

        # Send email to all watchers
        page._send_watcher_emails(excludes=[author])

        return page

    def delete_page(self, page, author, **kwargs):
        """
        Deletes a page by creating a new revision without any content.

        Any extra keyword arguments are passed on to create_page().
        """
        # Do nothing if the page does not exist
        if not self.get_page(page):
            return

        # Just creates a blank last version of the page
        self.create_page(page, author=author, content=None, **kwargs)

    def make_breadcrumbs(self, url):
        """
        Returns a list of (url, title) tuples for all parents of
        the given URL. The page itself is not part of the list.
        """
        # Split and strip all empty elements (double slashes)
        parts = [e for e in url.split("/") if e]

        ret = []
        for i in range(1, len(parts)):
            part = "/".join(parts[:i])

            ret.append(("/%s" % part, self.get_page_title(part, os.path.basename(part))))

        return ret

    def search(self, query, account=None, limit=None):
        """
        Runs a full-text search and returns a list of matching pages
        (best match first) that the given account is permitted to see.
        """
        query = util.parse_search_query(query)

        res = self._get_pages(
            "SELECT wiki.* FROM wiki_search_index search_index "
            "LEFT JOIN wiki ON search_index.wiki_id = wiki.id "
            "WHERE search_index.document @@ to_tsquery('english', %s) "
            "ORDER BY ts_rank(search_index.document, to_tsquery('english', %s)) DESC",
            query, query)

        pages = []
        for page in res:
            # Skip any pages the user doesn't have permission for
            if not page.check_acl(account):
                continue

            # Return any other pages
            pages.append(page)

            # Break when we have found enough pages
            if limit and len(pages) >= limit:
                break

        return pages

    def refresh(self):
        """
        Needs to be called after a page has been changed
        """
        self.db.execute("REFRESH MATERIALIZED VIEW wiki_search_index")

    # ACL

    def check_acl(self, page, account):
        """
        Returns True if the given account may access the given page.

        Only the ACL with the longest path that is a prefix of the page
        name is considered. Pages without any matching ACL are public.
        """
        # The doubled percent sign is an escaped literal "%" for the
        # LIKE pattern; the single %s is the query parameter.
        res = self.db.query(
            "SELECT * FROM wiki_acls "
            "WHERE %s ILIKE (path || '%%') ORDER BY LENGTH(path) DESC LIMIT 1", page)

        for row in res:
            # Access not permitted when user is not logged in
            if not account:
                return False

            # If user is in a matching group, we grant permission
            for group in row.groups:
                if group in account.groups:
                    return True

            # Otherwise access is not permitted
            return False

        # If no ACLs are found, we permit access
        return True

    # Files

    def _get_files(self, query, *args):
        """
        Runs the given query and yields a File for every row returned.
        """
        for row in self.db.query(query, *args):
            yield File(self.backend, row.id, data=row)

    def _get_file(self, query, *args):
        """
        Runs the given query and returns the result as a File,
        or None if the query did not return anything.
        """
        res = self.db.get(query, *args)
        if res:
            return File(self.backend, res.id, data=res)

        return None

    def get_files(self, path):
        """
        Returns a list of all files (that have not been deleted)
        in the given path, ordered by filename.
        """
        files = self._get_files(
            "SELECT * FROM wiki_files "
            "WHERE path = %s AND deleted_at IS NULL ORDER BY filename", path)

        return list(files)

    def get_file_by_path(self, path):
        """
        Returns the file with the given full path (directory and
        filename) or None if no such file exists or it was deleted.
        """
        path, filename = os.path.dirname(path), os.path.basename(path)

        return self._get_file(
            "SELECT * FROM wiki_files "
            "WHERE path = %s AND filename = %s AND deleted_at IS NULL", path, filename)

    def upload(self, path, filename, data, mimetype, author, address):
        """
        Stores data as a new blob and creates a file entry pointing to it.

        Returns the newly created File.
        """
        # Upload the blob first
        blob = self.db.get("INSERT INTO wiki_blobs(data) VALUES(%s) RETURNING id", data)

        # Create entry for file
        return self._get_file(
            "INSERT INTO wiki_files(path, filename, author_uid, address, "
            "mimetype, blob_id, size) VALUES(%s, %s, %s, %s, %s, %s, %s) RETURNING *",
            path, filename, author.uid, address, mimetype, blob.id, len(data))

    def find_image(self, path, filename):
        """
        Searches for an image called filename, first in path and
        then in the parent of path.

        Returns the File or None if no image could be found.
        """
        for p in (path, os.path.dirname(path)):
            file = self.get_file_by_path(os.path.join(p, filename))

            if file and file.is_image():
                return file

        return None
186
187
class Page(misc.Object):
    """
    One revision of a wiki page.

    data is the database row of the revision and is expected to provide
    the page, markdown, timestamp, author_uid and changes fields that
    are read by the properties below.
    """

    # NOTE(review): presumably invoked by the constructor of misc.Object - confirm
    def init(self, id, data=None):
        self.id = id
        self.data = data

    def __repr__(self):
        return "<%s %s %s>" % (self.__class__.__name__, self.page, self.timestamp)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.id == other.id

        # Let Python try the reflected operation instead of returning None
        return NotImplemented

    def __hash__(self):
        # Defining __eq__ removes the inherited __hash__, so we have to
        # provide one that agrees with __eq__.
        return hash(self.id)

    def __lt__(self, other):
        if isinstance(other, self.__class__):
            if self.page == other.page:
                return self.timestamp < other.timestamp

            return self.page < other.page

        return NotImplemented

    @staticmethod
    def sanitise_page_name(page):
        """
        Normalises a page name: it always starts with a slash, never ends
        with one (except for the root page "/"), and never contains
        multiple consecutive slashes.
        """
        if not page:
            return "/"

        # Make sure the page name starts with a / and collapse any runs
        # of slashes (a plain replace("//", "/") misses runs of three
        # or more)
        page = re.sub(r"/+", "/", "/%s" % page)

        # Make sure that the page name does NOT end with a /
        if len(page) > 1 and page.endswith("/"):
            page = page[:-1]

        return page

    @property
    def url(self):
        return self.page

    @property
    def full_url(self):
        return "https://wiki.ipfire.org%s" % self.url

    @property
    def page(self):
        return self.data.page

    @property
    def title(self):
        """
        The headline of the page, or the last part of
        the page name if there is no headline.
        """
        return self._title or os.path.basename(self.page[1:])

    @property
    def _title(self):
        """
        The H1 headline if the first line of the markdown is one.
        """
        if not self.markdown:
            return None

        # Only the first line is considered
        markdown = self.markdown.splitlines()

        # The headline is matched non-greedily so that an optional
        # closing " #" does not become a part of the title
        m = re.match(r"^# (.*?)( #)?$", markdown[0])
        if m:
            return m.group(1)

        return None

    @lazy_property
    def author(self):
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

    def _render(self, text):
        """
        Renders the given markdown to HTML after images
        and wiki links have been expanded.
        """
        logging.debug("Rendering %s", self)

        # Link images
        text = self._link_images(text)

        # Add wiki links
        text = self._link_pages(text)

        # Borrow this from the blog
        return self.backend.blog._render_text(text, lang="markdown")

    def _link_images(self, text):
        """
        Replaces all relative image references by a scaled-down version
        of the image which links to the detail page of the file.
        """
        replacements = []

        for match in re.finditer(r"!\[(.*?)\]\((.*?)\)", text):
            alt_text, url = match.groups()

            # Skip any absolute and external URLs
            if url.startswith(("/", "https://", "http://")):
                continue

            # Try to split query string
            url, delimiter, qs = url.partition("?")

            # Parse query arguments
            args = urllib.parse.parse_qs(qs)

            # Find image
            file = self.backend.wiki.find_image(self.page, url)
            if not file:
                continue

            # Scale down the image if not already done
            if "s" not in args:
                args["s"] = "768"

            # Format URL (parse_qs() returns a list of values for each
            # key, which urlencode() only understands with doseq=True)
            url = "%s?%s" % (file.url, urllib.parse.urlencode(args, doseq=True))

            replacements.append((match.span(), file, alt_text, url))

        # Apply all replacements back to front so that
        # the spans of the earlier matches remain valid
        for (start, end), file, alt_text, url in reversed(replacements):
            text = text[:start] + "[![%s](%s)](%s?action=detail)" % (alt_text, url, file.url) + text[end:]

        return text

    def _link_pages(self, text):
        """
        Converts [[...]] links into markdown links.
        """
        # Pattern, link template, title template,
        # title lookup function, internal link?
        patterns = (
            (r"\[\[([\w\d\/\-\.]+)(?:\|(.+?))\]\]", r"\1", r"\2", None, True),
            (r"\[\[([\w\d\/\-\.]+)\]\]", r"\1", r"\1", self.backend.wiki.get_page_title, True),

            # External links
            (r"\[\[((?:ftp|git|https?|rsync|sftp|ssh|webcal)\:\/\/.+?)(?:\|(.+?))\]\]",
                r"\1", r"\2", None, False),

            # Mail
            (r"\[\[([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)\]\]",
                r"\1", r"\1", None, False),
        )

        for pattern, link_tmpl, title_tmpl, repl, internal in patterns:
            replacements = []

            for match in re.finditer(pattern, text):
                link = match.expand(link_tmpl)
                title = match.expand(title_tmpl)

                if internal:
                    # Allow relative links
                    if not link.startswith("/"):
                        link = os.path.join(self.page, link)

                    # Normalise links
                    link = os.path.normpath(link)

                if callable(repl):
                    title = repl(link) or title

                replacements.append((match.span(), title or link, link))

            # Apply all replacements back to front so that
            # the spans of the earlier matches remain valid
            for (start, end), title, link in reversed(replacements):
                text = text[:start] + "[%s](%s)" % (title, link) + text[end:]

        return text

    @property
    def markdown(self):
        """
        The markdown of this revision (an empty string if there is none).
        """
        return self.data.markdown or ""

    @property
    def html(self):
        return self._render(self.markdown)

    @property
    def timestamp(self):
        return self.data.timestamp

    def was_deleted(self):
        """
        Returns True if this revision has no content stored.
        """
        # The markdown property never returns None,
        # so we have to look at the raw data
        return self.data.markdown is None

    @lazy_property
    def breadcrumbs(self):
        return self.backend.wiki.make_breadcrumbs(self.page)

    def get_latest_revision(self):
        """
        Returns the most recent revision of this page (or None).
        """
        # Revisions are ordered newest first
        return next(iter(self.get_revisions()), None)

    def get_revisions(self):
        """
        Returns an iterator over all revisions of this page, newest first.
        """
        return self.backend.wiki._get_pages(
            "SELECT * FROM wiki "
            "WHERE page = %s ORDER BY timestamp DESC", self.page)

    @lazy_property
    def previous_revision(self):
        """
        The revision right before this one (or None).
        """
        return self.backend.wiki._get_page(
            "SELECT * FROM wiki "
            "WHERE page = %s AND timestamp < %s ORDER BY timestamp DESC "
            "LIMIT 1", self.page, self.timestamp)

    @property
    def changes(self):
        return self.data.changes

    # ACL

    def check_acl(self, account):
        return self.backend.wiki.check_acl(self.page, account)

    # Sidebar

    @lazy_property
    def sidebar(self):
        """
        The sidebar page that is closest to this page. The search
        starts at this page and walks up to the root of the wiki.
        """
        parts = [part for part in self.page.split("/") if part]

        for i in range(len(parts), -1, -1):
            sidebar = self.backend.wiki.get_page("%s/sidebar" % "/".join(parts[:i]))
            if sidebar:
                return sidebar

        return None

    # Watchers

    @lazy_property
    def diff(self):
        """
        Unified diff between the previous revision and this one
        (None if there is no previous revision).
        """
        if self.previous_revision:
            # The lines come without trailing newlines, so the control
            # lines must not be terminated with one either
            diff = difflib.unified_diff(
                self.previous_revision.markdown.splitlines(),
                self.markdown.splitlines(),
                lineterm="",
            )

            return "\n".join(diff)

        return None

    @property
    def watchers(self):
        """
        Yields the accounts of everyone who is watching this page.
        """
        res = self.db.query(
            "SELECT uid FROM wiki_watchlist WHERE page = %s", self.page)

        for row in res:
            # Search for account by UID and skip if none was found
            account = self.backend.accounts.get_by_uid(row.uid)
            if not account:
                continue

            # Return the account
            yield account

    def is_watched_by(self, account):
        """
        Returns True if the given account is watching this page.
        """
        res = self.db.get(
            "SELECT 1 FROM wiki_watchlist "
            "WHERE page = %s AND uid = %s", self.page, account.uid)

        if res:
            return True

        return False

    def add_watcher(self, account):
        """
        Adds the account to the watchlist unless it is already on it.
        """
        if self.is_watched_by(account):
            return

        self.db.execute(
            "INSERT INTO wiki_watchlist(page, uid) "
            "VALUES(%s, %s)", self.page, account.uid)

    def remove_watcher(self, account):
        """
        Removes the account from the watchlist.
        """
        self.db.execute(
            "DELETE FROM wiki_watchlist "
            "WHERE page = %s AND uid = %s", self.page, account.uid)

    def _send_watcher_emails(self, excludes=()):
        """
        Sends a notification about this revision to everyone who is
        watching the page, except those accounts listed in excludes.
        """
        # Nothing to do if there was no previous revision
        if not self.previous_revision:
            return

        for watcher in self.watchers:
            # Skip everyone who is excluded
            if watcher in excludes:
                logging.debug("Excluding %s", watcher)
                continue

            logging.debug("Sending watcher email to %s", watcher)

            # Compose message
            self.backend.messages.send_template("wiki/messages/page-changed",
                recipients=[watcher], page=self, priority=-10)
455
456
class File(misc.Object):
    """
    A file that has been uploaded to the wiki.

    data is the database row of the file and is expected to provide the
    path, filename, mimetype, size, author_uid, created_at and blob_id
    fields that are read by the properties below.
    """

    # NOTE(review): presumably invoked by the constructor of misc.Object - confirm
    def init(self, id, data):
        self.id = id
        self.data = data

    @property
    def url(self):
        return os.path.join(self.path, self.filename)

    @property
    def path(self):
        return self.data.path

    @property
    def filename(self):
        return self.data.filename

    @property
    def mimetype(self):
        return self.data.mimetype

    @property
    def size(self):
        return self.data.size

    @lazy_property
    def author(self):
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

    @property
    def created_at(self):
        return self.data.created_at

    def is_pdf(self):
        return self.mimetype in ("application/pdf", "application/x-pdf")

    def is_image(self):
        # Files without a MIME type are never images
        if not self.mimetype:
            return False

        return self.mimetype.startswith("image/")

    @lazy_property
    def blob(self):
        """
        The content of the file as bytes (None if the blob was not found).
        """
        res = self.db.get(
            "SELECT data FROM wiki_blobs WHERE id = %s", self.data.blob_id)

        if res:
            return bytes(res.data)

        return None

    def get_thumbnail(self, size):
        """
        Returns the image scaled down to fit into size x size pixels.

        Thumbnails are cached in memcache. The creation time of the
        file is part of the cache key.
        """
        cache_key = "-".join((
            self.path,
            util.normalize(self.filename),
            self.created_at.isoformat(),
            "%spx" % size,
        ))

        # Try to fetch the data from the cache
        thumbnail = self.memcache.get(cache_key)
        if thumbnail:
            return thumbnail

        # Generate the thumbnail
        thumbnail = self._generate_thumbnail(size)

        # Put it into the cache for forever
        self.memcache.set(cache_key, thumbnail)

        return thumbnail

    def _generate_thumbnail(self, size, **args):
        """
        Scales the image down so that it fits into size x size pixels
        and returns it encoded in its original format.

        Any extra keyword arguments are passed to PIL's Image.save().
        """
        # Importing the PIL package alone does not guarantee
        # that the Image submodule has been loaded
        import PIL.Image

        image = PIL.Image.open(io.BytesIO(self.blob))

        # Remember the format. Every operation that returns a new image
        # (convert(), filter(), ...) leaves the format of the result unset.
        image_format = image.format

        # Remove any alpha-channels
        if image_format == "JPEG" and not image.mode == "RGB":
            # Make a white background
            background = PIL.Image.new("RGBA", image.size, (255, 255, 255, 255))

            # Flatten both images together
            # (alpha_composite() only accepts RGBA images)
            flattened_image = PIL.Image.alpha_composite(background, image.convert("RGBA"))

            # Remove the alpha channel
            image = flattened_image.convert("RGB")

        # Resize the image to the desired resolution
        image.thumbnail((size, size), PIL.Image.LANCZOS)

        if image_format == "JPEG":
            # Apply a gaussian blur to make compression easier
            image = image.filter(PIL.ImageFilter.GaussianBlur(radius=0.05))

            # Arguments to optimise the compression
            args.update({
                "subsampling" : "4:2:0",
                "quality" : 70,
            })

        with io.BytesIO() as f:
            # If writing out the image does not work with optimization,
            # we try to write it out without any optimization.
            try:
                image.save(f, image_format, optimize=True, **args)
            except Exception:
                # Drop anything the failed attempt might have written
                f.seek(0)
                f.truncate()

                image.save(f, image_format, **args)

            return f.getvalue()