src/backend/wiki.py
wiki: Compress images better when using JPEG format
#!/usr/bin/python3

import PIL
import PIL.Image
import PIL.ImageFilter
import difflib
import io
import logging
import os.path
import re
import tornado.gen
import urllib.parse

from . import misc
from . import util
from .decorators import *

class Wiki(misc.Object):
	def _get_pages(self, query, *args):
		res = self.db.query(query, *args)

		for row in res:
			yield Page(self.backend, row.id, data=row)

	def _get_page(self, query, *args):
		res = self.db.get(query, *args)

		if res:
			return Page(self.backend, res.id, data=res)

	def get_page_title(self, page, default=None):
		doc = self.get_page(page)
		if doc:
			return doc.title

		return default or os.path.basename(page)

	def get_page(self, page, revision=None):
		page = Page.sanitise_page_name(page)
		assert page

		if revision:
			return self._get_page("SELECT * FROM wiki WHERE page = %s \
				AND timestamp = %s", page, revision)
		else:
			return self._get_page("SELECT * FROM wiki WHERE page = %s \
				ORDER BY timestamp DESC LIMIT 1", page)

	def get_recent_changes(self, account, limit=None):
		pages = self._get_pages("SELECT * FROM wiki \
			WHERE timestamp >= NOW() - INTERVAL '4 weeks' \
			ORDER BY timestamp DESC")

		for page in pages:
			if not page.check_acl(account):
				continue

			yield page

			# Stop early once the optional limit has been reached
			if limit:
				limit -= 1
				if not limit:
					break

	def create_page(self, page, author, content, changes=None, address=None):
		page = Page.sanitise_page_name(page)

		# Write page to the database
		page = self._get_page("INSERT INTO wiki(page, author_uid, markdown, changes, address) \
			VALUES(%s, %s, %s, %s, %s) RETURNING *", page, author.uid, content or None, changes, address)

		# Send email to all watchers
		page._send_watcher_emails(excludes=[author])

		return page

	def delete_page(self, page, author, **kwargs):
		# Do nothing if the page does not exist
		if not self.get_page(page):
			return

		# Just creates a blank last version of the page
		self.create_page(page, author=author, content=None, **kwargs)

	def make_breadcrumbs(self, url):
		# Split and strip all empty elements (double slashes)
		parts = list(e for e in url.split("/") if e)

		ret = []
		for part in ("/".join(parts[:i]) for i in range(1, len(parts))):
			ret.append(("/%s" % part, self.get_page_title(part, os.path.basename(part))))

		return ret
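
	# Illustrative note (not part of the original code): for a URL like
	# "/docs/installation/step1", the loop above yields only the ancestors,
	# i.e. [("/docs", <title>), ("/docs/installation", <title>)]; the page
	# itself is not included in its own breadcrumb trail.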

	def search(self, query, account=None, limit=None):
		query = util.parse_search_query(query)

		res = self._get_pages("SELECT wiki.* FROM wiki_search_index search_index \
			LEFT JOIN wiki ON search_index.wiki_id = wiki.id \
			WHERE search_index.document @@ to_tsquery('english', %s) \
			ORDER BY ts_rank(search_index.document, to_tsquery('english', %s)) DESC",
			query, query)

		for page in res:
			# Skip any pages the user doesn't have permission for
			if not page.check_acl(account):
				continue

			# Return any other pages
			yield page

			# Stop early once the optional limit has been reached
			if limit:
				limit -= 1
				if not limit:
					break
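
	# Note (added for clarity): the query runs against the wiki_search_index
	# materialized view using PostgreSQL full-text search; refresh() below
	# has to be called after edits so new content becomes searchable.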

	def refresh(self):
		"""
			Needs to be called after a page has been changed
		"""
		self.db.execute("REFRESH MATERIALIZED VIEW wiki_search_index")

	# ACL

	def check_acl(self, page, account):
		res = self.db.query("SELECT * FROM wiki_acls \
			WHERE %s ILIKE (path || '%%') ORDER BY LENGTH(path) DESC LIMIT 1", page)

		for row in res:
			# Access not permitted when user is not logged in
			if not account:
				return False

			# If user is in a matching group, we grant permission
			for group in row.groups:
				if group in account.groups:
					return True

			# Otherwise access is not permitted
			return False

		# If no ACLs are found, we permit access
		return True
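
	# Illustrative note (not part of the original code): the query picks the
	# longest wiki_acls.path that is a prefix of the requested page
	# (case-insensitive), so e.g. a row with path "/devel" covers "/devel"
	# and everything below it. Pages without a matching ACL row are public.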

	# Files

	def _get_files(self, query, *args):
		res = self.db.query(query, *args)

		for row in res:
			yield File(self.backend, row.id, data=row)

	def _get_file(self, query, *args):
		res = self.db.get(query, *args)

		if res:
			return File(self.backend, res.id, data=res)

	def get_files(self, path):
		files = self._get_files("SELECT * FROM wiki_files \
			WHERE path = %s AND deleted_at IS NULL ORDER BY filename", path)

		return list(files)

	def get_file_by_path(self, path):
		path, filename = os.path.dirname(path), os.path.basename(path)

		return self._get_file("SELECT * FROM wiki_files \
			WHERE path = %s AND filename = %s AND deleted_at IS NULL", path, filename)

	def upload(self, path, filename, data, mimetype, author, address):
		# Upload the blob first
		blob = self.db.get("INSERT INTO wiki_blobs(data) VALUES(%s) RETURNING id", data)

		# Create entry for file
		return self._get_file("INSERT INTO wiki_files(path, filename, author_uid, address, \
			mimetype, blob_id, size) VALUES(%s, %s, %s, %s, %s, %s, %s) RETURNING *", path,
			filename, author.uid, address, mimetype, blob.id, len(data))

	def find_image(self, path, filename):
		for p in (path, os.path.dirname(path)):
			file = self.get_file_by_path(os.path.join(p, filename))

			if file and file.is_image():
				return file


class Page(misc.Object):
	def init(self, id, data=None):
		self.id = id
		self.data = data

	def __repr__(self):
		return "<%s %s %s>" % (self.__class__.__name__, self.page, self.timestamp)

	def __eq__(self, other):
		if isinstance(other, self.__class__):
			return self.id == other.id

	def __lt__(self, other):
		if isinstance(other, self.__class__):
			if self.page == other.page:
				return self.timestamp < other.timestamp

			return self.page < other.page

	@staticmethod
	def sanitise_page_name(page):
		if not page:
			return "/"

		# Make sure that the page name does NOT end with a /
		if page.endswith("/"):
			page = page[:-1]

		# Make sure the page name starts with a /
		if not page.startswith("/"):
			page = "/%s" % page

		# Remove any double slashes
		page = page.replace("//", "/")

		return page
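
	# Illustrative examples (not part of the original code) of what the
	# normalisation above does:
	#   ""       -> "/"
	#   "docs/"  -> "/docs"
	#   "a//b"   -> "/a/b"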

	@property
	def url(self):
		return self.page

	@property
	def full_url(self):
		return "https://wiki.ipfire.org%s" % self.url

	@property
	def page(self):
		return self.data.page

	@property
	def title(self):
		return self._title or os.path.basename(self.page[1:])

	@property
	def _title(self):
		if not self.markdown:
			return

		# Find first H1 headline in markdown
		markdown = self.markdown.splitlines()

		m = re.match(r"^# (.*?)( #)?$", markdown[0])
		if m:
			return m.group(1)

	@lazy_property
	def author(self):
		if self.data.author_uid:
			return self.backend.accounts.get_by_uid(self.data.author_uid)

	def _render(self, text):
		logging.debug("Rendering %s" % self)

		# Link images
		replacements = []
		for match in re.finditer(r"!\[(.*?)\]\((.*?)\)", text):
			alt_text, url = match.groups()

			# Skip any absolute and external URLs
			if url.startswith("/") or url.startswith("https://") or url.startswith("http://"):
				continue

			# Try to split query string
			url, delimiter, qs = url.partition("?")

			# Parse query arguments
			args = urllib.parse.parse_qs(qs)

			# Find image
			file = self.backend.wiki.find_image(self.page, url)
			if not file:
				continue

			# Scale down the image if not already done
			if "s" not in args:
				args["s"] = "768"

			# Format URL (doseq keeps any list values from parse_qs intact)
			url = "%s?%s" % (file.url, urllib.parse.urlencode(args, doseq=True))

			replacements.append((match.span(), file, alt_text, url))

		# Apply all replacements
		for (start, end), file, alt_text, url in reversed(replacements):
			text = text[:start] + "[![%s](%s)](%s?action=detail)" % (alt_text, url, file.url) + text[end:]

		# Add wiki links
		patterns = (
			(r"\[\[([\w\d\/\-\.]+)(?:\|(.+?))\]\]", r"\1", r"\2", None, True),
			(r"\[\[([\w\d\/\-\.]+)\]\]", r"\1", r"\1", self.backend.wiki.get_page_title, True),

			# External links
			(r"\[\[((?:ftp|git|https?|rsync|sftp|ssh|webcal)\:\/\/.+?)(?:\|(.+?))\]\]",
				r"\1", r"\2", None, False),

			# Mail
			(r"\[\[([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)\]\]",
				r"\1", r"\1", None, False),
		)
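
		# Each entry above is (regex, link template, text template, optional
		# callback that turns the resolved link into a title, internal flag);
		# internal links are resolved relative to this page and normalised.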

		for pattern, link, title, repl, internal in patterns:
			replacements = []

			for match in re.finditer(pattern, text):
				l = match.expand(link)
				t = match.expand(title)

				if internal:
					# Allow relative links
					if not l.startswith("/"):
						l = os.path.join(self.page, l)

					# Normalise links
					l = os.path.normpath(l)

				if callable(repl):
					t = repl(l) or t

				replacements.append((match.span(), t or l, l))

			# Apply all replacements
			for (start, end), t, l in reversed(replacements):
				text = text[:start] + "[%s](%s)" % (t, l) + text[end:]

		# Borrow this from the blog
		return self.backend.blog._render_text(text, lang="markdown")
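
	# Illustrative note (not part of the original code): on a page "/docs",
	# the pre-processing above turns e.g. "![Shot](setup.png)" into a linked,
	# scaled thumbnail of the uploaded file, "[[subpage]]" into
	# "[<page title>](/docs/subpage)" and "[[/docs|Documentation]]" into
	# "[Documentation](/docs)" before the text is rendered as Markdown.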

	@property
	def markdown(self):
		return self.data.markdown or ""

	@property
	def html(self):
		return self._render(self.markdown)

	@property
	def timestamp(self):
		return self.data.timestamp

	def was_deleted(self):
		# A deleted page is stored as a revision without any content
		return not self.markdown

	@lazy_property
	def breadcrumbs(self):
		return self.backend.wiki.make_breadcrumbs(self.page)

	def get_latest_revision(self):
		revisions = self.get_revisions()

		# Return first object
		for rev in revisions:
			return rev

	def get_revisions(self):
		return self.backend.wiki._get_pages("SELECT * FROM wiki \
			WHERE page = %s ORDER BY timestamp DESC", self.page)

	@lazy_property
	def previous_revision(self):
		return self.backend.wiki._get_page("SELECT * FROM wiki \
			WHERE page = %s AND timestamp < %s ORDER BY timestamp DESC \
			LIMIT 1", self.page, self.timestamp)

	@property
	def changes(self):
		return self.data.changes

	# ACL

	def check_acl(self, account):
		return self.backend.wiki.check_acl(self.page, account)

	# Sidebar

	@lazy_property
	def sidebar(self):
		parts = self.page.split("/")

		while parts:
			sidebar = self.backend.wiki.get_page("%s/sidebar" % os.path.join(*parts))
			if sidebar:
				return sidebar

			parts.pop()

	@lazy_property
	def diff(self):
		if self.previous_revision:
			diff = difflib.unified_diff(
				self.previous_revision.markdown.splitlines(),
				self.markdown.splitlines(),
			)

			return "\n".join(diff)

	# Watchers

	@property
	def watchers(self):
		res = self.db.query("SELECT uid FROM wiki_watchlist \
			WHERE page = %s", self.page)

		for row in res:
			# Search for account by UID and skip if none was found
			account = self.backend.accounts.get_by_uid(row.uid)
			if not account:
				continue

			# Return the account
			yield account

	def is_watched_by(self, account):
		res = self.db.get("SELECT 1 FROM wiki_watchlist \
			WHERE page = %s AND uid = %s", self.page, account.uid)

		if res:
			return True

		return False

	def add_watcher(self, account):
		if self.is_watched_by(account):
			return

		self.db.execute("INSERT INTO wiki_watchlist(page, uid) \
			VALUES(%s, %s)", self.page, account.uid)

	def remove_watcher(self, account):
		self.db.execute("DELETE FROM wiki_watchlist \
			WHERE page = %s AND uid = %s", self.page, account.uid)

	def _send_watcher_emails(self, excludes=[]):
		# Nothing to do if there was no previous revision
		if not self.previous_revision:
			return

		for watcher in self.watchers:
			# Skip everyone who is excluded
			if watcher in excludes:
				logging.debug("Excluding %s" % watcher)
				continue

			logging.debug("Sending watcher email to %s" % watcher)

			# Compose message
			self.backend.messages.send_template("wiki/messages/page-changed",
				recipients=[watcher], page=self, priority=-10)


class File(misc.Object):
	def init(self, id, data):
		self.id = id
		self.data = data

	@property
	def url(self):
		return os.path.join(self.path, self.filename)

	@property
	def path(self):
		return self.data.path

	@property
	def filename(self):
		return self.data.filename

	@property
	def mimetype(self):
		return self.data.mimetype

	@property
	def size(self):
		return self.data.size

	@lazy_property
	def author(self):
		if self.data.author_uid:
			return self.backend.accounts.get_by_uid(self.data.author_uid)

	@property
	def created_at(self):
		return self.data.created_at

	def is_pdf(self):
		return self.mimetype in ("application/pdf", "application/x-pdf")

	def is_image(self):
		return self.mimetype.startswith("image/")

	@lazy_property
	def blob(self):
		res = self.db.get("SELECT data FROM wiki_blobs \
			WHERE id = %s", self.data.blob_id)

		if res:
			return bytes(res.data)

	def get_thumbnail(self, size):
		cache_key = "-".join((self.path, util.normalize(self.filename), self.created_at.isoformat(), "%spx" % size))

		# Try to fetch the data from the cache
		thumbnail = self.memcache.get(cache_key)
		if thumbnail:
			return thumbnail

		# Generate the thumbnail
		thumbnail = self._generate_thumbnail(size)

		# Put it into the cache for forever
		self.memcache.set(cache_key, thumbnail)

		return thumbnail
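
	# Note (added for clarity): the cache key includes the file's creation
	# timestamp, so a newer upload of the same filename gets a fresh key and
	# never collides with stale cached thumbnails.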

	def _generate_thumbnail(self, size, **args):
		image = PIL.Image.open(io.BytesIO(self.blob))

		# Remember the original format; convert() below returns a new image
		# that no longer carries it
		image_format = image.format

		# Remove any alpha-channels
		if image_format == "JPEG" and not image.mode == "RGB":
			# Make a white background
			background = PIL.Image.new("RGBA", image.size, (255,255,255))

			# Flatten both images together (alpha_composite needs two RGBA images)
			flattened_image = PIL.Image.alpha_composite(background, image.convert("RGBA"))

			# Remove the alpha channel
			image = flattened_image.convert("RGB")

		# Resize the image to the desired resolution
		image.thumbnail((size, size), PIL.Image.LANCZOS)
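
		# Note (added for clarity): thumbnail() keeps the aspect ratio and
		# only ever scales down, so small originals are left untouched.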

		if image_format == "JPEG":
			# Apply a gaussian blur to make compression easier
			image = image.filter(PIL.ImageFilter.GaussianBlur(radius=0.05))

			# Arguments to optimise the compression
			args.update({
				"subsampling" : "4:2:0",
				"quality" : 70,
			})
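
			# Note (added for clarity): 4:2:0 chroma subsampling stores colour
			# at a quarter of the luma resolution and, together with quality 70,
			# trades a little sharpness for considerably smaller JPEG files.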

		with io.BytesIO() as f:
			# If writing out the image does not work with optimization,
			# we try to write it out without any optimization.
			try:
				image.save(f, image_format, optimize=True, **args)
			except:
				image.save(f, image_format, **args)

			return f.getvalue()