]> git.ipfire.org Git - ipfire.org.git/blame - src/backend/wiki.py
wiki: Let search function return a list of pages instead of a generator
[ipfire.org.git] / src / backend / wiki.py
CommitLineData
181d08f3
MT
1#!/usr/bin/python3
2
79dd9a0f 3import PIL
79c8de9d 4import PIL.ImageFilter
4ed1dadb 5import difflib
79dd9a0f 6import io
181d08f3 7import logging
6ac7e934 8import os.path
181d08f3 9import re
addc18d5 10import tornado.gen
9e90e800 11import urllib.parse
181d08f3
MT
12
13from . import misc
9523790a 14from . import util
181d08f3
MT
15from .decorators import *
16
181d08f3
MT
class Wiki(misc.Object):
    """
    Entry point for everything wiki related: pages and their revisions,
    the full-text search, access control lists and uploaded files.

    All data is read from and written to the database via self.db.
    """

    def _get_pages(self, query, *args):
        """
        Runs the given query and yields a Page for every row
        """
        res = self.db.query(query, *args)

        for row in res:
            yield Page(self.backend, row.id, data=row)

    def _get_page(self, query, *args):
        """
        Runs the given query and returns a single Page, or None
        if the lookup came back empty
        """
        res = self.db.get(query, *args)

        if res:
            return Page(self.backend, res.id, data=res)

    def get_page_title(self, page, default=None):
        """
        Returns the title of the page at the given path.

        If the page does not exist, default is returned and if that
        is not set either, the last component of the path.
        """
        doc = self.get_page(page)
        if doc:
            return doc.title

        return default or os.path.basename(page)

    def get_page(self, page, revision=None):
        """
        Fetches a page by its path.

        Without a revision, the most recent revision is returned.
        Otherwise revision is matched against the timestamp column.
        """
        page = Page.sanitise_page_name(page)
        assert page

        if revision:
            return self._get_page("SELECT * FROM wiki WHERE page = %s \
                AND timestamp = %s", page, revision)
        else:
            return self._get_page("SELECT * FROM wiki WHERE page = %s \
                ORDER BY timestamp DESC LIMIT 1", page)

    def get_recent_changes(self, account, limit=None):
        """
        Yields all revisions of the last four weeks (newest first)
        which the given account is allowed to see.

        If limit is a positive number, no more than that many pages
        are yielded. None (the default) means no limit.
        """
        pages = self._get_pages("SELECT * FROM wiki \
            WHERE timestamp >= NOW() - INTERVAL '4 weeks' \
            ORDER BY timestamp DESC")

        found = 0
        for page in pages:
            if not page.check_acl(account):
                continue

            yield page

            # Count what has been returned instead of decrementing limit,
            # because limit is None by default and cannot be subtracted from
            found += 1
            if limit and found == limit:
                break

    def create_page(self, page, author, content, changes=None, address=None):
        """
        Stores a new revision of the page and notifies all watchers
        (except the author). Returns the new revision.

        Empty content is stored as NULL.
        """
        page = Page.sanitise_page_name(page)

        # Write page to the database
        page = self._get_page("INSERT INTO wiki(page, author_uid, markdown, changes, address) \
            VALUES(%s, %s, %s, %s, %s) RETURNING *", page, author.uid, content or None, changes, address)

        # Send email to all watchers
        page._send_watcher_emails(excludes=[author])

        return page

    def delete_page(self, page, author, **kwargs):
        """
        Deletes a page by storing a new revision without any content.

        Any extra keyword arguments are passed on to create_page().
        """
        # Do nothing if the page does not exist
        if not self.get_page(page):
            return

        # Just creates a blank last version of the page
        self.create_page(page, author=author, content=None, **kwargs)

    def make_breadcrumbs(self, url):
        """
        Returns a list of (path, title) tuples for every parent
        of the given URL. The URL itself is not part of the list.
        """
        # Split and strip all empty elements (double slashes)
        parts = [e for e in url.split("/") if e]

        ret = []
        for i in range(1, len(parts)):
            part = "/".join(parts[:i])

            ret.append(("/%s" % part, self.get_page_title(part, os.path.basename(part))))

        return ret

    def search(self, query, account=None, limit=None):
        """
        Runs a full-text search and returns a list of all matching
        pages which account is allowed to see, best match first.

        If limit is set, the search stops after that many pages.
        """
        query = util.parse_search_query(query)

        res = self._get_pages("SELECT wiki.* FROM wiki_search_index search_index \
            LEFT JOIN wiki ON search_index.wiki_id = wiki.id \
            WHERE search_index.document @@ to_tsquery('english', %s) \
            ORDER BY ts_rank(search_index.document, to_tsquery('english', %s)) DESC",
            query, query)

        pages = []
        for page in res:
            # Skip any pages the user doesn't have permission for
            if not page.check_acl(account):
                continue

            # Return any other pages
            pages.append(page)

            # Break when we have found enough pages
            if limit and len(pages) >= limit:
                break

        return pages

    def refresh(self):
        """
        Needs to be called after a page has been changed
        """
        self.db.execute("REFRESH MATERIALIZED VIEW wiki_search_index")

    # ACL

    def check_acl(self, page, account):
        """
        Returns True if account may access the given page.

        Only the ACL with the longest path that the page path starts
        with (compared case-insensitively) is considered. Pages which
        are not covered by any ACL are accessible to everybody.
        """
        res = self.db.query("SELECT * FROM wiki_acls \
            WHERE %s ILIKE (path || '%%') ORDER BY LENGTH(path) DESC LIMIT 1", page)

        # The query is limited to one row, so this runs once at most
        for row in res:
            # Access not permitted when user is not logged in
            if not account:
                return False

            # Permission is granted if the user is in any matching group
            # and denied otherwise
            return any(group in account.groups for group in row.groups)

        # If no ACLs are found, we permit access
        return True

    # Files

    def _get_files(self, query, *args):
        """
        Runs the given query and yields a File for every row
        """
        res = self.db.query(query, *args)

        for row in res:
            yield File(self.backend, row.id, data=row)

    def _get_file(self, query, *args):
        """
        Runs the given query and returns a single File, or None
        if the lookup came back empty
        """
        res = self.db.get(query, *args)

        if res:
            return File(self.backend, res.id, data=res)

    def get_files(self, path):
        """
        Returns a list of all files at path that have not been
        deleted, sorted by their filename
        """
        files = self._get_files("SELECT * FROM wiki_files \
            WHERE path = %s AND deleted_at IS NULL ORDER BY filename", path)

        return list(files)

    def get_file_by_path(self, path):
        """
        Returns the file at the given path (directory and filename)
        unless it has been deleted
        """
        path, filename = os.path.dirname(path), os.path.basename(path)

        return self._get_file("SELECT * FROM wiki_files \
            WHERE path = %s AND filename = %s AND deleted_at IS NULL", path, filename)

    def upload(self, path, filename, data, mimetype, author, address):
        """
        Stores data as a new blob and creates a file entry that
        references it. Returns the new File.
        """
        # Upload the blob first
        blob = self.db.get("INSERT INTO wiki_blobs(data) VALUES(%s) RETURNING id", data)

        # Create entry for file
        return self._get_file("INSERT INTO wiki_files(path, filename, author_uid, address, \
            mimetype, blob_id, size) VALUES(%s, %s, %s, %s, %s, %s, %s) RETURNING *", path,
            filename, author.uid, address, mimetype, blob.id, len(data))

    def find_image(self, path, filename):
        """
        Searches for an image called filename in path first and
        then in the parent directory of path.

        Returns None if no image could be found.
        """
        for p in (path, os.path.dirname(path)):
            file = self.get_file_by_path(os.path.join(p, filename))

            if file and file.is_image():
                return file
186
181d08f3
MT
187
class Page(misc.Object):
    """
    One revision of a wiki page.

    data is the database row of the revision. Which page and which
    revision this is comes from its page and timestamp columns.
    """

    def init(self, id, data=None):
        self.id = id
        self.data = data

    def __repr__(self):
        return "<%s %s %s>" % (self.__class__.__name__, self.page, self.timestamp)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.id == other.id

        # Let Python try the reflected operation instead of returning None
        return NotImplemented

    def __hash__(self):
        # Defining __eq__ removes the inherited __hash__, so provide one
        # that is consistent with the comparison by ID
        return hash(self.id)

    def __lt__(self, other):
        """
        Sorts by page name first and by timestamp second
        """
        if isinstance(other, self.__class__):
            if self.page == other.page:
                return self.timestamp < other.timestamp

            return self.page < other.page

        return NotImplemented

    @staticmethod
    def sanitise_page_name(page):
        """
        Normalises a page name: it will always start with a slash,
        never end with one (unless it is just "/") and never contain
        any repeated slashes.
        """
        if not page:
            return "/"

        # Make sure the page name starts with a / and collapse runs of
        # slashes of any length (a single replace of "//" leaves one
        # behind if there were three or more)
        page = re.sub(r"/{2,}", "/", "/%s" % page)

        # Make sure that the page name does NOT end with a /
        # (this has to happen after collapsing to catch "foo//")
        if len(page) > 1 and page.endswith("/"):
            page = page[:-1]

        return page

    @property
    def url(self):
        return self.page

    @property
    def full_url(self):
        return "https://wiki.ipfire.org%s" % self.url

    @property
    def page(self):
        return self.data.page

    @property
    def title(self):
        """
        The headline of the page, or the last component of the
        page name if the page does not start with a headline
        """
        return self._title or os.path.basename(self.page[1:])

    @property
    def _title(self):
        if not self.markdown:
            return

        # Only the first line is checked for a H1 headline
        markdown = self.markdown.splitlines()

        # The title is matched non-greedily so that an optional
        # closing " #" does not become a part of it
        m = re.match(r"^# (.*?)( #)?$", markdown[0])
        if m:
            return m.group(1)

    @lazy_property
    def author(self):
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

    def _render(self, text):
        """
        Converts wiki markup into HTML.

        Relative image references are resolved to uploaded files and
        linked to their detail page, [[...]] links are converted into
        regular markdown links and the result is passed to the
        markdown renderer of the blog.
        """
        logging.debug("Rendering %s", self)

        # Link images
        replacements = []
        for match in re.finditer(r"!\[(.*?)\]\((.*?)\)", text):
            alt_text, url = match.groups()

            # Skip any absolute and external URLs
            if url.startswith(("/", "https://", "http://")):
                continue

            # Try to split query string
            url, delimiter, qs = url.partition("?")

            # Parse query arguments
            args = urllib.parse.parse_qs(qs)

            # Find image
            file = self.backend.wiki.find_image(self.page, url)
            if not file:
                continue

            # Scale down the image if not already done
            if not "s" in args:
                args["s"] = "768"

            # Format URL (parse_qs() returns lists of values, which
            # urlencode() only encodes correctly with doseq)
            url = "%s?%s" % (file.url, urllib.parse.urlencode(args, doseq=True))

            replacements.append((match.span(), file, alt_text, url))

        # Apply all replacements (back to front, so that the spans
        # of the matches that are still to be replaced remain valid)
        for (start, end), file, alt_text, url in reversed(replacements):
            text = text[:start] + "[![%s](%s)](%s?action=detail)" % (alt_text, url, file.url) + text[end:]

        # Add wiki links
        # Each entry is: pattern, link template, title template,
        # optional function to look up a title, internal link?
        patterns = (
            (r"\[\[([\w\d\/\-\.]+)(?:\|(.+?))\]\]", r"\1", r"\2", None, True),
            (r"\[\[([\w\d\/\-\.]+)\]\]", r"\1", r"\1", self.backend.wiki.get_page_title, True),

            # External links
            (r"\[\[((?:ftp|git|https?|rsync|sftp|ssh|webcal)\:\/\/.+?)(?:\|(.+?))\]\]",
                r"\1", r"\2", None, False),

            # Mail
            # NOTE(review): the address is used as link target without a
            # mailto: scheme - confirm that the renderer takes care of this
            (r"\[\[([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)\]\]",
                r"\1", r"\1", None, False),
        )

        for pattern, link, title, repl, internal in patterns:
            replacements = []

            for match in re.finditer(pattern, text):
                l = match.expand(link)
                t = match.expand(title)

                if internal:
                    # Allow relative links
                    if not l.startswith("/"):
                        l = os.path.join(self.page, l)

                    # Normalise links
                    l = os.path.normpath(l)

                if callable(repl):
                    t = repl(l) or t

                replacements.append((match.span(), t or l, l))

            # Apply all replacements
            for (start, end), t, l in reversed(replacements):
                text = text[:start] + "[%s](%s)" % (t, l) + text[end:]

        # Borrow this from the blog
        return self.backend.blog._render_text(text, lang="markdown")

    @property
    def markdown(self):
        """
        The content of this revision. This is always a string,
        which is empty if there is no content.
        """
        return self.data.markdown or ""

    @property
    def html(self):
        return self._render(self.markdown)

    @property
    def timestamp(self):
        return self.data.timestamp

    def was_deleted(self):
        """
        Returns True if this revision has no content stored
        """
        # This must check the raw column as the markdown property
        # converts None into an empty string
        return self.data.markdown is None

    @lazy_property
    def breadcrumbs(self):
        return self.backend.wiki.make_breadcrumbs(self.page)

    def get_latest_revision(self):
        """
        Returns the most recent revision of this page
        """
        # Revisions come newest first, so return the first object
        return next(iter(self.get_revisions()), None)

    def get_revisions(self):
        """
        Returns all revisions of this page, newest first
        """
        return self.backend.wiki._get_pages("SELECT * FROM wiki \
            WHERE page = %s ORDER BY timestamp DESC", self.page)

    @lazy_property
    def previous_revision(self):
        """
        The revision right before this one (or None)
        """
        return self.backend.wiki._get_page("SELECT * FROM wiki \
            WHERE page = %s AND timestamp < %s ORDER BY timestamp DESC \
            LIMIT 1", self.page, self.timestamp)

    @property
    def changes(self):
        return self.data.changes

    # ACL

    def check_acl(self, account):
        return self.backend.wiki.check_acl(self.page, account)

    # Sidebar

    @lazy_property
    def sidebar(self):
        """
        The sidebar page which is closest to this page: <page>/sidebar
        is tried first and then the same for each parent up to the root
        """
        parts = self.page.split("/")

        while parts:
            sidebar = self.backend.wiki.get_page("%s/sidebar" % os.path.join(*parts))
            if sidebar:
                return sidebar

            parts.pop()

    # Watchers

    @lazy_property
    def diff(self):
        """
        A unified diff between the previous and this revision,
        or None if there is no previous revision
        """
        if self.previous_revision:
            diff = difflib.unified_diff(
                self.previous_revision.markdown.splitlines(),
                self.markdown.splitlines(),
            )

            return "\n".join(diff)

    @property
    def watchers(self):
        """
        Yields the accounts of everybody who is watching this page
        """
        res = self.db.query("SELECT uid FROM wiki_watchlist \
            WHERE page = %s", self.page)

        for row in res:
            # Search for account by UID and skip if none was found
            account = self.backend.accounts.get_by_uid(row.uid)
            if not account:
                continue

            # Return the account
            yield account

    def is_watched_by(self, account):
        res = self.db.get("SELECT 1 FROM wiki_watchlist \
            WHERE page = %s AND uid = %s", self.page, account.uid)

        return bool(res)

    def add_watcher(self, account):
        # Nothing to do if the account is watching this page already
        if self.is_watched_by(account):
            return

        self.db.execute("INSERT INTO wiki_watchlist(page, uid) \
            VALUES(%s, %s)", self.page, account.uid)

    def remove_watcher(self, account):
        self.db.execute("DELETE FROM wiki_watchlist \
            WHERE page = %s AND uid = %s", self.page, account.uid)

    def _send_watcher_emails(self, excludes=None):
        """
        Sends an email about this change to everybody who is watching
        this page, except to those accounts listed in excludes
        """
        if excludes is None:
            excludes = []

        # Nothing to do if there was no previous revision
        if not self.previous_revision:
            return

        for watcher in self.watchers:
            # Skip everyone who is excluded
            if watcher in excludes:
                logging.debug("Excluding %s", watcher)
                continue

            logging.debug("Sending watcher email to %s", watcher)

            # Compose message
            self.backend.messages.send_template("wiki/messages/page-changed",
                recipients=[watcher], page=self, priority=-10)
aba5e58a 455
f2cfd873
MT
456
class File(misc.Object):
    """
    A file that has been uploaded to the wiki.

    data is the database row of the file. The content of the file
    is stored separately and only loaded when blob is accessed.
    """

    def init(self, id, data):
        self.id = id
        self.data = data

    @property
    def url(self):
        return os.path.join(self.path, self.filename)

    @property
    def path(self):
        return self.data.path

    @property
    def filename(self):
        return self.data.filename

    @property
    def mimetype(self):
        return self.data.mimetype

    @property
    def size(self):
        return self.data.size

    @lazy_property
    def author(self):
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

    @property
    def created_at(self):
        return self.data.created_at

    def is_pdf(self):
        return self.mimetype in ("application/pdf", "application/x-pdf")

    def is_image(self):
        return self.mimetype.startswith("image/")

    @lazy_property
    def blob(self):
        """
        The content of the file as bytes (or None if the blob
        could not be found)
        """
        res = self.db.get("SELECT data FROM wiki_blobs \
            WHERE id = %s", self.data.blob_id)

        if res:
            return bytes(res.data)

    def get_thumbnail(self, size):
        """
        Returns the image scaled down to fit into size x size pixels.

        Thumbnails are cached. The creation time of the file is part
        of the cache key.
        """
        cache_key = "-".join((self.path, util.normalize(self.filename), self.created_at.isoformat(), "%spx" % size))

        # Try to fetch the data from the cache
        thumbnail = self.memcache.get(cache_key)
        if thumbnail:
            return thumbnail

        # Generate the thumbnail
        thumbnail = self._generate_thumbnail(size)

        # Put it into the cache for forever
        self.memcache.set(cache_key, thumbnail)

        return thumbnail

    def _generate_thumbnail(self, size, **args):
        """
        Scales the image down to fit into size x size pixels and
        returns it encoded in the same format as the original.

        Any extra keyword arguments are passed on to Image.save().
        """
        # The top of the file only imports PIL and PIL.ImageFilter, so
        # do not rely on another module having loaded PIL.Image
        import PIL.Image

        image = PIL.Image.open(io.BytesIO(self.blob))

        # Remember the format now: images returned by convert() and
        # filter() have no format set, which would make save() fail
        image_format = image.format

        # Remove any alpha-channels
        if image_format == "JPEG" and not image.mode == "RGB":
            # Make a white background
            background = PIL.Image.new("RGBA", image.size, (255, 255, 255, 255))

            # Flatten both images together
            # (alpha_composite() only accepts RGBA images)
            flattened_image = PIL.Image.alpha_composite(background, image.convert("RGBA"))

            # Remove the alpha channel
            image = flattened_image.convert("RGB")

        # Resize the image to the desired resolution
        image.thumbnail((size, size), PIL.Image.LANCZOS)

        if image_format == "JPEG":
            # Apply a gaussian blur to make compression easier
            image = image.filter(PIL.ImageFilter.GaussianBlur(radius=0.05))

            # Arguments to optimise the compression
            args.update({
                "subsampling" : "4:2:0",
                "quality" : 70,
            })

        with io.BytesIO() as f:
            # If writing out the image does not work with optimization,
            # we try to write it out without any optimization.
            try:
                image.save(f, image_format, optimize=True, **args)
            except Exception:
                # Drop anything the failed attempt might have written
                f.seek(0)
                f.truncate()

                image.save(f, image_format, **args)

            return f.getvalue()