]> git.ipfire.org Git - ipfire.org.git/blob - src/backend/wiki.py
wiki: Cache thumbnail images in memcache
[ipfire.org.git] / src / backend / wiki.py
1 #!/usr/bin/python3
2
3 import PIL
4 import io
5 import logging
6 import os.path
7 import re
8 import urllib.parse
9
10 from . import misc
11 from . import util
12 from .decorators import *
13
class Wiki(misc.Object):
    """
    Access layer for wiki pages and the files that are attached to them.

    All data is read from and written to the database through self.db;
    rows are wrapped into Page and File objects.
    """

    # Pages

    def _get_pages(self, query, *args):
        """
        Runs the given query and yields a Page object for every row
        """
        rows = self.db.query(query, *args)

        for row in rows:
            yield Page(self.backend, row.id, data=row)

    def _get_page(self, query, *args):
        """
        Runs the given query and returns the result as a Page object,
        or None if the query did not return anything
        """
        row = self.db.get(query, *args)
        if not row:
            return None

        return Page(self.backend, row.id, data=row)

    def get_page_title(self, page, default=None):
        """
        Returns the title of the given page.

        If the page does not exist, default is returned and if no
        default has been given, the last component of the page name.
        """
        doc = self.get_page(page)
        if not doc:
            return default or os.path.basename(page)

        return doc.title

    def get_page(self, page, revision=None):
        """
        Fetches the page with the given name.

        If revision is set, the revision with exactly that timestamp
        is returned; otherwise the most recent one.
        """
        page = Page.sanitise_page_name(page)
        assert page

        # No specific revision requested, so we return the latest one
        if not revision:
            return self._get_page("SELECT * FROM wiki WHERE page = %s \
                ORDER BY timestamp DESC LIMIT 1", page)

        return self._get_page("SELECT * FROM wiki WHERE page = %s \
            AND timestamp = %s", page, revision)

    def get_recent_changes(self, limit=None):
        """
        Yields all revisions of the last four weeks (newest first)
        """
        return self._get_pages("SELECT * FROM wiki \
            WHERE timestamp >= NOW() - INTERVAL '4 weeks' \
            ORDER BY timestamp DESC LIMIT %s", limit)

    def create_page(self, page, author, content, changes=None, address=None):
        """
        Stores a new revision of the given page and returns it.

        Empty content is stored as NULL.
        """
        name = Page.sanitise_page_name(page)
        markdown = content or None

        return self._get_page("INSERT INTO wiki(page, author_uid, markdown, changes, address) \
            VALUES(%s, %s, %s, %s, %s) RETURNING *", name, author.uid, markdown, changes, address)

    def delete_page(self, page, author, **kwargs):
        """
        Deletes a page by adding a new revision without any content.

        Nothing happens if the page does not exist.
        """
        if self.get_page(page):
            # Just creates a blank last version of the page
            self.create_page(page, author=author, content=None, **kwargs)

    def make_breadcrumbs(self, url):
        """
        Returns a list of (URL, title) tuples for all parents of the given URL
        """
        # Split and strip all empty elements (double slashes)
        parts = [element for element in url.split("/") if element]

        breadcrumbs = []

        # Walk through all parents (the last element is the page itself)
        for i in range(1, len(parts)):
            parent = "/".join(parts[:i])
            title = self.get_page_title(parent, os.path.basename(parent))

            breadcrumbs.append(("/%s" % parent, title))

        return breadcrumbs

    def search(self, query, limit=None):
        """
        Performs a full-text search over the search index and
        returns a list of all matching pages, best match first
        """
        query = util.parse_search_query(query)

        pages = self._get_pages("SELECT wiki.* FROM wiki_search_index search_index \
            LEFT JOIN wiki ON search_index.wiki_id = wiki.id \
            WHERE search_index.document @@ to_tsquery('english', %s) \
            ORDER BY ts_rank(search_index.document, to_tsquery('english', %s)) DESC \
            LIMIT %s", query, query, limit)

        return list(pages)

    def refresh(self):
        """
        Needs to be called after a page has been changed
        """
        self.db.execute("REFRESH MATERIALIZED VIEW wiki_search_index")

    # Files

    def _get_files(self, query, *args):
        """
        Runs the given query and yields a File object for every row
        """
        rows = self.db.query(query, *args)

        for row in rows:
            yield File(self.backend, row.id, data=row)

    def _get_file(self, query, *args):
        """
        Runs the given query and returns the result as a File object,
        or None if the query did not return anything
        """
        row = self.db.get(query, *args)
        if not row:
            return None

        return File(self.backend, row.id, data=row)

    def get_files(self, path):
        """
        Returns a list of all files in path that have not been
        deleted, sorted by filename
        """
        return list(self._get_files("SELECT * FROM wiki_files \
            WHERE path = %s AND deleted_at IS NULL ORDER BY filename", path))

    def get_file_by_path(self, path):
        """
        Returns the file with the given full path (directory and
        filename) unless it has been deleted
        """
        filename = os.path.basename(path)
        path = os.path.dirname(path)

        return self._get_file("SELECT * FROM wiki_files \
            WHERE path = %s AND filename = %s AND deleted_at IS NULL", path, filename)

    def upload(self, path, filename, data, mimetype, author, address):
        """
        Stores data as a new blob and creates a file entry that
        points to it. Returns the new File object.
        """
        # Upload the blob first
        blob = self.db.get("INSERT INTO wiki_blobs(data) VALUES(%s) RETURNING id", data)

        # Create entry for file
        return self._get_file("INSERT INTO wiki_files(path, filename, author_uid, address, \
            mimetype, blob_id, size) VALUES(%s, %s, %s, %s, %s, %s, %s) RETURNING *", path,
            filename, author.uid, address, mimetype, blob.id, len(data))

    def find_image(self, path, filename):
        """
        Searches for an image called filename in path and after
        that in the parent of path. Returns None if nothing was found.
        """
        candidates = (path, os.path.dirname(path))

        for directory in candidates:
            file = self.get_file_by_path(os.path.join(directory, filename))

            # Skip anything that does not exist or is not an image
            if not file or not file.is_image():
                continue

            return file
132
133
class Page(misc.Object):
    """
    One revision of a wiki page.

    The object wraps a row of the "wiki" table which is passed in
    as data and exposes its columns as properties.
    """

    def init(self, id, data=None):
        self.id = id
        self.data = data

    def __lt__(self, other):
        """
        Sorts pages by name and revisions of the same page by timestamp
        """
        # Tell Python that we cannot compare ourselves with foreign
        # types instead of silently returning None
        if not isinstance(other, self.__class__):
            return NotImplemented

        if self.page == other.page:
            return self.timestamp < other.timestamp

        return self.page < other.page

    @staticmethod
    def sanitise_page_name(page):
        """
        Brings a page name into its canonical form which always starts
        with a slash, never ends with one (except for the root page "/")
        and never contains any repeated slashes.
        """
        if not page:
            return "/"

        # Collapse any runs of slashes first so that something
        # like "foo//" cannot leave a trailing slash behind
        page = re.sub(r"/{2,}", "/", page)

        # Make sure that the page name does NOT end with a /
        if page.endswith("/"):
            page = page[:-1]

        # Make sure the page name starts with a /
        if not page.startswith("/"):
            page = "/%s" % page

        return page

    @property
    def url(self):
        return self.page

    @property
    def page(self):
        return self.data.page

    @property
    def title(self):
        """
        The headline of the page or the page name without the
        leading slash if the page does not have a headline
        """
        return self._title or self.page[1:]

    @property
    def _title(self):
        """
        The H1 headline if the first line of the markdown is one
        """
        if not self.markdown:
            return

        # Find first H1 headline in markdown
        markdown = self.markdown.splitlines()

        # The title is matched non-greedily, because otherwise
        # the optional closing " #" would become a part of it
        m = re.match(r"^# (.*?)( #)?$", markdown[0])
        if m:
            return m.group(1)

    @lazy_property
    def author(self):
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

    def _render(self, text):
        """
        Renders the given markdown.

        Relative images that have been uploaded to the wiki are replaced
        by a scaled-down version which links to the detail page of the
        file and [[wiki links]] are converted into markdown links before
        the text is passed to the renderer of the blog.
        """
        logging.debug("Rendering %s", self)

        # Link images (non-greedy so that multiple images
        # on the same line are matched one by one)
        replacements = []
        for match in re.finditer(r"!\[(.*?)\]\((.*?)\)", text):
            alt_text, url = match.groups()

            # Skip any absolute and external URLs
            if url.startswith(("/", "https://", "http://")):
                continue

            # Try to split query string
            url, _, qs = url.partition("?")

            # Parse query arguments
            args = urllib.parse.parse_qs(qs)

            # Find image
            file = self.backend.wiki.find_image(self.page, url)
            if not file:
                continue

            # Scale down the image if not already done
            if "s" not in args:
                args["s"] = "768"

            # Format URL (parse_qs() returns a list of values for each
            # argument which requires doseq to be encoded correctly)
            url = "%s?%s" % (file.url, urllib.parse.urlencode(args, doseq=True))

            replacements.append((match.span(), file, alt_text, url))

        # Apply all replacements (back to front so that the
        # positions of the remaining matches stay valid)
        for (start, end), file, alt_text, url in reversed(replacements):
            text = text[:start] + "[![%s](%s)](%s?action=detail)" % (alt_text, url, file.url) + text[end:]

        # Add wiki links
        #   [[page|title]] uses the given title
        #   [[page]] uses the title of the linked page
        # Both forms accept the same characters in the page name
        patterns = (
            (r"\[\[([\w\d\/\-]+)(?:\|([\w\d\s]+))\]\]", r"/\1", r"\2", None, None),
            (r"\[\[([\w\d\/\-]+)\]\]", r"/\1", r"\1", self.backend.wiki.get_page_title, r"\1"),
        )

        for pattern, link, title, repl, args in patterns:
            replacements = []

            for match in re.finditer(pattern, text):
                l = match.expand(link)
                t = match.expand(title)

                # Look up the title if we have a function for that
                if callable(repl):
                    t = repl(match.expand(args)) or t

                replacements.append((match.span(), t or l, l))

            # Apply all replacements
            for (start, end), t, l in reversed(replacements):
                text = text[:start] + "[%s](%s)" % (t, l) + text[end:]

        # Borrow this from the blog
        return self.backend.blog._render_text(text, lang="markdown")

    @property
    def markdown(self):
        return self.data.markdown

    @property
    def html(self):
        """
        The stored HTML of this page if there is any.
        Otherwise the markdown is rendered.
        """
        return self.data.html or self._render(self.markdown)

    @property
    def timestamp(self):
        return self.data.timestamp

    def was_deleted(self):
        """
        Returns True if this revision does not have any content
        """
        return self.markdown is None

    @lazy_property
    def breadcrumbs(self):
        return self.backend.wiki.make_breadcrumbs(self.page)

    def get_latest_revision(self):
        """
        Returns the most recent revision of this page
        """
        revisions = self.get_revisions()

        # Return first object
        for rev in revisions:
            return rev

    def get_revisions(self):
        """
        Yields all revisions of this page (newest first)
        """
        return self.backend.wiki._get_pages("SELECT * FROM wiki \
            WHERE page = %s ORDER BY timestamp DESC", self.page)

    @property
    def changes(self):
        return self.data.changes

    # Sidebar

    @lazy_property
    def sidebar(self):
        """
        The closest page called "sidebar", searched for at the level
        of this page first and then at each parent up to the root
        """
        parts = self.page.split("/")

        while parts:
            sidebar = self.backend.wiki.get_page("%s/sidebar" % os.path.join(*parts))
            if sidebar:
                return sidebar

            # Nothing found here, so try one level up
            parts.pop()
300
301
class File(misc.Object):
    """
    A file that has been uploaded to the wiki.

    The object wraps a row of the "wiki_files" table which is passed
    in as data. The payload lives in the "wiki_blobs" table.
    """

    def init(self, id, data):
        self.id = id
        self.data = data

    @property
    def url(self):
        return os.path.join(self.path, self.filename)

    @property
    def path(self):
        return self.data.path

    @property
    def filename(self):
        return self.data.filename

    @property
    def mimetype(self):
        return self.data.mimetype

    @property
    def size(self):
        return self.data.size

    @lazy_property
    def author(self):
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

    @property
    def created_at(self):
        return self.data.created_at

    def is_pdf(self):
        return self.mimetype in ("application/pdf", "application/x-pdf")

    def is_image(self):
        return self.mimetype.startswith("image/")

    @lazy_property
    def blob(self):
        """
        The payload of the file as bytes (or None if the blob is gone)
        """
        res = self.db.get("SELECT data FROM wiki_blobs \
            WHERE id = %s", self.data.blob_id)

        if res:
            return bytes(res.data)

    def get_thumbnail(self, size):
        """
        Returns the image scaled down to fit into size x size pixels.

        Thumbnails are served from memcache and only generated
        if they could not be found there.
        """
        cache_key = "-".join((self.path, util.normalize(self.filename), self.created_at.isoformat(), "%spx" % size))

        # Try to fetch the data from the cache
        thumbnail = self.memcache.get(cache_key)
        if thumbnail:
            return thumbnail

        # Generate the thumbnail
        thumbnail = self._generate_thumbnail(size)

        # Put it into the cache for forever (no expiry time is set)
        self.memcache.set(cache_key, thumbnail)

        return thumbnail

    def _generate_thumbnail(self, size):
        """
        Scales down the image and returns it as bytes in the
        same format as the original
        """
        # "import PIL" does not load the Image submodule,
        # so that we have to import it explicitly
        import PIL.Image

        with PIL.Image.open(io.BytesIO(self.blob)) as image:
            # Resize the image to the desired resolution
            # (LANCZOS is what used to be called ANTIALIAS
            # which is no longer available in Pillow 10)
            image.thumbnail((size, size), PIL.Image.LANCZOS)

            # If writing out the image does not work with optimization,
            # we try to write it out without any optimization.
            try:
                return self._encode_image(image, optimize=True)
            except Exception:
                logging.debug("Could not optimize thumbnail of %s",
                    self.filename, exc_info=True)

                return self._encode_image(image)

    @staticmethod
    def _encode_image(image, **kwargs):
        """
        Writes image into a fresh buffer and returns the content
        of it, so that a failed attempt never leaves partial data behind
        """
        with io.BytesIO() as f:
            image.save(f, image.format, quality=98, **kwargs)

            return f.getvalue()