git.ipfire.org Git - ipfire.org.git/blob - src/backend/blog.py
search: Use PostgreSQL's websearch_to_tsquery()
[ipfire.org.git] / src / backend / blog.py
1 #!/usr/bin/python
2
3 import datetime
4 import feedparser
5 import html2text
6 import markdown
7 import markdown.extensions
8 import markdown.preprocessors
9 import re
10 import textile
11 import unicodedata
12
13 from . import misc
14 from .decorators import *
15
class Blog(misc.Object):
    """
        Gives access to the posts stored in the "blog" table, renders
        post text to HTML and imports posts from the feeds listed in
        the "blog_feeds" table.
    """

    def _get_post(self, query, *args):
        """
            Runs the given query and wraps the returned row into a Post.

            Returns None if the query did not return a row.
        """
        res = self.db.get(query, *args)

        if res:
            return Post(self.backend, res.id, data=res)

    def _get_posts(self, query, *args):
        """
            Runs the given query and yields a Post for every returned row.
        """
        res = self.db.query(query, *args)

        for row in res:
            yield Post(self.backend, row.id, data=row)

    def get_by_id(self, id):
        """
            Returns the post with the given ID (published or not)
        """
        return self._get_post("SELECT * FROM blog \
            WHERE id = %s", id)

    def get_by_slug(self, slug, published=True):
        """
            Returns the post with the given slug.

            Unless published is set to False, only posts with a
            publication time that has already passed are considered.
        """
        if published:
            return self._get_post("SELECT * FROM blog \
                WHERE slug = %s AND published_at <= NOW()", slug)

        return self._get_post("SELECT * FROM blog \
            WHERE slug = %s", slug)

    def get_newest(self, limit=None):
        """
            Yields published posts, newest first.

            limit is passed on as the parameter of the LIMIT clause.
        """
        return self._get_posts("SELECT * FROM blog \
            WHERE published_at IS NOT NULL \
                AND published_at <= NOW() \
            ORDER BY published_at DESC LIMIT %s", limit)

    def get_by_tag(self, tag, limit=None):
        """
            Yields published posts that carry the given tag, newest first
        """
        return self._get_posts("SELECT * FROM blog \
            WHERE published_at IS NOT NULL \
                AND published_at <= NOW() \
                AND %s = ANY(tags) \
            ORDER BY published_at DESC LIMIT %s", tag, limit)

    def get_by_author(self, author, limit=None):
        """
            Yields published posts of the given author, newest first.

            Posts match either by the author's name or by the author's UID.
        """
        return self._get_posts("SELECT * FROM blog \
            WHERE (author = %s OR author_uid = %s) \
                AND published_at IS NOT NULL \
                AND published_at <= NOW() \
            ORDER BY published_at DESC LIMIT %s",
            author.name, author.uid, limit)

    def get_by_year(self, year):
        """
            Yields all posts that have been published in the given year,
            newest first
        """
        return self._get_posts("SELECT * FROM blog \
            WHERE EXTRACT(year FROM published_at) = %s \
                AND published_at IS NOT NULL \
                AND published_at <= NOW() \
            ORDER BY published_at DESC", year)

    def get_drafts(self, author=None, limit=None):
        """
            Yields posts that are unpublished or scheduled for the future,
            most recently changed first.

            If author is given, only drafts with that author's UID are
            returned.
        """
        if author:
            return self._get_posts("SELECT * FROM blog \
                WHERE author_uid = %s \
                    AND (published_at IS NULL OR published_at > NOW()) \
                ORDER BY COALESCE(updated_at, created_at) DESC LIMIT %s",
                author.uid, limit)

        return self._get_posts("SELECT * FROM blog \
            WHERE (published_at IS NULL OR published_at > NOW()) \
            ORDER BY COALESCE(updated_at, created_at) DESC LIMIT %s", limit)

    def search(self, query, limit=None):
        """
            Yields posts whose search index document matches the query,
            best ranked first.

            The query string is handed to websearch_to_tsquery() twice:
            once for matching and once for ranking.
        """
        return self._get_posts("SELECT blog.* FROM blog \
            LEFT JOIN blog_search_index search_index ON blog.id = search_index.post_id \
            WHERE search_index.document @@ websearch_to_tsquery('english', %s) \
            ORDER BY ts_rank(search_index.document, websearch_to_tsquery('english', %s)) DESC \
            LIMIT %s", query, query, limit)

    def has_had_recent_activity(self, **kwargs):
        """
            Returns True if at least one post has been published within
            the given period of time before now.

            The keyword arguments are passed to datetime.timedelta().
        """
        t = datetime.timedelta(**kwargs)

        res = self.db.get("SELECT COUNT(*) AS count FROM blog \
            WHERE published_at IS NOT NULL AND published_at BETWEEN NOW() - %s AND NOW()", t)

        return bool(res and res.count > 0)

    def create_post(self, title, text, author, tags=None, lang="markdown"):
        """
            Creates a new post and returns the resulting Post object

            tags may be any iterable of tags; it is stored as a list.
            Leaving it out stores an empty list.
        """
        # A None default avoids sharing one mutable list between calls
        tags = [] if tags is None else list(tags)

        # Pre-render HTML
        html = self._render_text(text, lang=lang)

        return self._get_post("INSERT INTO blog(title, slug, text, html, lang, author_uid, tags) \
            VALUES(%s, %s, %s, %s, %s, %s, %s) RETURNING *", title, self._make_slug(title), text,
            html, lang, author.uid, tags)

    def _make_slug(self, s):
        """
            Creates a slug from the given string that is not yet used
            by any post in the database.
        """
        # Decompose accented characters and drop the combining marks so
        # that e.g. "über" becomes "uber". Leaving the marks in would make
        # the substitution below treat them as word separators ("u-ber").
        try:
            s = unicodedata.normalize("NFKD", s)
            s = "".join(c for c in s if not unicodedata.combining(c))
        except TypeError:
            pass

        # Collapse everything that is not a word character into whitespace
        s = re.sub(r"[^\w]+", " ", s)

        slug = "-".join(s.split()).lower()

        # Append dashes until the slug is unique
        while self.db.get("SELECT 1 FROM blog WHERE slug = %s", slug):
            slug += "-"

        return slug

    def _render_text(self, text, lang="markdown"):
        """
            Renders text written in the given markup language to HTML.

            Text in any language other than "markdown" or "textile" is
            returned unchanged.
        """
        if lang == "markdown":
            return markdown.markdown(text,
                extensions=[
                    PrettyLinksExtension(),
                    "codehilite",
                    "fenced_code",
                    "footnotes",
                    "nl2br",
                    "sane_lists",
                    "tables",
                    "toc",
                ])

        elif lang == "textile":
            return textile.textile(text)

        return text

    def refresh(self):
        """
            Needs to be called after a post has been changed
            and updates the search index.
        """
        self.db.execute("REFRESH MATERIALIZED VIEW blog_search_index")

    @property
    def years(self):
        """
            Yields all years in which posts have been published,
            most recent year first
        """
        res = self.db.query("SELECT DISTINCT EXTRACT(year FROM published_at)::integer AS year \
            FROM blog WHERE published_at IS NOT NULL AND published_at <= NOW() \
            ORDER BY year DESC")

        for row in res:
            yield row.year

    async def announce(self):
        """
            Announces all published posts that have not been announced yet
        """
        posts = self._get_posts("SELECT * FROM blog \
            WHERE (published_at IS NOT NULL AND published_at <= NOW()) \
                AND announced_at IS NULL")

        for post in posts:
            await post.announce()

    async def update_feeds(self):
        """
            Updates all enabled feeds

            Only entries tagged "blog.ipfire.org" are imported. Entries
            that have been imported before (same feed and foreign ID) are
            updated if the stored copy is older than the entry.
        """
        for feed in self.db.query("SELECT * FROM blog_feeds WHERE enabled IS TRUE"):
            # Errors raised by the parser are passed on to the caller
            f = feedparser.parse(feed.url)

            with self.db.transaction():
                # Update name
                self.db.execute("UPDATE blog_feeds SET name = %s \
                    WHERE id = %s", f.feed.title, feed.id)

                # Walk through all entries
                for entry in f.entries:
                    # Skip entries that do not have any (usable) tags
                    try:
                        tags = [t.term for t in entry.tags]
                    except AttributeError:
                        continue

                    # Skip everything without the "blog.ipfire.org" tag
                    if "blog.ipfire.org" not in tags:
                        continue

                    # Get link to the posting site
                    link = entry.links[0].href

                    # Check if the entry has already been imported
                    res = self.db.get("SELECT id, (updated_at < %s) AS needs_update \
                        FROM blog WHERE feed_id = %s AND foreign_id = %s",
                        entry.updated, feed.id, entry.id)
                    if res:
                        # If the post needs to be updated, we do so
                        if res.needs_update:
                            self.db.execute("UPDATE blog SET title = %s, author = %s, \
                                published_at = %s, updated_at = %s, html = %s, link = %s, \
                                tags = %s WHERE id = %s", entry.title, entry.author,
                                entry.published, entry.updated, entry.summary, link,
                                feed.tags + tags, res.id)

                        # Done here
                        continue

                    # Insert the new post
                    self.db.execute("INSERT INTO blog(title, slug, author, \
                        published_at, html, link, tags, updated_at, feed_id, foreign_id) \
                        VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                        entry.title, self._make_slug(entry.title), entry.author,
                        entry.published, entry.summary, link, feed.tags + tags,
                        entry.updated, feed.id, entry.id)

                # Mark feed as updated (the ID is passed as a query
                # parameter instead of being formatted into the SQL string)
                self.db.execute("UPDATE blog_feeds SET last_updated_at = CURRENT_TIMESTAMP \
                    WHERE id = %s", feed.id)

        # Refresh the search index
        with self.db.transaction():
            self.refresh()
234
235
class Post(misc.Object):
    """
        A single blog post.

        All attributes are read from the database row that is passed
        in as data.
    """

    def init(self, id, data=None):
        self.id = id
        self.data = data

    # Title

    @property
    def title(self):
        return self.data.title

    @property
    def slug(self):
        return self.data.slug

    @lazy_property
    def author(self):
        """
            Returns the account of the author if the post has an author
            UID, otherwise the author as stored with the post
        """
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

        return self.data.author

    @property
    def created_at(self):
        return self.data.created_at

    @property
    def lang(self):
        return self.data.lang

    # Published?

    @property
    def published_at(self):
        return self.data.published_at

    def is_published(self):
        """
            Returns True if the post is already published
            and False otherwise
        """
        published_at = self.published_at

        # Always return a real boolean, even if no publication time is set
        return bool(published_at and published_at <= datetime.datetime.now())

    def publish(self, when=None):
        """
            Publishes the post at the given time, or right away if no
            time is given. Does nothing if the post is already published.
        """
        if self.is_published():
            return

        # NOTE(review): self.data is not updated here, so published_at of
        # this object keeps its old value after this call
        self.db.execute("UPDATE blog SET published_at = COALESCE(%s, CURRENT_TIMESTAMP) \
            WHERE id = %s", when, self.id)

        # Update search indices
        self.backend.blog.refresh()

    # Updated?

    @property
    def updated_at(self):
        return self.data.updated_at

    # Text

    @property
    def text(self):
        return self.data.text

    # HTML

    @lazy_property
    def html(self):
        """
            Returns this post as rendered HTML

            The stored HTML is used if there is any. Otherwise the text
            is rendered.
        """
        return self.data.html or self.backend.blog._render_text(self.text, lang=self.lang)

    @lazy_property
    def plaintext(self):
        """
            Returns the HTML of this post converted to text with
            all links being ignored
        """
        h = html2text.HTML2Text()
        h.ignore_links = True

        return h.handle(self.html)

    # Excerpt

    @property
    def excerpt(self):
        """
            Returns the leading paragraphs of the plain text up to and
            including the first one that is not a headline and has at
            least 40 characters
        """
        excerpt = []

        for paragraph in self.plaintext.split("\n\n"):
            excerpt.append(paragraph)

            # Add another paragraph if we encountered a headline
            if paragraph.startswith("#"):
                continue

            # End if this paragraph was long enough
            if len(paragraph) >= 40:
                break

        return "\n\n".join(excerpt)

    # Tags

    @property
    def tags(self):
        return self.data.tags

    # Link

    @property
    def link(self):
        return self.data.link

    @lazy_property
    def release(self):
        """
            Returns the published release that refers to this post
        """
        return self.backend.releases._get_release("SELECT * FROM releases \
            WHERE published IS NOT NULL AND published <= NOW() AND blog_id = %s", self.id)

    def is_editable(self, editor):
        # Authors can edit their own posts
        return self.author == editor

    def update(self, title, text, tags=None):
        """
            Called to update the content of this post

            tags may be any iterable of tags. Leaving it out removes
            all tags from the post.
        """
        # Convert tags only once so that the database and the cached data
        # receive the same list (an iterator could not be read twice)
        tags = [] if tags is None else list(tags)

        # Update slug when post isn't published yet
        if self.is_published() or self.title == title:
            slug = self.slug
        else:
            slug = self.backend.blog._make_slug(title)

        # Render and cache HTML
        html = self.backend.blog._render_text(text, lang=self.lang)

        self.db.execute("UPDATE blog SET title = %s, slug = %s, text = %s, html = %s, \
            tags = %s, updated_at = CURRENT_TIMESTAMP WHERE id = %s",
            title, slug, text, html, tags, self.id)

        # Update cache
        self.data.update({
            "title" : title,
            "slug"  : slug,
            "text"  : text,
            "html"  : html,
            "tags"  : tags,
        })

        # Update search index if post is published
        if self.is_published():
            self.backend.blog.refresh()

    def delete(self):
        """
            Removes this post from the database
        """
        self.db.execute("DELETE FROM blog WHERE id = %s", self.id)

        # Update search indices
        self.backend.blog.refresh()

    async def announce(self):
        """
            Sends the announcement message to every member of the
            "promotional-consent" group and marks the post as announced.

            Does nothing if the group does not exist.
        """
        # Get people who should receive this message
        group = self.backend.groups.get_by_gid("promotional-consent")
        if not group:
            return

        with self.db.transaction():
            # Generate an email for everybody in this group
            for account in group:
                self.backend.messages.send_template("blog/messages/announcement",
                    account=account, post=self)

            # Mark this post as announced
            self.db.execute("UPDATE blog SET announced_at = CURRENT_TIMESTAMP \
                WHERE id = %s", self.id)
407
408
class PrettyLinksExtension(markdown.extensions.Extension):
    """
        Markdown extension that registers the preprocessors which turn
        bug numbers and CVE numbers into links
    """

    def extendMarkdown(self, md):
        # Registered in this order, both with priority 10
        preprocessors = (
            (BugzillaLinksPreprocessor, "bugzilla"),
            (CVELinksPreprocessor, "cve"),
        )

        for preprocessor, name in preprocessors:
            md.preprocessors.register(preprocessor(md), name, 10)
413
414
class BugzillaLinksPreprocessor(markdown.preprocessors.Preprocessor):
    """
        Turns references to bugs ("#" followed by at least five digits)
        into Markdown links to Bugzilla
    """
    regex = re.compile(r"(?:#(\d{5,}))", re.I)

    def run(self, lines):
        """
            Takes the lines of the document and returns a new list of
            lines in which all bug references have been replaced by links
        """
        link = r"[#\1](https://bugzilla.ipfire.org/show_bug.cgi?id=\1)"

        # Return a list (not a generator) as preprocessors are expected to
        return [self.regex.sub(link, line) for line in lines]
421
422
class CVELinksPreprocessor(markdown.preprocessors.Preprocessor):
    """
        Turns CVE numbers ("CVE", a whitespace character or dash, and
        the number) into Markdown links to cve.mitre.org
    """
    regex = re.compile(r"(?:CVE)[\s\-](\d{4}\-\d+)")

    def run(self, lines):
        """
            Takes the lines of the document and returns a new list of
            lines in which all CVE numbers have been replaced by links
        """
        link = r"[CVE-\1](https://cve.mitre.org/cgi-bin/cvename.cgi?name=\1)"

        # Return a list (not a generator) as preprocessors are expected to
        return [self.regex.sub(link, line) for line in lines]