Add a couple of backlinks
[books.alexwlchan.net] / scripts / render_html.py
1 #!/usr/bin/env python3
2
3 import datetime
4 import hashlib
5 import itertools
6 import os
7 import pathlib
8 import re
9 import subprocess
10 import sys
11 import typing
12
13 import attr
14 import bs4
15 import cssmin
16 import frontmatter
17 from jinja2 import Environment, FileSystemLoader, select_autoescape
18 import markdown
19 from markdown.extensions.smarty import SmartyExtension
20 import smartypants
21
22 from generate_bookshelf import create_shelf_data_uri
23 from tint_colors import get_tint_colors, store_tint_color
24
25
def rsync(dir1, dir2):
    """
    Copy ``dir1`` to ``dir2`` by shelling out to ``rsync``.

    The copy is recursive and runs with ``--delete``.  Raises
    ``subprocess.CalledProcessError`` if rsync exits with a non-zero status.
    """
    command = ["rsync", "--recursive", "--delete", dir1, dir2]
    subprocess.check_call(command)
28
29
def git(*args):
    """
    Run ``git`` with the given arguments and return what it printed to stdout.

    The output has surrounding whitespace stripped and is decoded as UTF-8.
    Raises ``subprocess.CalledProcessError`` if git exits with a non-zero
    status.
    """
    command = ["git", *args]
    raw_output = subprocess.check_output(command)
    return raw_output.strip().decode("utf8")
32
33
def set_git_timestamps():
    """
    For everything in the covers/ directory, set the last modified timestamp to
    the last time it was modified in Git.  This should make tint colour computations
    stable across machines.

    Files whose modified time is more than 7 days old are skipped unless
    ``--reset`` was passed on the command line.  Files with no commit in Git
    are left untouched.  Access times are preserved.
    """
    covers_dir = "src/covers"
    max_age_seconds = 7 * 24 * 60 * 60

    # Neither of these changes while the loop runs, so work them out once.
    force_reset = "--reset" in sys.argv
    now = datetime.datetime.now().timestamp()

    for f in os.listdir(covers_dir):
        path = os.path.join(covers_dir, f)

        if not os.path.isfile(path):
            continue

        stat = os.stat(path)

        # If the modified time is >7 days ago, skip setting the modified time.  This means
        # the script stays pretty fast when doing a regular sync.
        if not force_reset and now - stat.st_mtime > max_age_seconds:
            continue

        # The "--" stops a file name from ever being read as a revision or option.
        revision = git("rev-list", "--max-count=1", "HEAD", "--", path)

        # An empty result means the file has never been committed.
        if not revision:
            continue

        # --no-patch: we only want the author date, not the commit's diff.
        timestamp, *_ = git(
            "show", "--no-patch", "--pretty=format:%ai", revision
        ).splitlines()
        modified_time = datetime.datetime.strptime(
            timestamp, "%Y-%m-%d %H:%M:%S %z"
        ).timestamp()

        os.utime(path, times=(stat.st_atime, modified_time))
68
69
@attr.s
class Book:
    """
    Metadata about a single book.

    ``slug``, ``title`` and ``publication_year`` have no default and must be
    supplied; every other field defaults to an empty string.
    """
    # The loader functions in this file set the slug to the name of the
    # entry's Markdown file, minus its extension.
    slug = attr.ib()
    title = attr.ib()
    publication_year = attr.ib()

    # People credited on the book.
    author = attr.ib(default="")
    editor = attr.ib(default="")
    narrator = attr.ib(default="")
    illustrator = attr.ib(default="")

    # NOTE(review): presumably the cover's file name and a text description
    # of it -- confirm against the templates, which aren't visible here.
    cover_image = attr.ib(default="")
    cover_desc = attr.ib(default="")

    isbn10 = attr.ib(default="")
    isbn13 = attr.ib(default="")
86
87
@attr.s
class Review:
    """
    A review of a book.

    Built by ``get_review_entry_from_path`` from the "review" front matter of
    a Markdown file, with ``text`` set to the body of that file.
    """
    # main() sorts reviews on the string form of this value, and groups them
    # by its first four characters (the year).
    date_read = attr.ib()
    text = attr.ib()
    # Appended to date_read in main()'s sort key.
    date_order = attr.ib(default=1)
    # Not interpreted anywhere in this file.
    format = attr.ib(default=None)
    rating = attr.ib(default=None)
    did_not_finish = attr.ib(default=False)

    @property
    def finished(self):
        """True unless the review is marked as ``did_not_finish``."""
        return not self.did_not_finish
100
101
@attr.s
class ReviewEntry:
    """
    A review paired with the book it is about and the file it was loaded from.
    """
    path = attr.ib()
    book = attr.ib()
    review = attr.ib()

    def out_path(self):
        """
        Return where this review is rendered, relative to the output root:
        ``reviews/<source file name without its extension>``.
        """
        stem = self.path.with_suffix("").name
        return pathlib.Path("reviews") / stem
111
112
def get_review_entry_from_path(path):
    """
    Load the Markdown file at ``path`` and return it as a ``ReviewEntry``.

    The book is built from whichever ``Book`` attributes are present under
    the "book" key of the front matter; other keys there are ignored.  The
    slug always comes from the file name.  The review is built from the
    "review" front matter plus the body of the file.
    """
    post = frontmatter.load(path)

    book_fields = {}
    for field in Book.__attrs_attrs__:
        name = field.name
        try:
            value = post["book"][name]
        except KeyError:
            # Not in the front matter; fall back to the attribute's default.
            continue
        book_fields[name] = value

    path_without_ext, _ = os.path.splitext(path)
    book_fields["slug"] = os.path.basename(path_without_ext)

    return ReviewEntry(
        path=path,
        book=Book(**book_fields),
        review=Review(text=post.content, **post["review"]),
    )
129
130
@attr.s
class CurrentlyReading:
    """Notes about a book that is currently being read."""
    # get_reading_entry_from_path sets this to the body of the Markdown file.
    text = attr.ib()
134
135
@attr.s
class CurrentlyReadingEntry:
    """
    A ``CurrentlyReading`` note paired with its ``Book`` and the path of the
    file it was loaded from.
    """
    path = attr.ib()
    book = attr.ib()
    reading = attr.ib()
141
142
def get_reading_entry_from_path(path):
    """
    Load the Markdown file at ``path`` and return it as a
    ``CurrentlyReadingEntry``.

    Every key under "book" in the front matter is passed to ``Book``; the
    slug comes from the file name.  The body of the file becomes the text of
    the reading note.
    """
    post = frontmatter.load(path)

    path_without_ext, _ = os.path.splitext(path)
    book = Book(slug=os.path.basename(path_without_ext), **post["book"])

    return CurrentlyReadingEntry(
        path=path,
        book=book,
        reading=CurrentlyReading(text=post.content),
    )
152
153
def _parse_date(value):
    """
    Coerce ``value`` to a date.

    Anything that is already a ``datetime.date`` (which includes
    ``datetime.datetime``) is returned unchanged; anything else is parsed as
    a ``YYYY-MM-DD`` string.
    """
    if not isinstance(value, datetime.date):
        value = datetime.datetime.strptime(value, "%Y-%m-%d").date()
    return value
159
160
@attr.s
class Plan:
    """A note about a book, together with the date it was added."""
    # get_plan_entry_from_path sets this to the body of the Markdown file.
    text = attr.ib()
    # Passed through _parse_date, so either a date or a YYYY-MM-DD string
    # may be supplied.
    date_added = attr.ib(converter=_parse_date)
165
166
@attr.s
class PlanEntry:
    """
    A ``Plan`` paired with its ``Book`` and the path of the file it was
    loaded from.
    """
    path = attr.ib()
    book = attr.ib()
    plan = attr.ib()
172
173
def get_plan_entry_from_path(path):
    """
    Load the Markdown file at ``path`` and return it as a ``PlanEntry``.

    Every key under "book" in the front matter is passed to ``Book``; the
    slug comes from the file name.  The plan takes its date from
    ``plan.date_added`` in the front matter and its text from the body of
    the file.
    """
    post = frontmatter.load(path)

    path_without_ext, _ = os.path.splitext(path)
    book = Book(slug=os.path.basename(path_without_ext), **post["book"])

    date_added = post["plan"]["date_added"]

    return PlanEntry(
        path=path,
        book=book,
        plan=Plan(date_added=date_added, text=post.content),
    )
183
184
185 def get_entries(dirpath, constructor):
186     for dirpath, _, filenames in os.walk(dirpath):
187         for f in filenames:
188             if not f.endswith(".md"):
189                 continue
190
191             path = pathlib.Path(dirpath) / f
192
193             try:
194                 yield constructor(path)
195             except Exception:
196                 print(f"Error parsing {path}", file=sys.stderr)
197                 raise
198
199
def render_markdown(text):
    """
    Convert Markdown ``text`` to HTML, with the Smarty extension enabled.
    """
    smarty = SmartyExtension()
    return markdown.markdown(text, extensions=[smarty])
202
203
def render_date(date_value):
    """
    Format a date for display.

    ``date_value`` can be:

    *   a ``datetime.date`` or ``datetime.datetime``, rendered as
        e.g. "5 March 2020"
    *   a ``YYYY-MM-DD`` string, rendered the same way
    *   a ``YYYY-MM`` string, rendered as e.g. "March 2020"

    Raises ``ValueError`` if a string isn't in one of those two forms, or
    doesn't name a real date.
    """
    if isinstance(date_value, datetime.date):
        # Format the day by hand rather than with "%-d": that flag is a
        # platform-specific strftime extension and isn't available everywhere.
        return f"{date_value.day} {date_value.strftime('%B %Y')}"

    date_match = re.match(
        r"^(?P<year>\d{4})-(?P<month>\d{2})(?:-(?P<day>\d{2}))?$", date_value
    )

    # An explicit raise, because an assert would be stripped under -O.
    if date_match is None:
        raise ValueError(f"Unrecognised date: {date_value!r}")

    year = int(date_match.group("year"))
    month = int(date_match.group("month"))
    day = date_match.group("day")

    if day is None:
        # Month precision only, so leave the day out of the output.
        return datetime.date(year, month, 1).strftime("%B %Y")

    return render_date(datetime.date(year, month, int(day)))
223
224
def save_html(template, out_name="", **kwargs):
    """
    Render ``template`` with ``kwargs`` and write the result to
    ``_html/<out_name>/index.html``, creating parent directories as needed.

    Before it's written, the HTML is tidied up: the CSS in inline style tags
    is minified, and HTML comments are removed.  The file is always written
    as UTF-8.
    """
    html = template.render(**kwargs)
    out_path = pathlib.Path("_html") / out_name / "index.html"
    out_path.parent.mkdir(exist_ok=True, parents=True)

    soup = bs4.BeautifulSoup(html, "html.parser")

    # Minify the CSS in all inline style tags.
    for style_tag in soup.find_all("style"):
        css = style_tag.string

        # .string is None when the tag is empty, so there's nothing to minify.
        if css is not None:
            style_tag.string = cssmin.cssmin(css)

    # Remove any comments
    for comment in soup(text=lambda text: isinstance(text, bs4.Comment)):
        comment.extract()

    # Be explicit about the encoding: the pages contain non-ASCII characters,
    # and the platform default isn't UTF-8 everywhere.
    out_path.write_text(str(soup), encoding="utf-8")
241
242
def _create_new_thumbnail(src_path, dst_path):
    """
    Write a thumbnail of the image at ``src_path`` to ``dst_path`` by
    shelling out to ``convert``, creating the destination directory first.
    """
    dst_path.parent.mkdir(exist_ok=True, parents=True)

    # Thumbnails are 240x240 max, then 2x for retina displays
    command = ["convert", src_path, "-resize", "480x480>", dst_path]
    subprocess.check_call(command)
250
251
def thumbnail_1x(name):
    """
    Return the file name in ``name`` with ``_1x`` inserted before its
    extension.  Any directory components are dropped.
    """
    image = pathlib.Path(name)
    return f"{image.stem}_1x{image.suffix}"
255
256
def _create_new_square(src_path, square_path):
    """
    Write a 240x240 version of the image at ``src_path`` to ``square_path``
    by shelling out to ``convert``, creating the destination directory first.

    The image is resized to fit, then centred on a white 240x240 canvas.
    """
    square_path.parent.mkdir(exist_ok=True, parents=True)

    command = [
        "convert",
        src_path,
        "-resize", "240x240",
        "-gravity", "center",
        "-background", "white",
        "-extent", "240x240",
        square_path,
    ]
    subprocess.check_call(command)
264
265
def create_thumbnails():
    """
    Create a thumbnail and a square version of every image in ``src/covers``,
    then store the tint colour of each thumbnail.

    An image is only regenerated if the output doesn't exist yet, or if the
    source has been modified more recently than the output.
    """
    covers_dir = pathlib.Path("src/covers")
    thumbnails_dir = pathlib.Path("_html/thumbnails")
    squares_dir = pathlib.Path("_html/squares")

    for image_name in os.listdir("src/covers"):
        if image_name == ".DS_Store":
            continue

        src_path = covers_dir / image_name

        dst_path = thumbnails_dir / image_name
        if (
            not dst_path.exists()
            or src_path.stat().st_mtime > dst_path.stat().st_mtime
        ):
            _create_new_thumbnail(src_path, dst_path)

        square_path = squares_dir / image_name
        if (
            not square_path.exists()
            or src_path.stat().st_mtime > square_path.stat().st_mtime
        ):
            _create_new_square(src_path, square_path)

        store_tint_color(dst_path)
287
288
# MD5 hex digest of the stylesheet, computed once when the module is loaded.
# Reading through pathlib closes the file straight away, rather than leaving
# an open handle for the garbage collector to clean up.
CSS_HASH = hashlib.md5(pathlib.Path("static/style.css").read_bytes()).hexdigest()
290
291
def css_hash(_):
    """
    Return the stylesheet hash in the form ``md5:<hex digest>``.

    This is registered as a Jinja filter in main(), so it has to accept the
    value it's applied to, but that value is ignored.
    """
    return "md5:" + CSS_HASH
294
295
def count_finished_books(review_entries: typing.List[ReviewEntry]):
    """Return how many of the given entries have a review marked as finished."""
    return sum(1 for entry in review_entries if entry.review.finished)
298
299
def main():
    """
    Build the whole site into the ``_html`` directory.

    In order: fix up the modified times of the cover images, create
    thumbnails, copy the covers and static files across with rsync, and then
    render the individual review pages, the four list pages (reviews,
    currently reading, want to read, will never read) and the front page.
    """
    set_git_timestamps()

    env = Environment(
        loader=FileSystemLoader("templates"),
        autoescape=select_autoescape(["html", "xml"]),
    )

    env.filters["render_markdown"] = render_markdown
    env.filters["render_date"] = render_date
    env.filters["smartypants"] = smartypants.smartypants
    env.filters["thumbnail_1x"] = thumbnail_1x
    env.filters["css_hash"] = css_hash
    env.filters["create_shelf_data_uri"] = create_shelf_data_uri
    env.filters["cap_rgb"] = lambda v: min([v, 255])
    env.filters["count_finished_books"] = count_finished_books

    create_thumbnails()

    tint_colors = get_tint_colors()

    rsync("src/covers/", "_html/covers/")
    rsync("static/", "_html/static/")

    # Render the individual review pages and the "all reviews" page

    all_reviews = list(
        get_entries(dirpath="src/reviews", constructor=get_review_entry_from_path)
    )

    # Newest first.  The sort key is a string, so this relies on date_read
    # having an ISO-style string form.
    all_reviews = sorted(
        all_reviews,
        key=lambda rev: f"{rev.review.date_read}/{rev.review.date_order}",
        reverse=True,
    )

    # Every review uses the same template, so only look it up once.
    review_template = env.get_template("review.html")

    for review_entry in all_reviews:
        save_html(
            template=review_template,
            out_name=review_entry.out_path(),
            review_entry=review_entry,
            title=f"My review of {review_entry.book.title}",
            tint_colors=tint_colors,
        )

    # groupby only merges adjacent items; the reviews are already sorted by
    # date, so everything from the same year sits together.
    save_html(
        template=env.get_template("list_reviews.html"),
        out_name="reviews",
        all_reviews=[
            (year, list(reviews))
            for (year, reviews) in itertools.groupby(
                all_reviews, key=lambda rev: str(rev.review.date_read)[:4]
            )
        ],
        title="books i’ve read",
        this_year=str(datetime.datetime.now().year),
        tint_colors=tint_colors,
    )

    # Render the "currently reading" page

    all_reading = list(
        get_entries(
            dirpath="src/currently_reading", constructor=get_reading_entry_from_path
        )
    )

    save_html(
        template=env.get_template("list_reading.html"),
        out_name="reading",
        all_reading=all_reading,
        title="books i’m currently reading",
        tint_colors=tint_colors,
    )

    # Render the "want to read" page

    all_plans = list(
        get_entries(dirpath="src/plans", constructor=get_plan_entry_from_path)
    )

    all_plans = sorted(all_plans, key=lambda plan: plan.plan.date_added, reverse=True)

    save_html(
        template=env.get_template("list_plans.html"),
        out_name="to-read",
        all_plans=all_plans,
        title="books i want to read",
        tint_colors=tint_colors,
    )

    # Render the "never going to read this" page

    all_retired = list(
        get_entries(dirpath="src/will_never_read", constructor=get_plan_entry_from_path)
    )

    all_retired = sorted(
        all_retired, key=lambda plan: plan.plan.date_added, reverse=True
    )

    # A literal apostrophe, like the other titles: with autoescaping on, an
    # HTML entity written here would have its ampersand escaped.
    save_html(
        template=env.get_template("list_will_never_read.html"),
        out_name="will-never-read",
        all_retired=all_retired,
        title="books i’m never going to read",
        tint_colors=tint_colors,
    )

    # Render the front page

    # read_text closes the file for us; the encoding is spelled out so the
    # result doesn't depend on the platform default.
    index_text = pathlib.Path("src/index.md").read_text(encoding="utf-8")

    save_html(
        template=env.get_template("index.html"),
        text=index_text,
        reviews=all_reviews[:5],
        tint_colors=tint_colors,
    )

    print("✨ Rendered HTML files to _html ✨")
416
417
# Only build the site when this file is run as a script, not when it's imported.
if __name__ == "__main__":
    main()