03e89bbcd07ad120a575652bfc7706f70ce7095b
[books.alexwlchan.net] / scripts / render_html.py
1 #!/usr/bin/env python3
2
3 import datetime
4 import hashlib
5 import itertools
6 import os
7 import pathlib
8 import re
9 import subprocess
10 import sys
11
12 import attr
13 import bs4
14 import cssmin
15 import frontmatter
16 from jinja2 import Environment, FileSystemLoader, select_autoescape
17 import markdown
18 from markdown.extensions.smarty import SmartyExtension
19 import smartypants
20
21 from generate_bookshelf import create_shelf_data_uri
22 from tint_colors import get_tint_colors, store_tint_color
23
24
def rsync(dir1, dir2):
    """
    Sync ``dir1`` to ``dir2`` by shelling out to ``rsync``.

    The copy is recursive and passes ``--delete``.  Raises
    ``subprocess.CalledProcessError`` if rsync exits with a non-zero status.
    """
    command = ["rsync", "--recursive", "--delete", dir1, dir2]
    subprocess.check_call(command)
27
28
def git(*args):
    """
    Run ``git`` with the given arguments and return what it printed to stdout.

    The output has surrounding whitespace stripped and is decoded as UTF-8.
    Raises ``subprocess.CalledProcessError`` if git exits with a non-zero status.
    """
    raw_output = subprocess.check_output(["git", *args])
    return raw_output.strip().decode("utf8")
31
32
def set_git_timestamps():
    """
    For everything in the covers/ directory, set the last modified timestamp to
    the last time it was modified in Git.  This should make tint colour computations
    stable across machines.

    Files whose modified time is more than 7 days old are left alone unless
    ``--reset`` appears in ``sys.argv``.  Files with no commit touching them are
    skipped.  The access time of each file is preserved.

    Raises ``subprocess.CalledProcessError`` if a git command fails.
    """
    # Fail fast if we aren't inside a Git working tree.  The result isn't needed:
    # every path below is relative to the current working directory.
    git("rev-parse", "--show-toplevel")

    now = datetime.datetime.now().timestamp()
    force_reset = "--reset" in sys.argv
    one_week = 7 * 24 * 60 * 60

    for f in os.listdir("src/covers"):
        path = os.path.join("src/covers", f)

        if not os.path.isfile(path):
            continue

        stat = os.stat(path)

        # If the modified time is >7 days ago, skip setting the modified time.  This means
        # the script stays pretty fast when doing a regular sync.
        if now - stat.st_mtime > one_week and not force_reset:
            continue

        revision = git("rev-list", "--max-count=1", "HEAD", path)

        if not revision:
            continue

        # --no-patch stops git rendering the commit's diff; we only want the
        # author date, which is the first line of output.
        timestamp, *_ = git(
            "show", "--no-patch", "--pretty=format:%ai", revision
        ).splitlines()
        modified_time = datetime.datetime.strptime(
            timestamp, "%Y-%m-%d %H:%M:%S %z"
        ).timestamp()

        os.utime(path, times=(stat.st_atime, modified_time))
67
68
@attr.s
class Book:
    """
    Metadata about a single book.

    The loaders in this file build it from the ``book`` mapping in the front
    matter of a Markdown file.
    """
    # Required: these have no default, so they must always be supplied.
    title = attr.ib()
    author = attr.ib()
    publication_year = attr.ib()
    # Optional cover details; both default to the empty string.
    # NOTE(review): cover_image is presumably a filename in src/covers -- confirm
    # against the templates.
    cover_image = attr.ib(default="")
    cover_desc = attr.ib(default="")

    # Optional ISBNs; both default to the empty string.
    isbn10 = attr.ib(default="")
    isbn13 = attr.ib(default="")
78     isbn13 = attr.ib(default="")
79
80
@attr.s
class Review:
    """
    What was written about a book once it had been read.

    Built from the ``review`` mapping in a file's front matter, plus the body
    of the file as ``text``.
    """
    # Required.  main() sorts and groups reviews by str(date_read), so this is
    # presumably either a date or a "YYYY-MM[-DD]" string -- confirm against the data.
    date_read = attr.ib()
    # Required: the body of the review file.
    text = attr.ib()
    # Optional; these default to None when the front matter omits them.
    format = attr.ib(default=None)
    rating = attr.ib(default=None)
    # Optional; defaults to False.
    did_not_finish = attr.ib(default=False)
87     did_not_finish = attr.ib(default=False)
88
89
@attr.s
class ReviewEntry:
    """
    A review file on disk: the path it was loaded from, the book it's about,
    and the review itself.
    """
    path = attr.ib()
    book = attr.ib()
    review = attr.ib()

    def out_path(self):
        """
        Return the output location for this review: ``reviews/<name>``, where
        ``<name>`` is the source filename with its final suffix removed.
        """
        slug = self.path.with_suffix("").name
        return pathlib.Path("reviews") / slug
99
100
def get_review_entry_from_path(path):
    """
    Load the Markdown file at ``path`` and return it as a ``ReviewEntry``.

    The ``book`` front matter supplies the Book fields, the ``review`` front
    matter supplies the Review fields, and the body of the file becomes the
    review text.
    """
    post = frontmatter.load(path)

    # Only copy across the keys that Book declares; keys which are absent from
    # the front matter are left out, so Book's own defaults apply.
    book_fields = {}
    for field in Book.__attrs_attrs__:
        try:
            book_fields[field.name] = post["book"][field.name]
        except KeyError:
            continue

    return ReviewEntry(
        path=path,
        book=Book(**book_fields),
        review=Review(text=post.content, **post["review"]),
    )
116
117
@attr.s
class CurrentlyReading:
    """
    Notes about a book that's in progress.
    """
    # Required: get_reading_entry_from_path passes the body of the source file.
    text = attr.ib()
121
122
@attr.s
class CurrentlyReadingEntry:
    """
    A "currently reading" file on disk: the path it was loaded from, the book
    it's about, and the notes on it.
    """
    # The path of the source file.
    path = attr.ib()
    # A Book instance.
    book = attr.ib()
    # A CurrentlyReading instance.
    reading = attr.ib()
128
129
def get_reading_entry_from_path(path):
    """
    Load the Markdown file at ``path`` and return it as a ``CurrentlyReadingEntry``.

    Every key in the ``book`` front matter is passed to ``Book``, and the body
    of the file becomes the reading notes.
    """
    post = frontmatter.load(path)

    return CurrentlyReadingEntry(
        path=path,
        book=Book(**post["book"]),
        reading=CurrentlyReading(text=post.content),
    )
137
138
def _parse_date(value):
    """
    Coerce ``value`` to a date.

    Anything that is already a ``datetime.date`` is returned unchanged;
    anything else is parsed as a ``YYYY-MM-DD`` string.
    """
    if not isinstance(value, datetime.date):
        value = datetime.datetime.strptime(value, "%Y-%m-%d").date()

    return value
144
145
@attr.s
class Plan:
    """
    Notes about a book on a reading list, and when it was added.
    """
    # Required: get_plan_entry_from_path passes the body of the source file.
    text = attr.ib()
    # Required.  Passed through _parse_date, so it may be given either as a
    # date or as a "YYYY-MM-DD" string.
    date_added = attr.ib(converter=_parse_date)
150
151
@attr.s
class PlanEntry:
    """
    A plan file on disk: the path it was loaded from, the book it's about,
    and the plan itself.

    main() uses this for both src/plans and src/will_never_read.
    """
    # The path of the source file.
    path = attr.ib()
    # A Book instance.
    book = attr.ib()
    # A Plan instance.
    plan = attr.ib()
157
158
def get_plan_entry_from_path(path):
    """
    Load the Markdown file at ``path`` and return it as a ``PlanEntry``.

    Every key in the ``book`` front matter is passed to ``Book``; the plan
    takes its date from ``plan.date_added`` and its text from the file body.
    """
    post = frontmatter.load(path)

    return PlanEntry(
        path=path,
        book=Book(**post["book"]),
        plan=Plan(date_added=post["plan"]["date_added"], text=post.content),
    )
166
167
def get_entries(dirpath, constructor):
    """
    Walk ``dirpath`` recursively and yield ``constructor(path)`` for every
    file whose name ends in ``.md``.

    If the constructor raises, the offending path is printed to stderr and
    the exception is re-raised.
    """
    for folder, _, filenames in os.walk(dirpath):
        for name in filenames:
            if name.endswith(".md"):
                path = pathlib.Path(folder, name)

                try:
                    yield constructor(path)
                except Exception:
                    print(f"Error parsing {path}", file=sys.stderr)
                    raise
181
182
def render_markdown(text):
    """
    Convert Markdown ``text`` to HTML, with the Smarty extension enabled.
    """
    smarty = SmartyExtension()
    return markdown.markdown(text, extensions=[smarty])
185
186
def render_date(date_value):
    """
    Format a date for display.

    ``date_value`` may be:

    *   a ``datetime.date`` (or ``datetime.datetime``), rendered as "4 March 2020"
    *   a ``YYYY-MM-DD`` string, rendered the same way
    *   a ``YYYY-MM`` string, rendered as "March 2020"

    Raises ``ValueError`` if a string doesn't match either pattern, or if it
    names a date that doesn't exist.
    """
    if isinstance(date_value, datetime.date):
        # Build the day by hand rather than with "%-d": the flag that drops the
        # leading zero is a platform-specific strftime extension.
        return f"{date_value.day} {date_value:%B %Y}"

    date_match = re.match(
        r"^(?P<year>\d{4})-(?P<month>\d{2})(?:-(?P<day>\d{2}))?$", date_value
    )

    # An explicit raise, not an assert: asserts are stripped under ``python -O``.
    if date_match is None:
        raise ValueError(f"Unrecognised date: {date_value!r}")

    year = int(date_match.group("year"))
    month = int(date_match.group("month"))
    day = date_match.group("day")

    if day is None:
        return datetime.date(year, month, 1).strftime("%B %Y")

    return render_date(datetime.date(year, month, int(day)))
206
207
def save_html(template, out_name="", **kwargs):
    """
    Render ``template`` with ``kwargs`` and write the result to
    ``_html/<out_name>/index.html``, creating parent directories as needed.

    Before writing, the HTML is tidied up: the CSS in inline <style> tags
    is minified, and HTML comments are removed.
    """
    html = template.render(**kwargs)
    out_path = pathlib.Path("_html") / out_name / "index.html"
    out_path.parent.mkdir(exist_ok=True, parents=True)

    soup = bs4.BeautifulSoup(html, "html.parser")

    # Minify the CSS in all inline <style> tags.
    for style_tag in soup.find_all("style"):
        # .string is None when the tag doesn't have a single text child (for
        # example, an empty <style> tag); there's nothing to minify then.
        if style_tag.string is None:
            continue

        style_tag.string = cssmin.cssmin(style_tag.string)

    # Remove any comments
    for comment in soup(text=lambda text: isinstance(text, bs4.Comment)):
        comment.extract()

    # Write UTF-8 explicitly rather than relying on the platform's default
    # encoding: page titles contain non-ASCII characters such as ’.
    out_path.write_text(str(soup), encoding="utf-8")
224
225
def _create_new_thumbnail(src_path, dst_path):
    """
    Create a thumbnail of ``src_path`` at ``dst_path`` by shelling out to
    ``convert``, creating the destination directory first if necessary.
    """
    dst_path.parent.mkdir(exist_ok=True, parents=True)

    # Thumbnails are 240x240 max, then 2x for retina displays
    resize_command = ["convert", src_path, "-resize", "480x480>", dst_path]
    subprocess.check_call(resize_command)
233
234
def thumbnail_1x(name):
    """
    Return the filename in ``name`` with ``_1x`` inserted before the extension,
    e.g. ``cover.jpg`` becomes ``cover_1x.jpg``.  Any directory part is dropped.
    """
    image = pathlib.Path(name)
    return f"{image.stem}_1x{image.suffix}"
238
239
def _create_new_square(src_path, square_path):
    """
    Create a 240x240 version of ``src_path`` at ``square_path`` by shelling out
    to ``convert``, creating the destination directory first if necessary.

    The image is resized to fit 240x240, then centred on a white 240x240 canvas.
    """
    square_path.parent.mkdir(exist_ok=True, parents=True)

    square_command = [
        "convert",
        src_path,
        "-resize", "240x240",
        "-gravity", "center",
        "-background", "white",
        "-extent", "240x240",
        square_path,
    ]
    subprocess.check_call(square_command)
247
248
def create_thumbnails():
    """
    Make sure every image in src/covers has an up-to-date thumbnail in
    _html/thumbnails and square crop in _html/squares, then store the tint
    colour for its thumbnail.

    An output is (re)created if it doesn't exist yet, or if the source image
    has a newer modified time than the output.
    """
    def is_stale(source, output):
        # Short-circuits, so we only stat the files if the output exists.
        return not output.exists() or source.stat().st_mtime > output.stat().st_mtime

    covers_dir = pathlib.Path("src/covers")
    thumbnails_dir = pathlib.Path("_html/thumbnails")
    squares_dir = pathlib.Path("_html/squares")

    for image_name in os.listdir("src/covers"):
        if image_name == ".DS_Store":
            continue

        src_path = covers_dir / image_name

        dst_path = thumbnails_dir / image_name
        if is_stale(src_path, dst_path):
            _create_new_thumbnail(src_path, dst_path)

        square_path = squares_dir / image_name
        if is_stale(src_path, square_path):
            _create_new_square(src_path, square_path)

        store_tint_color(dst_path)
270
271
# MD5 hex digest of the stylesheet, computed once when the module is imported.
# Reading through pathlib closes the file as soon as the bytes have been read.
CSS_HASH = hashlib.md5(pathlib.Path("static/style.css").read_bytes()).hexdigest()
273
274
def css_hash(_):
    """
    Jinja filter that returns the stylesheet hash as ``md5:<hex digest>``.

    The value being filtered is ignored.
    """
    return "md5:" + CSS_HASH
277
278
def main():
    """
    Build the whole site into the _html directory.

    This fixes up the modified times of the cover images, creates thumbnails,
    syncs the covers and static files, then renders one page per review, the
    four list pages (reviews, reading, to-read, will-never-read) and the
    front page.
    """
    set_git_timestamps()

    env = Environment(
        loader=FileSystemLoader("templates"),
        autoescape=select_autoescape(["html", "xml"]),
    )

    env.filters["render_markdown"] = render_markdown
    env.filters["render_date"] = render_date
    env.filters["smartypants"] = smartypants.smartypants
    env.filters["thumbnail_1x"] = thumbnail_1x
    env.filters["css_hash"] = css_hash
    env.filters["create_shelf_data_uri"] = create_shelf_data_uri

    create_thumbnails()

    tint_colors = get_tint_colors()

    rsync("src/covers/", "_html/covers/")
    rsync("static/", "_html/static/")

    # Render the "all reviews page"

    # Newest first.  The same string key is used for sorting and for grouping
    # by year below, so each year's reviews are contiguous for groupby.
    all_reviews = sorted(
        get_entries(dirpath="src/reviews", constructor=get_review_entry_from_path),
        key=lambda rev: str(rev.review.date_read),
        reverse=True,
    )

    for review_entry in all_reviews:
        save_html(
            template=env.get_template("review.html"),
            out_name=review_entry.out_path(),
            review_entry=review_entry,
            title=f"My review of {review_entry.book.title}",
            tint_colors=tint_colors,
        )

    save_html(
        template=env.get_template("list_reviews.html"),
        out_name="reviews",
        all_reviews=[
            (year, list(reviews))
            for (year, reviews) in itertools.groupby(
                all_reviews, key=lambda rev: str(rev.review.date_read)[:4]
            )
        ],
        title="books i’ve read",
        this_year=str(datetime.datetime.now().year),
        tint_colors=tint_colors,
    )

    # Render the "currently reading" page

    all_reading = list(
        get_entries(
            dirpath="src/currently_reading", constructor=get_reading_entry_from_path
        )
    )

    save_html(
        template=env.get_template("list_reading.html"),
        out_name="reading",
        all_reading=all_reading,
        title="books i’m currently reading",
        tint_colors=tint_colors,
    )

    # Render the "want to read" page

    all_plans = sorted(
        get_entries(dirpath="src/plans", constructor=get_plan_entry_from_path),
        key=lambda plan: plan.plan.date_added,
        reverse=True,
    )

    save_html(
        template=env.get_template("list_plans.html"),
        out_name="to-read",
        all_plans=all_plans,
        title="books i want to read",
        tint_colors=tint_colors,
    )

    # Render the "never going to read this page"

    all_retired = sorted(
        get_entries(dirpath="src/will_never_read", constructor=get_plan_entry_from_path),
        key=lambda plan: plan.plan.date_added,
        reverse=True,
    )

    save_html(
        template=env.get_template("list_will_never_read.html"),
        out_name="will-never-read",
        all_retired=all_retired,
        # A literal ’ like the other page titles, rather than the &rsquo; entity:
        # autoescaping is enabled for .html templates.
        title="books i’m never going to read",
        tint_colors=tint_colors,
    )

    # Render the front page

    save_html(
        template=env.get_template("index.html"),
        # read_text() closes the file once it has been read.
        text=pathlib.Path("src/index.md").read_text(),
        reviews=all_reviews[:5],
        tint_colors=tint_colors,
    )

    print("✨ Rendered HTML files to _html ✨")
393
394
# Only build the site when this file is run as a script, not when it's imported.
if __name__ == "__main__":
    main()