Use the slug to identify books, not the color
[books.alexwlchan.net] / scripts / render_html.py
1 #!/usr/bin/env python3
2
3 import datetime
4 import hashlib
5 import itertools
6 import os
7 import pathlib
8 import re
9 import subprocess
10 import sys
11
12 import attr
13 import bs4
14 import cssmin
15 import frontmatter
16 from jinja2 import Environment, FileSystemLoader, select_autoescape
17 import markdown
18 from markdown.extensions.smarty import SmartyExtension
19 import smartypants
20
21 from generate_bookshelf import create_shelf_data_uri
22 from tint_colors import get_tint_colors, store_tint_color
23
24
def rsync(dir1, dir2):
    """
    Copy ``dir1`` to ``dir2`` by shelling out to ``rsync``, passing the
    ``--recursive`` and ``--delete`` flags.

    Raises ``subprocess.CalledProcessError`` if rsync exits with a non-zero status.
    """
    command = ["rsync", "--recursive", "--delete", dir1, dir2]
    subprocess.check_call(command)
27
28
def git(*args):
    """
    Run ``git`` with the given command-line arguments and return whatever it
    printed to stdout, stripped of surrounding whitespace and decoded as UTF-8.

    Raises ``subprocess.CalledProcessError`` if git exits with a non-zero status.
    """
    raw_output = subprocess.check_output(["git", *args])
    return raw_output.strip().decode("utf8")
31
32
def set_git_timestamps():
    """
    For everything in the covers/ directory, set the last modified timestamp to
    the last time it was modified in Git.  This should make tint colour computations
    stable across machines.

    Files whose modified time is more than 7 days in the past are skipped unless
    ``--reset`` appears in ``sys.argv``.  Files with no commit on HEAD are left
    untouched.  The access time of each file is preserved.
    """
    covers_dir = "src/covers"
    max_age_seconds = 7 * 24 * 60 * 60

    # These don't change while the loop runs, so work them out once.
    force_reset = "--reset" in sys.argv
    now = datetime.datetime.now().timestamp()

    for f in os.listdir(covers_dir):
        path = os.path.join(covers_dir, f)

        if not os.path.isfile(path):
            continue

        stat = os.stat(path)

        # If the modified time is >7 days ago, skip setting the modified time.  This means
        # the script stays pretty fast when doing a regular sync.
        if not force_reset and now - stat.st_mtime > max_age_seconds:
            continue

        # The most recent commit reachable from HEAD that touched this file.
        revision = git("rev-list", "--max-count=1", "HEAD", path)

        if not revision:
            continue

        # --no-patch suppresses the commit's diff, so git only prints the
        # author date (%ai) that we actually want.
        timestamp, *_ = git(
            "show", "--no-patch", "--pretty=format:%ai", revision
        ).splitlines()
        modified_time = datetime.datetime.strptime(
            timestamp, "%Y-%m-%d %H:%M:%S %z"
        ).timestamp()

        os.utime(path, times=(stat.st_atime, modified_time))
67
68
@attr.s
class Book:
    """
    Metadata about a single book.  Instances are built from the ``book`` section
    of a Markdown file's front matter by the ``get_*_entry_from_path`` functions.
    """

    # Identifier for the book: the source filename without its directory or
    # extension, as set by the get_*_entry_from_path functions.
    slug = attr.ib()
    title = attr.ib()
    author = attr.ib()
    publication_year = attr.ib()
    # NOTE(review): presumably the filename of an image in src/covers and its
    # alt text -- confirm against the templates.
    cover_image = attr.ib(default="")
    cover_desc = attr.ib(default="")

    # Optional ISBNs; both default to the empty string.
    isbn10 = attr.ib(default="")
    isbn13 = attr.ib(default="")
80
81
@attr.s
class Review:
    """
    My review of a book.  Built from the ``review`` section of a Markdown file's
    front matter, plus the body of the file as ``text``.
    """

    # When the book was read.  Used as the primary sort key for the list of
    # reviews, and its first four characters are used to group reviews by year.
    date_read = attr.ib()
    # The body of the Markdown file.
    text = attr.ib()
    # Secondary sort key, combined with date_read when ordering reviews.
    date_order = attr.ib(default=1)
    # NOTE(review): format/rating/did_not_finish are not read anywhere in this
    # script; presumably they are consumed by the templates -- confirm.
    format = attr.ib(default=None)
    rating = attr.ib(default=None)
    did_not_finish = attr.ib(default=False)
90
91
@attr.s
class ReviewEntry:
    """
    A book review loaded from disk: the source file it came from, the book
    being reviewed, and the review itself.
    """

    path = attr.ib()
    book = attr.ib()
    review = attr.ib()

    def out_path(self):
        """
        Return the directory (relative to the output root) this review is
        rendered into: ``reviews/<source filename without its extension>``.
        """
        without_extension = self.path.with_suffix("")
        name = without_extension.name
        return pathlib.Path(f"reviews/{name}")
101
102
def get_review_entry_from_path(path):
    """
    Load the Markdown file at ``path`` and turn it into a ``ReviewEntry``.

    Only the keys of the ``book`` front matter that match an attribute of
    ``Book`` are used; anything else in that section is ignored.  The book's
    slug always comes from the filename.  Every key in the ``review`` front
    matter is passed to ``Review``, along with the body of the file as its text.
    """
    post = frontmatter.load(path)

    book_fields = {}
    for field in Book.__attrs_attrs__:
        key = field.name
        try:
            value = post["book"][key]
        except KeyError:
            # Not in the front matter, so fall back to the attribute's default (if any).
            continue
        book_fields[key] = value

    # The slug is the filename, minus directory and extension.
    stem, _ = os.path.splitext(path)
    book_fields["slug"] = os.path.basename(stem)

    return ReviewEntry(
        path=path,
        book=Book(**book_fields),
        review=Review(**post["review"], text=post.content),
    )
119
120
@attr.s
class CurrentlyReading:
    """Notes about a book I'm currently reading."""

    # The body of the Markdown file the entry was loaded from.
    text = attr.ib()
124
125
@attr.s
class CurrentlyReadingEntry:
    """
    A "currently reading" entry loaded from disk: the source file, the book
    (a ``Book``), and the reading notes (a ``CurrentlyReading``).
    """

    path = attr.ib()
    book = attr.ib()
    reading = attr.ib()
131
132
def get_reading_entry_from_path(path):
    """
    Load the Markdown file at ``path`` and turn it into a ``CurrentlyReadingEntry``.

    Every key in the ``book`` front matter is passed to ``Book``; the slug comes
    from the filename.  The body of the file becomes the reading notes.
    """
    post = frontmatter.load(path)

    # The slug is the filename, minus directory and extension.
    stem, _ = os.path.splitext(path)
    book = Book(slug=os.path.basename(stem), **post["book"])

    return CurrentlyReadingEntry(
        path=path,
        book=book,
        reading=CurrentlyReading(text=post.content),
    )
142
143
def _parse_date(value):
    """
    Coerce ``value`` to a ``datetime.date``.

    ``value`` may be a ``datetime.date`` (returned as-is), a ``datetime.datetime``
    (its date part is returned), or a string in ``YYYY-MM-DD`` format.

    Raises ``ValueError`` if a string isn't in the expected format.
    """
    # datetime is a subclass of date, so it has to be checked first.  Dropping
    # the time part means every result is a plain date; Python refuses to
    # compare a date with a datetime, which would break sorting by this value.
    if isinstance(value, datetime.datetime):
        return value.date()

    if isinstance(value, datetime.date):
        return value

    return datetime.datetime.strptime(value, "%Y-%m-%d").date()
149
150
@attr.s
class Plan:
    """Notes about a book in one of the "plan" lists, and when it was added."""

    # The body of the Markdown file the entry was loaded from.
    text = attr.ib()
    # Passed through _parse_date when the instance is created.  Used to sort
    # the lists of plans, newest first.
    date_added = attr.ib(converter=_parse_date)
155
156
@attr.s
class PlanEntry:
    """
    A plan loaded from disk: the source file, the book (a ``Book``), and the
    plan itself (a ``Plan``).  Used for both the "want to read" and the
    "never going to read" lists.
    """

    path = attr.ib()
    book = attr.ib()
    plan = attr.ib()
162
163
def get_plan_entry_from_path(path):
    """
    Load the Markdown file at ``path`` and turn it into a ``PlanEntry``.

    Every key in the ``book`` front matter is passed to ``Book``; the slug comes
    from the filename.  The plan takes its date from ``plan.date_added`` in the
    front matter and its text from the body of the file.
    """
    post = frontmatter.load(path)

    # The slug is the filename, minus directory and extension.
    stem, _ = os.path.splitext(path)
    book = Book(slug=os.path.basename(stem), **post["book"])

    date_added = post["plan"]["date_added"]
    plan = Plan(date_added=date_added, text=post.content)

    return PlanEntry(path=path, book=book, plan=plan)
173
174
def get_entries(dirpath, constructor):
    """
    Generate an entry for every Markdown file under ``dirpath``.

    Walks ``dirpath`` recursively and calls ``constructor`` with the
    ``pathlib.Path`` of each file whose name ends in ``.md``, yielding
    whatever it returns.  Files and directories are visited in sorted order,
    so the output doesn't depend on the order the filesystem lists them in.

    If ``constructor`` raises, the offending path is printed to stderr and the
    exception is re-raised.
    """
    for root, dirnames, filenames in os.walk(dirpath):
        # os.walk reads this list to decide where to go next, so sorting it
        # in place fixes the order in which subdirectories are visited.
        dirnames.sort()

        for filename in sorted(filenames):
            if not filename.endswith(".md"):
                continue

            path = pathlib.Path(root) / filename

            try:
                entry = constructor(path)
            except Exception:
                # Say which file was the problem before the traceback appears.
                print(f"Error parsing {path}", file=sys.stderr)
                raise

            yield entry
188
189
def render_markdown(text):
    """
    Convert a string of Markdown to HTML, with the ``SmartyExtension``
    extension of Python-Markdown enabled.
    """
    smarty = SmartyExtension()
    return markdown.markdown(text, extensions=[smarty])
192
193
def render_date(date_value):
    """
    Format a date for display.

    ``date_value`` is one of:

    *   a ``datetime.date`` or ``datetime.datetime``, rendered as e.g. "5 March 2020"
    *   a string ``YYYY-MM-DD``, rendered as e.g. "5 March 2020"
    *   a string ``YYYY-MM``, rendered as e.g. "March 2020"

    Raises ``ValueError`` if a string is in neither format, or doesn't name
    a real date.
    """
    if isinstance(date_value, datetime.date):
        # Use .day rather than strftime("%-d"): the "-" flag that drops the
        # leading zero is a platform extension and isn't available everywhere.
        month_and_year = date_value.strftime("%B %Y")
        return f"{date_value.day} {month_and_year}"

    date_match = re.match(
        r"^(?P<year>\d{4})-(?P<month>\d{2})(?:-(?P<day>\d{2}))?$", date_value
    )

    # An explicit raise, not an assert, so the check survives `python -O`.
    if date_match is None:
        raise ValueError(
            f"Unrecognised date {date_value!r}; expected YYYY-MM or YYYY-MM-DD"
        )

    year = int(date_match.group("year"))
    month = int(date_match.group("month"))
    day = date_match.group("day")

    # No day was given, so only show the month and the year.
    if day is None:
        return datetime.date(year, month, 1).strftime("%B %Y")

    return render_date(datetime.date(year, month, int(day)))
213
214
def save_html(template, out_name="", **kwargs):
    """
    Render ``template`` and save the result to ``_html/<out_name>/index.html``.

    Any keyword arguments are passed to the template as variables.  Before
    the HTML is written, the CSS in every <style> tag is minified and all
    HTML comments are removed.  Parent directories are created as needed,
    and the file is always written as UTF-8.
    """
    html = template.render(**kwargs)
    out_path = pathlib.Path("_html") / out_name / "index.html"
    out_path.parent.mkdir(exist_ok=True, parents=True)

    soup = bs4.BeautifulSoup(html, "html.parser")

    # Minify the CSS in all inline <style> tags.
    for style_tag in soup.find_all("style"):
        css = style_tag.string

        # .string is None if the tag is empty or has more than one child;
        # in that case there's no single string to hand to the minifier.
        if css is None:
            continue

        style_tag.string = cssmin.cssmin(css)

    # Remove any comments
    for comment in soup(text=lambda text: isinstance(text, bs4.Comment)):
        comment.extract()

    # The pages contain non-ASCII characters (e.g. curly quotes in titles),
    # so be explicit about the encoding rather than relying on the locale.
    out_path.write_text(str(soup), encoding="utf-8")
231
232
def _create_new_thumbnail(src_path, dst_path):
    """
    Create a thumbnail of the image at ``src_path`` and save it to ``dst_path``,
    by shelling out to ``convert``.  The parent directory of ``dst_path`` is
    created if it doesn't already exist.

    Raises ``subprocess.CalledProcessError`` if ``convert`` exits with a non-zero status.
    """
    dst_path.parent.mkdir(parents=True, exist_ok=True)

    # Thumbnails are 240x240 max, then 2x for retina displays
    resize_command = ["convert", src_path, "-resize", "480x480>", dst_path]
    subprocess.check_call(resize_command)
240
241
def thumbnail_1x(name):
    """
    Return the filename of the 1x variant of an image: the original filename
    with ``_1x`` inserted before the extension.  Any directory part of
    ``name`` is dropped.
    """
    image_path = pathlib.Path(name)
    return f"{image_path.stem}_1x{image_path.suffix}"
245
246
def _create_new_square(src_path, square_path):
    """
    Create a 240x240 version of the image at ``src_path`` and save it to
    ``square_path``, by shelling out to ``convert``.  The parent directory of
    ``square_path`` is created if it doesn't already exist.

    Raises ``subprocess.CalledProcessError`` if ``convert`` exits with a non-zero status.
    """
    square_path.parent.mkdir(parents=True, exist_ok=True)

    square_command = ["convert", src_path]
    square_command += ["-resize", "240x240"]
    square_command += ["-gravity", "center"]
    square_command += ["-background", "white"]
    square_command += ["-extent", "240x240"]
    square_command.append(square_path)

    subprocess.check_call(square_command)
254
255
def create_thumbnails():
    """
    Create a thumbnail and a square crop for everything in ``src/covers``
    (apart from ``.DS_Store``), saved in ``_html/thumbnails`` and
    ``_html/squares`` under the same filename.

    An image is only (re)created if it doesn't exist yet, or if the cover was
    modified more recently than it was.  The tint colour of every thumbnail
    is then stored with ``store_tint_color``.
    """
    covers_dir = pathlib.Path("src/covers")

    for image_name in os.listdir("src/covers"):
        if image_name == ".DS_Store":
            continue

        src_path = covers_dir / image_name
        dst_path = pathlib.Path("_html/thumbnails") / image_name
        square_path = pathlib.Path("_html/squares") / image_name

        derived_images = (
            (dst_path, _create_new_thumbnail),
            (square_path, _create_new_square),
        )

        for out_path, create in derived_images:
            # Missing, or older than the cover it was made from.
            if not out_path.exists() or src_path.stat().st_mtime > out_path.stat().st_mtime:
                create(src_path, out_path)

        store_tint_color(dst_path)
277
278
# Hex MD5 digest of the stylesheet, computed once when this module is loaded.
# Reading the file with pathlib closes the file handle as soon as it's done,
# rather than leaving it open until it's garbage collected.
CSS_HASH = hashlib.md5(pathlib.Path("static/style.css").read_bytes()).hexdigest()
280
281
def css_hash(_):
    """
    Template filter that returns the MD5 hash of the stylesheet, prefixed
    with ``md5:``.  The value it's applied to is ignored.
    """
    return "md5:" + CSS_HASH
284
285
def main():
    """
    Build the whole site into the ``_html`` directory.

    In order, this:

    1.  resets the modified times of the cover images from Git
    2.  sets up the Jinja2 environment and its custom filters
    3.  creates thumbnails, then copies the covers and static files across
    4.  renders a page for every review, then the list of reviews
    5.  renders the "currently reading", "want to read" and "never going
        to read" lists
    6.  renders the front page, which shows the five most recent reviews
    """
    set_git_timestamps()

    env = Environment(
        loader=FileSystemLoader("templates"),
        autoescape=select_autoescape(["html", "xml"]),
    )

    env.filters["render_markdown"] = render_markdown
    env.filters["render_date"] = render_date
    env.filters["smartypants"] = smartypants.smartypants
    env.filters["thumbnail_1x"] = thumbnail_1x
    env.filters["css_hash"] = css_hash
    env.filters["create_shelf_data_uri"] = create_shelf_data_uri

    create_thumbnails()

    tint_colors = get_tint_colors()

    rsync("src/covers/", "_html/covers/")
    rsync("static/", "_html/static/")

    # Render the "all reviews" page

    all_reviews = list(
        get_entries(dirpath="src/reviews", constructor=get_review_entry_from_path)
    )

    # Newest first.  The key is compared as a string, so this also works when
    # date_read is a string rather than a date.
    all_reviews = sorted(
        all_reviews, key=lambda rev: f"{rev.review.date_read}/{rev.review.date_order}", reverse=True
    )

    for review_entry in all_reviews:
        save_html(
            template=env.get_template("review.html"),
            out_name=review_entry.out_path(),
            review_entry=review_entry,
            title=f"My review of {review_entry.book.title}",
            tint_colors=tint_colors
        )

    save_html(
        template=env.get_template("list_reviews.html"),
        out_name="reviews",
        # groupby only groups adjacent items, which is fine here because the
        # reviews are already sorted by date, and so by year.
        all_reviews=[
            (year, list(reviews))
            for (year, reviews) in itertools.groupby(
                all_reviews, key=lambda rev: str(rev.review.date_read)[:4]
            )
        ],
        title="books i’ve read",
        this_year=str(datetime.datetime.now().year),
        tint_colors=tint_colors
    )

    # Render the "currently reading" page

    all_reading = list(
        get_entries(
            dirpath="src/currently_reading", constructor=get_reading_entry_from_path
        )
    )

    save_html(
        template=env.get_template("list_reading.html"),
        out_name="reading",
        all_reading=all_reading,
        title="books i’m currently reading",
        tint_colors=tint_colors
    )

    # Render the "want to read" page

    all_plans = list(
        get_entries(dirpath="src/plans", constructor=get_plan_entry_from_path)
    )

    all_plans = sorted(all_plans, key=lambda plan: plan.plan.date_added, reverse=True)

    save_html(
        template=env.get_template("list_plans.html"),
        out_name="to-read",
        all_plans=all_plans,
        title="books i want to read",
        tint_colors=tint_colors,
    )

    # Render the "never going to read this" page

    all_retired = list(
        get_entries(dirpath="src/will_never_read", constructor=get_plan_entry_from_path)
    )

    all_retired = sorted(
        all_retired, key=lambda plan: plan.plan.date_added, reverse=True
    )

    save_html(
        template=env.get_template("list_will_never_read.html"),
        out_name="will-never-read",
        all_retired=all_retired,
        # A literal apostrophe, like the other page titles.  Autoescaping is
        # on for HTML templates, so an entity such as &rsquo; would be escaped
        # and shown as-is unless the template marks the title as safe.
        title="books i’m never going to read",
        tint_colors=tint_colors
    )

    # Render the front page

    save_html(
        template=env.get_template("index.html"),
        # read_text closes the file when it's done; the encoding is explicit
        # so the result doesn't depend on the locale.
        text=pathlib.Path("src/index.md").read_text(encoding="utf-8"),
        reviews=all_reviews[:5],
        tint_colors=tint_colors
    )

    print("✨ Rendered HTML files to _html ✨")
400
401
# Only build the site when this file is run as a script, not when it's imported.
if __name__ == "__main__":
    main()