gawa/gawa/blog/models.py
2023-10-02 03:06:45 +02:00

284 lines
9.4 KiB
Python

import toml
import ast
import re
import os
import pathlib
import markdown
from django.db import models
from django.utils.translation import gettext as _
from start.models import Keyword, Searchable
import logging
logger = logging.getLogger(__name__)
# Names of the python-markdown extensions enabled for rendering articles.
EXTENSIONS = ["extra", "admonition", "codehilite", "meta", "toc"]

# Per-extension options, keyed by extension name.
EXTENSION_CONFIGS = {
    "codehilite": {
        "linenums": True,
        "pygments_style": "monokai",
    },
}
# Module-wide Markdown converter, configured with the extension settings
# defined in this module.
MD = markdown.Markdown(
    extensions=EXTENSIONS,
    extension_configs=EXTENSION_CONFIGS,
)
class Category(models.Model):
    """
    A category of blog posts.

    The name is not translated because it would make i18n in urls and
    Searchables specifically a pain.
    Maybe some day it would be cool if these were Searchable.
    """

    name = models.CharField(max_length=50)
    slug = models.SlugField(unique=True)

    class Meta:
        # NOTE(review): `_` is the eager `gettext` here, so these labels are
        # translated once at import time. Django recommends `gettext_lazy`
        # for model metadata -- confirm and switch the import if intended.
        verbose_name = _("Category")
        verbose_name_plural = _("Categories")

    def __str__(self):
        """Return a debug-style label of the form ``{<Category>"name"}``."""
        return f'{{<{self.__class__.__name__}>"{self.name}"}}'

    @staticmethod
    def get_or_create_uncategorized():
        """
        Return the fallback category with slug ``uncategorized``.

        The category is created on first use. ``get_or_create`` is used
        instead of a manual get/except/create so that two concurrent callers
        cannot both try to insert the (unique) slug.
        """
        category, _created = Category.objects.get_or_create(
            slug="uncategorized", defaults={"name": "uncategorized"})
        return category
class BlogPost(Searchable):
    """
    A blog post whose content is generated from files in ``DATA_DIR``.

    Every post is described by a ``<slug>.toml`` meta file plus one
    ``<lang>-<slug>.md`` markdown file per language. The rendered HTML of
    the markdown files is stored in ``body_en`` / ``body_de``.
    """

    # Rendered HTML of the article, one field per language.
    body_en = models.TextField(blank=True, default="")
    body_de = models.TextField(blank=True, default="")
    # NOTE(review): the default callable returns a Category *instance*; the
    # Django field reference asks for the referenced value (the pk) for
    # ForeignKey defaults -- confirm this works in all code paths.
    category = models.ForeignKey(
        Category, on_delete=models.SET_DEFAULT, null=False,
        default=Category.get_or_create_uncategorized)
    thumbnail = models.ImageField(
        blank=True,
        upload_to="img/thumbnails",
        default="img/thumbnails/default.jpg")
    featured = models.BooleanField(default=False)
    # repr() of a ``dict[str, bool]``; read it with get_langs() and write it
    # with set_langs(). The default has to be a valid dict literal, otherwise
    # ast.literal_eval() in get_langs() fails with a SyntaxError.
    # (Changing a field default needs a new migration.)
    langs = models.CharField(
        default="{'en': False, 'de': False}", max_length=64)
    slug = models.SlugField()

    # TODO autodiscover new blog posts based on markdown files?
    DATA_DIR = "/app/blog/data/articles"
    # Template for the language flags; always copy it, never mutate it.
    DEFAULT_LANGS = {'en': False, 'de': False}
    # Keys that must exist at the top level of a meta file.
    META_TOP_KEYS = [
        "date",
        "keywords",
        "category",
        "featured",
        "public",
        "lang"]
    # Keys that must exist in every ``lang.<code>`` table of a meta file.
    META_LANG_KEYS = ["title", "subtitle", "desc"]

    def regenerate(self):
        """
        Regenerate the derived data of this object and save it.

        Implements the abstract method of Searchable.
        """
        logger.info(f"regenerating {self.__class__.__name__} object: {self}")

        # url stuff
        self.suburl = f"/blog/{self.category.name}/{self.slug}"

        self.save()

    def sync_file(self):
        """
        Load this article from its meta file and markdown files.

        Reads ``<DATA_DIR>/<slug>.toml``, validates it, renders every
        ``<DATA_DIR>/<lang>-<slug>.md`` listed in the meta file and saves
        the object together with its keywords.

        If the meta file cannot be opened an error is logged and nothing
        else happens.

        Raises:
            ValueError: if a required key is missing in the meta file.
            OSError: if a markdown file of a known language cannot be read.
        """
        logger.info(f"syncing article to markdown for: {self}")

        # read metadata
        try:
            fmeta = open(
                f"{self.DATA_DIR}/{self.slug}.toml", "r", encoding="utf-8")
        except OSError:
            logger.error(f"could not find meta file for '{self}'")
            return
        # the context manager makes sure the handle is closed again
        with fmeta:
            data = toml.load(fmeta)

        for key in self.META_TOP_KEYS:
            if key not in data:
                message = f"Key '{key}' missing in meta file for '{self}'"
                logger.error(message)
                raise ValueError(message)

        for lang, lang_meta in data['lang'].items():
            for key in self.META_LANG_KEYS:
                if key not in lang_meta:
                    message = (
                        f"Key '{key}' ('{lang}') missing in meta file "
                        f"for '{self}'")
                    logger.error(message)
                    raise ValueError(message)

            # Languages without matching model fields are reported and
            # skipped before their markdown file is touched.
            if lang not in self.DEFAULT_LANGS:
                logger.error(
                    f"unknown language '{lang}' in meta file for '{self}'")
                continue

            with open(f"{self.DATA_DIR}/{lang}-{self.slug}.md",
                      encoding="utf-8") as f_body:
                body: str = f_body.read()

            # Write to the fields of *this* language. Previously the "de"
            # branch wrote to the *_en fields and clobbered the English text.
            # NOTE(review): assumes Searchable declares title_<lang>,
            # subtitle_<lang> and desc_<lang> for every key of DEFAULT_LANGS
            # -- confirm against start.models.
            setattr(self, f"title_{lang}", lang_meta["title"])
            setattr(self, f"subtitle_{lang}", lang_meta["subtitle"])
            setattr(self, f"desc_{lang}", lang_meta["desc"])
            # MD is shared module state, so clear it before every conversion.
            MD.reset()
            setattr(self, f"body_{lang}", MD.convert(body))

        self.date = data["date"]
        self.featured = data["featured"]
        self.public = data["public"]

        # NOTE: thumbnail is optional
        if "thumbnail" in data:
            self.thumbnail = data["thumbnail"]

        # "category" is a required key (checked above); unknown categories
        # are created on the fly with the slug doubling as the name.
        self.category, _created = Category.objects.get_or_create(
            slug=data['category'], defaults={"name": data['category']})

        # The keywords relation is only touched after the object was saved.
        self.save()

        logger.debug("keywords next")
        for keyword in data["keywords"]:
            try:
                self.keywords.add(Keyword.objects.get(slug=keyword))
            except Keyword.DoesNotExist:
                self.keywords.create(
                    slug=keyword, text_en=keyword, text_de=keyword)

        logger.debug(f"keywords of '{self}': {self.keywords}")
        self.save()

    def get_langs(self) -> dict[str, bool]:
        """
        Return the available languages as parsed from the ``langs`` field.

        Raises:
            ValueError, SyntaxError: if the stored text is not a valid
                Python literal.
        """
        # SECURITY:
        # ast.literal_eval only accepts Python literals, it does not run
        # code. The Python docs still warn that very large or deeply nested
        # input can exhaust memory or the C stack; the field's max_length
        # of 64 keeps the input small.
        try:
            return ast.literal_eval(str(self.langs))
        except (ValueError, SyntaxError) as e:
            # Malformed text raises SyntaxError, which used to go unlogged.
            logger.error(
                f"could not safely evaluate 'langs' for '{self}': {e}")
            raise

    def set_langs(self, langs: dict[str, bool]):
        """
        Store the available languages as text in the ``langs`` field.
        """
        self.langs = repr(langs)

    @classmethod
    def sync_all(cls):
        """
        Sync all Blog Posts with the filesystem.

        Caution: Will delete all Blog Posts and recreate them from the
        ``<lang>-<slug>.md`` files found in ``DATA_DIR``.

        Raises:
            FileNotFoundError: if ``DATA_DIR`` does not exist.
            NotADirectoryError: if ``DATA_DIR`` is not a directory.
        """
        # Check DATA_DIR *before* deleting anything, so that a bad path does
        # not wipe the existing posts. The exception types are the ones
        # os.listdir() would raise for the same problem.
        data_dir = pathlib.Path(cls.DATA_DIR)
        if not data_dir.exists():
            message = f"'{cls.DATA_DIR}' does not exist"
            logger.error(message)
            raise FileNotFoundError(message)
        if not data_dir.is_dir():
            message = f"'{cls.DATA_DIR}' is not a directory"
            logger.error(message)
            raise NotADirectoryError(message)

        # delete all existing objects
        BlogPost.objects.all().delete()

        # sorted, so posts are recreated in a reproducible order
        filenames = sorted(
            name for name in os.listdir(data_dir)
            if (data_dir / name).is_file())
        logger.debug(f"discovered files: {filenames}")

        # finding lang and title
        pattern = re.compile(r"^(en|de)-(.*)\.md")

        # slug -> language flags. Grouping by slug merges the language
        # versions of one article in a single pass. Every slug gets its own
        # copy of DEFAULT_LANGS, so the class constant is never mutated.
        posts: dict[str, dict[str, bool]] = {}
        for filename in filenames:
            matched = pattern.match(filename)
            if matched is None:
                logger.warning(
                    f"Data file '{filename}' does not fit to the filename "
                    "regex")
                continue
            lang, slug = matched.groups()
            posts.setdefault(slug, dict(cls.DEFAULT_LANGS))[lang] = True
        logger.debug(f"to save: {posts}")

        for slug, langs in posts.items():
            # Best effort: one broken article must not stop the others.
            try:
                obj = BlogPost(slug=slug)
                obj.set_langs(langs)
                obj.sync_file()
                obj.regenerate()
                obj.save()
            except Exception as e:
                logger.error(f"Could not create BlogPost for '{slug}': {e}")

    class Meta:
        verbose_name = _("blog post")
        verbose_name_plural = _("blog posts")