"""Models for the blog app: categories and markdown-backed blog posts."""

import logging
import os
import pathlib
import re
from ast import literal_eval

import markdown
from django.db import models
from django.utils.translation import gettext as _

from start.models import Searchable

logger = logging.getLogger(__name__)

EXTENSIONS = ["extra", "admonition", "codehilite", "meta", "toc"]
EXTENSION_CONFIGS = {
    "codehilite": {
        "linenums": True,
        "pygments_style": "monokai",
    },
}
# Shared converter instance. The "meta" extension keeps per-document state,
# so callers must invoke MD.reset() before each conversion.
MD = markdown.Markdown(extensions=EXTENSIONS, extension_configs=EXTENSION_CONFIGS)


class Category(models.Model):
    """A category of blog posts.

    Name not translated because it would make i18n in urls and Searchables
    specifically a pain. Maybe some day it would be cool if these were
    Searchable.
    """

    name = models.CharField(max_length=50)
    slug = models.SlugField()

    class Meta:
        verbose_name = _("Category")
        verbose_name_plural = _("Categories")

    def __str__(self):
        return f"{{<{self.__class__.__name__}>\"{self.name}\"}}"


class BlogPost(Searchable):
    """A blog post whose content is generated from markdown files on disk.

    Each post may exist in several languages; the source file for language
    ``xx`` is ``<DATA_DIR>/xx-<slug>.md``.
    """

    body_en = models.TextField(blank=True, default="")
    body_de = models.TextField(blank=True, default="")
    category = models.ForeignKey(Category, on_delete=models.SET_NULL, null=True)
    thumbnail = models.ImageField(blank=True, upload_to="img/thumbnails")
    featured = models.BooleanField(default=False)
    # Serialized dict mapping language code -> available (see get_langs /
    # set_langs). FIX: the previous default "['en': False, 'de': False]" was
    # not valid Python and could never be parsed back.
    langs = models.CharField(default="{'en': False, 'de': False}", max_length=64)
    slug = models.SlugField()

    # TODO autodiscover new blog posts based on markdown files?
    DATA_DIR = "/app/blog/data/articles"
    # Template only — always copy (dict(...)) before mutating, never share.
    DEFAULT_LANGS = {"en": False, "de": False}

    def regenerate(self):
        """Regenerate this object from its markdown sources and save it.

        Implements the abstract method of Searchable.
        """
        logger.info(f"regenerating {self.__class__.__name__} object: {self}")
        # url stuff; category may be NULL (on_delete=SET_NULL), so guard
        # instead of crashing with AttributeError.
        category_name = self.category.name if self.category else ""
        self.suburl = f"/blog/{category_name}/{self.slug}"
        # load from markdown
        self.sync_file()
        self.save()

    def sync_file(self):
        """Generate this article's fields from its original markdown files."""
        logger.info(f"regenerating article from markdown for: {self}")
        for lang in self.DEFAULT_LANGS:
            self._load_lang(lang)

    def _load_lang(self, lang: str):
        """Load one language version (``<lang>-<slug>.md``), if it exists.

        Fills ``body_<lang>`` plus the translated metadata fields. A missing
        file is not an error (the post is simply untranslated); any other
        failure is logged and swallowed so one bad file cannot abort a sync.
        """
        try:
            MD.reset()
            with open(f"{self.DATA_DIR}/{lang}-{self.slug}.md") as f:
                body: str = f.read()
            html: str = MD.convert(body)
            try:
                meta = MD.Meta
                setattr(self, f"title_{lang}", meta["title"][0])
                setattr(self, f"subtitle_{lang}", meta["subtitle"][0])
                setattr(self, f"desc_{lang}", meta["desc"][0])
                # TODO: parse date from markdown
                self.featured = meta["featured"][0] == "True"
                self.public = meta["public"][0] == "True"
                # self.thumbnail = meta["thumbnail"]
                # TODO: parse keywords from markdown
                # TODO: parse category from markdown
                # if keyword or category do not exist, create them I suppose
            except Exception as e:
                logger.warning(
                    f"could not generate metadata {self.slug} from markdown: {e}"
                )
            # Body is set even when metadata parsing failed (matches the old
            # behavior, where the "" fallback was immediately overwritten).
            setattr(self, f"body_{lang}", html)
        except FileNotFoundError:
            # TODO: mark as untranslated
            pass
        except Exception as e:
            logger.warning(f"could not generate article {self.slug} from markdown: {e}")

    def get_langs(self) -> dict[str, bool]:
        """Return the available languages as ``{lang_code: available}``.

        SECURITY FIX: this used eval(), which would execute arbitrary Python
        if the langs field were ever injected (potential RCE).
        ast.literal_eval only accepts Python literals and cannot run code.
        """
        try:
            return literal_eval(str(self.langs))
        except (ValueError, SyntaxError) as e:
            # E.g. rows created with the old, unparseable default.
            logger.warning(f"could not parse langs field for {self.slug}: {e}")
            return dict(self.DEFAULT_LANGS)

    def set_langs(self, langs: dict[str, bool]):
        """Set the available languages (stored as a dict literal string)."""
        self.langs = repr(langs)

    @classmethod
    def sync_all(cls):
        """Sync all blog posts with the filesystem.

        Caution: deletes all existing blog posts first.
        """
        # delete all existing objects
        BlogPost.objects.all().delete()

        # check if the DATA_DIR is OK
        data_dir = pathlib.Path(cls.DATA_DIR)
        if not data_dir.exists():
            logger.error(f"'{cls.DATA_DIR} does not exist'")
        if not data_dir.is_dir():
            logger.error(f"'{cls.DATA_DIR} is not a directory'")
        filenames = [f for f in os.listdir(data_dir) if data_dir.joinpath(f).is_file()]
        logger.debug(f"discovered files: {filenames}")

        # File names look like "<lang>-<slug>.md". The "$" anchor rejects
        # stray files such as "en-foo.md.bak".
        pattern = re.compile(r"^(en|de)-(.*)\.md$")

        # slug -> language availability. Accumulating into a dict collapses
        # the per-language files of one post and deduplicates in one pass.
        # FIX: the old code aliased a single shared DEFAULT_LANGS dict across
        # all files and mutated the file list while iterating it.
        posts: dict[str, dict[str, bool]] = {}
        for fname in filenames:
            match = pattern.match(fname)
            if match is None:
                logger.error(f"could not parse file name: {fname}")
                continue
            lang, slug = match.group(1), match.group(2)
            posts.setdefault(slug, dict(cls.DEFAULT_LANGS))[lang] = True

        logger.debug(f"to save: {posts}")
        for slug, langs in posts.items():
            try:
                obj = BlogPost(slug=slug)
                # Serialize explicitly instead of relying on implicit str()
                # of a dict when assigning to the CharField.
                obj.set_langs(langs)
                obj.sync_file()
                obj.save()
            except Exception as e:
                logger.error(f"Could not create BlogPost for '{slug}': {e}")

    class Meta:
        verbose_name = _("blog post")
        verbose_name_plural = _("blog posts")