289 lines
9.8 KiB
Python
289 lines
9.8 KiB
Python
import ast
|
|
import re
|
|
import os
|
|
import pathlib
|
|
import markdown
|
|
from django.db import models
|
|
from django.utils.translation import gettext as _
|
|
from start.models import Searchable
|
|
|
|
import logging
|
|
logger = logging.getLogger(__name__)

# Python-Markdown extensions enabled for rendering articles.
# NOTE: "meta" is the extension that provides ``MD.Meta``, which
# BlogPost.sync_file reads after every conversion.
EXTENSIONS = [
    "extra",
    "admonition",
    "codehilite",
    "meta",
    "toc",
]

# Per-extension options, keyed by extension name.
EXTENSION_CONFIGS = {
    "codehilite": {
        "linenums": True,
        "pygments_style": "monokai",
    },
}

# One converter instance shared by the whole module; callers invoke
# ``MD.reset()`` before each conversion.
MD = markdown.Markdown(
    extensions=EXTENSIONS,
    extension_configs=EXTENSION_CONFIGS,
)
|
|
|
|
|
|
class Category(models.Model):
    """
    Groups blog posts under a common name.

    The name is deliberately not translated: translated names would make
    i18n in urls, and in Searchables specifically, a pain.
    Maybe some day it would be cool if categories were Searchable too.
    """
    # Human readable name of the category.
    name = models.CharField(max_length=50)
    # Unique identifier of the category.
    slug = models.SlugField(unique=True)

    class Meta:
        verbose_name = _("Category")
        verbose_name_plural = _("Categories")

    def __str__(self):
        """Return the category as ``{<ClassName>"name"}``."""
        kind = type(self).__name__
        return '{{<{}>"{}"}}'.format(kind, self.name)
|
|
|
|
|
|
class BlogPost(Searchable):
    """
    A blog post whose content is rendered from markdown files.

    Each post can have one markdown file per language inside ``DATA_DIR``,
    named ``<lang>-<slug>.md`` (for example ``en-my-post.md``). The rendered
    HTML is stored in ``body_<lang>``; the remaining attributes are read
    from the metadata header of the markdown file.

    NOTE(review): ``title_*``, ``subtitle_*``, ``desc_*``, ``date``,
    ``public`` and ``suburl`` are assigned below but not declared here;
    presumably they are fields of ``Searchable`` -- confirm.
    """
    body_en = models.TextField(blank=True, default="")
    body_de = models.TextField(blank=True, default="")
    category = models.ForeignKey(
        Category, on_delete=models.SET_NULL, null=True)
    thumbnail = models.ImageField(blank=True, upload_to="img/thumbnails")
    featured = models.BooleanField(default=False)
    # repr() of a ``dict[str, bool]`` (language code -> available); use
    # get_langs() / set_langs() to access it. The default has to be a valid
    # dict literal, otherwise get_langs() cannot parse it.
    # NOTE(review): changing the default is probably picked up by
    # ``makemigrations`` -- confirm and add the migration if so.
    langs = models.CharField(
        default="{'en': False, 'de': False}", max_length=64)
    slug = models.SlugField()

    # TODO autodiscover new blog posts based on markdown files?

    DATA_DIR = "/app/blog/data/articles"
    # Template only: always copy it, never mutate it in place.
    DEFAULT_LANGS = {'en': False, 'de': False}

    def regenerate(self):
        """
        Regenerate this object from its markdown files and save it.

        Implements the abstract method of Searchable.

        Raises AttributeError if the post still has no category after the
        markdown files were loaded.
        """
        logger.info(f"regenerating {self.__class__.__name__} object: {self}")

        # Load from markdown first: this may (re)assign the category, and
        # the category is part of the url built below.
        self.sync_file()

        # url stuff
        self.suburl = f"/blog/{self.category.name}/{self.slug}"

        self.save()

    def sync_file(self):
        """
        Generate the article from its original markdown files.

        Handles the languages in the order "en", "de". A missing file is
        skipped silently; any other problem is logged as a warning. The
        object is not saved.
        """
        logger.info(f"regenerating article from markdown for: {self}")
        for lang in ("en", "de"):
            self._sync_language(lang)

    def _sync_language(self, lang: str):
        """Render ``<lang>-<slug>.md`` into ``body_<lang>`` and apply its metadata."""
        path = pathlib.Path(self.DATA_DIR) / f"{lang}-{self.slug}.md"
        try:
            MD.reset()
            # Explicit encoding, so the result does not depend on the locale.
            with open(path, encoding="utf-8") as f:
                html: str = MD.convert(f.read())
        except FileNotFoundError:
            # TODO: mark as untranslated
            return
        except Exception as e:
            logger.warning(
                f"could not generate article {self.slug} from markdown: {e}")
            return

        # Broken or incomplete metadata must not prevent storing the body.
        try:
            self._apply_metadata(lang)
        except Exception as e:
            logger.warning(
                f"could not generate metadata {self.slug} from markdown: {e}")

        setattr(self, f"body_{lang}", html)

    def _apply_metadata(self, lang: str):
        """
        Copy the metadata of the most recent ``MD.convert()`` onto this object.

        Every metadata value is a list, of which the first entry is used.
        The date and the category are only taken from the "en" file.
        ``featured`` and ``public`` are taken from every file, so a later
        language overrides an earlier one.

        Raises ValueError if the markdown meta extension is not loaded and
        KeyError if a metadata key is missing; attributes assigned before
        the failing key keep their new value.
        """
        # NOTE: MD.Meta is generated after MD.convert() by the meta
        # extension.
        if not hasattr(MD, 'Meta'):
            message = "Metadata extension for markdown not loaded"
            logger.error(message)
            raise ValueError(message)
        meta = MD.Meta

        setattr(self, f"title_{lang}", meta["title"][0])
        setattr(self, f"subtitle_{lang}", meta["subtitle"][0])
        setattr(self, f"desc_{lang}", meta["desc"][0])
        if lang == "en":
            self.date = meta["date"][0]
        # TODO: parse date from markdown for other languages
        self.featured = meta["featured"][0] == "True"
        self.public = meta["public"][0] == "True"
        # self.thumbnail = meta["thumbnail"]
        # TODO: parse keywords from markdown

        if lang == "en":
            # Create the category if it does not exist yet. The metadata
            # value is a list, so take its first entry for name and slug.
            category_slug = meta["category"][0]
            category, _created = Category.objects.get_or_create(
                slug=category_slug, defaults={"name": category_slug})
            logger.debug(f"category of {self}: {category}")
            self.category = category

    def get_langs(self) -> dict[str, bool] | None:
        """
        Get available languages.

        Returns the dict stored in ``langs``, or None (after logging an
        error) if the stored value is not a valid dict literal.
        """
        # SECURITY:
        # ast.literal_eval only evaluates Python literals and does not run
        # code. The Python docs still warn that crafted input can exhaust
        # memory or the C stack; the field is limited to 64 characters.
        try:
            langs = ast.literal_eval(str(self.langs))
            if not isinstance(langs, dict):
                raise ValueError(
                    f"expected a dict, got {type(langs).__name__}")
        except (ValueError, TypeError, SyntaxError) as e:
            logger.error(
                f"could not safely evaluate 'langs' for '{self}': {e}")
            return None
        return langs

    def set_langs(self, langs: dict[str, bool]):
        """
        Set available languages.

        Stores the repr of ``langs``; the object is not saved.
        """
        self.langs = repr(langs)

    @classmethod
    def sync_all(cls):
        """
        Sync all Blog Posts with the filesystem.

        Caution: Will delete all Blog Posts

        The posts are recreated from the files in ``DATA_DIR`` that are
        named ``<lang>-<slug>.md``; other files are skipped with a warning.
        If ``DATA_DIR`` can not be listed, the error is raised before
        anything is deleted.
        """
        # check if the DATA_DIR is OK
        data_dir = pathlib.Path(cls.DATA_DIR)
        if not data_dir.exists():
            logger.error(f"'{cls.DATA_DIR}' does not exist")
        elif not data_dir.is_dir():
            logger.error(f"'{cls.DATA_DIR}' is not a directory")

        # List the directory before deleting anything: an unusable DATA_DIR
        # raises here and must not cost us the existing posts.
        filenames = [p.name for p in data_dir.iterdir() if p.is_file()]
        logger.debug(f"discovered files: {filenames}")

        # delete all existing objects
        BlogPost.objects.all().delete()

        # finding lang and slug; the whole file name has to match
        pattern = re.compile(r"(en|de)-(.+)\.md")

        # slug -> available languages, merging all versions of a post
        langs_by_slug: dict[str, dict[str, bool]] = {}
        for filename in filenames:
            match = pattern.fullmatch(filename)
            if match is None:
                logger.warning(
                    f"Data file '{filename}' does not fit to the filename"
                    " regex")
                continue
            lang, slug = match.groups()
            # Every post gets its own copy of the defaults.
            langs = langs_by_slug.setdefault(slug, dict(cls.DEFAULT_LANGS))
            langs[lang] = True
        logger.debug(f"to save: {langs_by_slug}")

        for slug, langs in langs_by_slug.items():
            try:
                obj = BlogPost(langs=repr(langs), slug=slug)
                # loads the markdown files, builds the url and saves
                obj.regenerate()
            except Exception as e:
                logger.error(f"Could not create BlogPost for '{slug}': {e}")

    class Meta:
        verbose_name = _("blog post")
        verbose_name_plural = _("blog posts")
|