From 42ea8d14d48985e1272422441d9611901de0093c Mon Sep 17 00:00:00 2001
From: PlexSheep <software@cscherr.de>
Date: Sun, 1 Oct 2023 00:41:19 +0200
Subject: [PATCH] generation works but frontend crash

---
 gawa/blog/data/articles/de-test.md            | 103 ++++++++++++++++++
 gawa/blog/data/articles/en-bash-arrays.md     |   1 +
 .../migrations/0010_auto_20230930_2331.py     |  32 ++++++
 gawa/blog/models.py                           |  87 ++++++++++++---
 .../migrations/0012_auto_20230930_2331.py     |  28 +++++
 gawa/start/models.py                          |   8 +-
 6 files changed, 242 insertions(+), 17 deletions(-)
 create mode 100644 gawa/blog/data/articles/de-test.md
 create mode 100644 gawa/blog/migrations/0010_auto_20230930_2331.py
 create mode 100644 gawa/start/migrations/0012_auto_20230930_2331.py

diff --git a/gawa/blog/data/articles/de-test.md b/gawa/blog/data/articles/de-test.md
new file mode 100644
index 0000000..19d2053
--- /dev/null
+++ b/gawa/blog/data/articles/de-test.md
@@ -0,0 +1,103 @@
+---
+Title:      Bash Arrays
+Subtitle:   sub
+Desc:       Brief intro to Bash Arrays
+Date:       2023-09-29
+Keywords:   bash
+            technology
+Category:   Test
+Featured:   True
+Public:     True
+---
+
+**NOTE**
+
+This is a stolen article from [opensource.com](https://opensource.com/article/18/5/you-dont-know-bash-intro-bash-arrays)
+about bash scripting. It's a good article and I've decided to use it to test my
+markdown rendering.
+
+# GERMAN VERY YES YES
+# Bash scripting
+
+[TOC]
+
+
+## Wait, but why?
+
+Writing about Bash is challenging because it's remarkably easy for an article
+to devolve into a manual that focuses on syntax oddities. Rest assured,
+however, the intent of this article is to avoid having you RTFM.
+
+## A real (actually useful) example
+
+To that end, let's consider a real-world scenario and how Bash can help:
+You are leading a new effort at your company to evaluate and optimize the
+runtime of your internal data pipeline. As a first step, you want to do a
+parameter sweep to evaluate how well the pipeline makes use of threads. For
+the sake of simplicity, we'll treat the pipeline as a compiled C++ black box
+where the only parameter we can tweak is the number of threads reserved for
+data processing: `./pipeline --threads 4`.
+
+## The basics
+
+The first thing we'll do is define an array containing the values of the
+`--threads` parameter that we want to test:
+
+```bash
+allThreads=(1 2 4 8 16 32 64 128)
+```
+
+In this example, all the elements are numbers, but it need not be the
+case—arrays in Bash can contain both numbers and strings, e.g., `myArray=(1
+2 "three" 4 "five")` is a valid expression. And just as with any other Bash
+variable, make sure to leave no spaces around the equal sign. Otherwise,
+Bash will treat the variable name as a program to execute, and the `=` as its
+first parameter!
+
+Now that we've initialized the array, let's retrieve a few of its
+elements. You'll notice that simply doing `echo $allThreads` will output only
+the first element.
+
+To understand why that is, let's take a step back and revisit how we usually
+output variables in Bash. Consider the following scenario:
+
+```bash
+type="article" echo "Found 42 $type"
+```
+
+Say the variable $type is given to us as a singular noun and we want to add
+an `s` at the end of our sentence. We can't simply add an s to `$type` since
+that would turn it into a different variable, `$types`. And although we could
+utilize code contortions such as `echo "Found 42 "$type"s"`, the best way
+to solve this problem is to use curly braces: `echo "Found 42 ${type}s"`,
+which allows us to tell Bash where the name of a variable starts and ends
+(interestingly, this is the same syntax used in JavaScript/ES6 to inject
+variables and expressions in template literals).
+
+So as it turns out, although Bash variables don't generally require curly
+brackets, they are required for arrays. In turn, this allows us to specify
+the index to access, e.g., `echo ${allThreads[1]}` returns the second element
+of the array. Not including brackets, e.g., `echo $allThreads[1]`, leads Bash
+to treat `[1]` as a string and output it as such.
+
+Yes, Bash arrays have odd syntax, but at least they are zero-indexed, unlike
+some other languages (I'm looking at you, R).[^1]
+
+## Looping through arrays
+
+Although in the examples above we used integer indices in our arrays, let's
+consider two occasions when that won't be the case: First, if we wanted the
+$i-th element of the array, where $i is a variable containing the index of
+interest, we can retrieve that element using: `echo ${allThreads[$i]}`. Second,
+to output all the elements of an array, we replace the numeric index with
+the @ symbol (you can think of @ as standing for all):
+
+```bash
+type="article"
+echo "Found 42 $type"
+```
+
+*[RTFM]: Read the Fucking Manual
+*[HTML]: Hyper Text Markup Language
+
+[^1]: Example Footnote
diff --git a/gawa/blog/data/articles/en-bash-arrays.md b/gawa/blog/data/articles/en-bash-arrays.md
index c2556f0..297584d 100644
--- a/gawa/blog/data/articles/en-bash-arrays.md
+++ b/gawa/blog/data/articles/en-bash-arrays.md
@@ -3,6 +3,7 @@ Title:      Bash Arrays
 Subtitle:   sub
 Desc:       Brief intro to Bash Arrays
 Date:       2023-09-29
+Thumbnail:  media/thumbnails/wayland.png
 Keywords:   bash
             technology
 Category:   Test
diff --git a/gawa/blog/migrations/0010_auto_20230930_2331.py b/gawa/blog/migrations/0010_auto_20230930_2331.py
new file mode 100644
index 0000000..52c50b7
--- /dev/null
+++ b/gawa/blog/migrations/0010_auto_20230930_2331.py
@@ -0,0 +1,32 @@
+# Generated by Django 3.2.21 on 2023-09-30 21:31
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Drop BlogPost.markdown, add langs, and let body_de/body_en be blank."""
+    dependencies = [
+        ('blog', '0009_auto_20230616_2236'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='blogpost',
+            name='markdown',
+        ),
+        migrations.AddField(
+            model_name='blogpost',
+            name='langs',
+            field=models.CharField(default="['en': False, 'de': False]", max_length=64),
+        ),
+        migrations.AlterField(
+            model_name='blogpost',
+            name='body_de',
+            field=models.TextField(blank=True, default=''),
+        ),
+        migrations.AlterField(
+            model_name='blogpost',
+            name='body_en',
+            field=models.TextField(blank=True, default=''),
+        ),
+    ]
diff --git a/gawa/blog/models.py b/gawa/blog/models.py
index 152f1bc..736d3be 100644
--- a/gawa/blog/models.py
+++ b/gawa/blog/models.py
@@ -47,16 +47,18 @@ class BlogPost(Searchable):
     """
     Should contain a blogpost
     """
-    body_en = models.TextField(default="No english translation yet.")
-    body_de = models.TextField(default="Bis jetzt keine deutsche Übersetzung.")
+    body_en = models.TextField(blank=True, default="")
+    body_de = models.TextField(blank=True, default="")
     category = models.ForeignKey(Category, on_delete=models.SET_NULL, null=True)
     thumbnail = models.ImageField(blank=True, upload_to="img/thumbnails")
     featured = models.BooleanField(default=False)
+    langs = models.CharField(default="['en': False, 'de': False]", max_length=64)
     slug = models.SlugField()
 
     # TODO autodiscover new blog posts based on markdown files?
 
     DATA_DIR = "/app/blog/data/articles"
+    DEFAULT_LANGS = {'en': False, 'de': False}
 
     def regenerate(self):
         """
@@ -93,6 +95,7 @@ class BlogPost(Searchable):
                     # TODO: parse date from markdown
                     self.featured = meta_en["featured"][0] == "True"
                     self.public = meta_en["public"][0] == "True"
+                    # self.thumbnail = meta_en["thumbnail"]
                     # TODO: parse keywords from markdown
                     # TODO: parse category from markdown
 
@@ -123,6 +126,7 @@ class BlogPost(Searchable):
                     # TODO: parse date from markdown
                     self.featured = meta_de["featured"][0] == "True"
                     self.public = meta_de["public"][0] == "True"
+                    # self.thumbnail = meta_de["thumbnail"]
                     # TODO: parse keywords from markdown
                     # TODO: parse category from markdown
 
@@ -139,6 +143,23 @@ class BlogPost(Searchable):
         except Exception as e:
             logger.warning(f"could not generate article {self.slug} from markdown: {e}")
 
+    def get_langs(self) -> dict[str, bool]:
+        """
+        Return the langs field parsed into a {language code: available} dict.
+
+        Raises (e.g. ValueError, SyntaxError) if langs is not a Python literal.
+        """
+        # SECURITY: ast.literal_eval only accepts literals, so unlike eval
+        # an injected langs value can not run arbitrary code.
+        import ast  # function-scope import keeps this fix self-contained
+        return ast.literal_eval(str(self.langs))
+
+    def set_langs(self, langs: dict[str, bool]):
+        """
+        Store the given languages dict on the langs field as its repr string.
+        """
+        self.langs = repr(langs)
+
     @classmethod
     def sync_all(cls):
         """
@@ -146,7 +167,7 @@ class BlogPost(Searchable):
 
         Caution: Will delete all Blog Posts
         """
-        logger.name = logger.name + ".sync_all"
+        # logger.name = logger.name + ".sync_all"
 
         # delete all existing objects
         BlogPost.objects.all().delete()
@@ -164,18 +185,58 @@ class BlogPost(Searchable):
         # finding lang and title
         regex = r"^(en|de)-(.*)\.md"
 
-        # filepath, language code, slug
-        files = [(f, "", "") for f in files]
-        for f in files:
+        # filepath, language codes, slug
+        files = [[f, cls.DEFAULT_LANGS, ""] for f in files]
+        for file in files:
+            # parse file name
             try:
-                matches = re.match(regex, f[0])
-                lang = matches.group(1)
-                titl = matches.group(2)
-                f = (f[0], lang, titl)
-                logger.debug(f"discovered file tup: {f}")
+                matches = re.match(regex, file[0])
+                current_lang = matches.group(1)
+                file[1][current_lang] = True
+                file[2] = matches.group(2)
             except Exception as e:
-                logger.debug(e)
-                files.remove(f)
+                logger.error(e)
+                files.remove(file)
+
+        # PERF:
+        # Could possibly be done in one loop
+
+        # collapse different versions
+        for file in files:
+            try:
+                if [_f[2] for _f in files].count(file[2]) >= 2:
+                    logger.debug(f"multiple versions of '{file[2]}'")
+                    versions = [_f for _f in files if _f[2] == file[2]]
+                    lang: dict[str, bool] = file[1]
+                    for version in versions:
+                        for key in version[1]:
+                            lang[key] |= version[1][key]
+                else:
+                    # only a single version of this file
+                    continue
+            except Exception as e:
+                logger.error(f"Could not combine BlogPosts for '{file[0]}': {e}")
+
+        try:
+            # deduplicate
+            _files = []
+            for f in [[_f[1],_f[2]] for _f in files]:   # dont care about fname
+                if f not in _files:
+                    _files.append(f)
+            files = _files
+            logger.debug(f"to save: {files}")
+        except Exception as e:
+            logger.error(f"Could not dedup BlogPosts: {e}")
+
+        for file in files:
+            try:
+                obj = BlogPost(langs=file[0], slug=file[1])
+                obj.sync_file()
+                obj.save()
+            except Exception as e:
+                logger.error(f"Could not create BlogPost for '{file[1]}': {e}")
+
+
 
     class Meta:
         verbose_name = _("blog post")
diff --git a/gawa/start/migrations/0012_auto_20230930_2331.py b/gawa/start/migrations/0012_auto_20230930_2331.py
new file mode 100644
index 0000000..8b0e3ba
--- /dev/null
+++ b/gawa/start/migrations/0012_auto_20230930_2331.py
@@ -0,0 +1,28 @@
+# Generated by Django 3.2.21 on 2023-09-30 21:31
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Let desc_de/desc_en be blank and default Searchable.public to False."""
+    dependencies = [
+        ('start', '0011_auto_20230715_1441'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='searchable',
+            name='desc_de',
+            field=models.TextField(blank=True, default='Beschreibung DE', max_length=250),
+        ),
+        migrations.AlterField(
+            model_name='searchable',
+            name='desc_en',
+            field=models.TextField(blank=True, default='Description EN', max_length=250),
+        ),
+        migrations.AlterField(
+            model_name='searchable',
+            name='public',
+            field=models.BooleanField(default=False),
+        ),
+    ]
diff --git a/gawa/start/models.py b/gawa/start/models.py
index 0c06496..b9c1f03 100644
--- a/gawa/start/models.py
+++ b/gawa/start/models.py
@@ -37,13 +37,13 @@ class Searchable(models.Model):
     title_en = models.CharField(max_length=50, default="title EN")
     subtitle_de = models.CharField(max_length=50, blank=True)
     subtitle_en = models.CharField(max_length=50, blank=True)
-    desc_de = models.TextField(max_length=250, unique=False, default="Beschreibung DE")
-    desc_en = models.TextField(max_length=250, unique=False, default="Description EN")
+    desc_de = models.TextField(blank=True, max_length=250, unique=False, default="Beschreibung DE")
+    desc_en = models.TextField(blank=True, max_length=250, unique=False, default="Description EN")
     # may be empty/blank for some entries
     date = models.DateField(blank=True, null=True)
     keywords = models.ManyToManyField(Keyword)
     suburl = models.CharField(max_length=200, blank=True, null=True)
-    public = models.BooleanField(default=True)
+    public = models.BooleanField(default=False)
 
     @classmethod
     def regenerate_all_entries(cls):
@@ -55,7 +55,7 @@ class Searchable(models.Model):
             obj.regenerate()
 
     def __str__(self):
-        return f"{{<{self.__class__.__name__}>\"{self.title_en}\"}}"
+        return f"{{<{self.__class__.__name__}>\"{self.slug}\"}}"
 
     def regenerate(self):
         """