🐛 fix(GenerationContext.py): fix indentation issue in process() method

✨ feat(GenerationContext.py): add support for z-index of moviepy clips to improve video rendering

The indentation issue in the process() method has been fixed. A CSS-like z-index for moviepy clips has been added: engines now append their clips to one of ten layers (index_0 to index_9) on the generation context, and the layers are composited in order, with index 0 at the bottom and index 9 on top. This allows clips to be rendered in different layers based on their index, resulting in a more visually appealing video.
2026-03-03 02:14:54 +00:00 · 2024-02-21 09:06:36 +01:00
parent cced96d792
commit bd00a49063
17 changed files with 75 additions and 66 deletions
--- a/src/chore/GenerationContext.py
+++ b/src/chore/GenerationContext.py
@@ -63,27 +63,33 @@ class GenerationContext:
    def process(self):
        # ⚠️ IMPORTANT NOTE: All methods called here are expected to be defined as abstract methods in the base classes, if not there is an issue with the engine implementation.
-        # we start by loading the settings
+        # Kinda like in css, we have a z-index of moviepy clips (any). Then the engines append some clips to this, and we render it all with index 0 below, and index 9 at the top.
        self.index_0 = []
        self.index_1 = []
        self.index_2 = []
        self.index_3 = []
        self.index_4 = []
        self.index_5 = []
        self.index_6 = []
        self.index_7 = []
        self.index_8 = []
        self.index_9 = []
        self.progress(0.1, "Loading settings...")
        self.setup_dir()
        self.settingsengine.load()
        self.setup_dir()
        self.progress(0.2, "Generating script...")
-        self.script = self.scriptengine.generate()
+        self.scriptengine.generate()
        self.progress(0.3, "Generating synthtetizing voice...")
-        self.timed_script = self.ttsengine.synthesize(
+        self.ttsengine.synthesize(self.script, self.get_file_path("tts.wav"))
-            self.script, self.get_file_path("tts.wav")
+        self.duration: float #for type hinting
        )
        self.assets = []
        if not isinstance(self.backgroundengine, engines.NoneEngine):
            self.progress(0.4, "Generating background...")
-            self.background = self.backgroundengine.get_background()
+            self.backgroundengine.get_background()
-            self.assets.append(self.background)
+
        self.assetsengine = [
            engine
            for engine in self.assetsengine
@@ -91,21 +97,32 @@ class GenerationContext:
        ]
        if len(self.assetsengine) > 0:
            self.progress(0.5, "Generating assets...")
-            self.assets.extend(self.assetsengineselector.get_assets())
+            self.assetsengineselector.get_assets()
        if not isinstance(self.captioningengine, engines.NoneEngine):
            self.progress(0.6, "Generating captions...")
-            self.captions = self.captioningengine.get_captions()
+            self.captioningengine.get_captions()
        else:
            self.captions = []
        # add any other processing steps here
        # we render to a file called final.mp4
        # using moviepy CompositeVideoClip
        self.progress(0.7, "Rendering video...")
-        clips = [*self.assets, *self.captions]
+        clips = [
            *self.index_0,
            *self.index_1,
            *self.index_2,
            *self.index_3,
            *self.index_4,
            *self.index_5,
            *self.index_6,
            *self.index_7,
            *self.index_8,
            *self.index_9,
        ]
        clip = mp.CompositeVideoClip(clips, size=(self.width, self.height))
        clip.set_duration(self.duration)
        audio = mp.AudioFileClip(self.get_file_path("tts.wav"))
        clip = clip.set_audio(audio)
        clip.write_videofile(self.get_file_path("final.mp4"), fps=60)
--- a/src/engines/AssetsEngine/AssetsEngineSelector.py
+++ b/src/engines/AssetsEngine/AssetsEngineSelector.py
@@ -33,4 +33,4 @@ class AssetsEngineSelector:
            assets_opts = [asset for asset in assets if asset["engine"] == engine.name]
            assets_opts = [asset["args"] for asset in assets_opts]
            clips.extend(engine.get_assets(assets_opts))
-        return clips
+        self.ctx.index_3.extend(clips)
--- a/src/engines/BackgroundEngine/BaseBackgroundEngine.py
+++ b/src/engines/BackgroundEngine/BaseBackgroundEngine.py
@@ -6,5 +6,5 @@ from moviepy.editor import VideoClip
 class BaseBackgroundEngine(BaseEngine):
    @abstractmethod
-    def get_background(self) -> VideoClip:
+    def get_background(self) -> None:
        ...
--- a/src/engines/BackgroundEngine/SimpleBackgroundEngine.py
+++ b/src/engines/BackgroundEngine/SimpleBackgroundEngine.py
@@ -10,7 +10,7 @@ from moviepy.video.fx.crop import crop
 from . import BaseBackgroundEngine
-class SimpleBackgroundEngine(BaseBackgroundEngine):
+class VideoBackgroundEngine(BaseBackgroundEngine):
    name = "SImple Background Engine"
    description = "A basic background engine to set the background of the video from a local file."
    num_options = 1
@@ -48,13 +48,15 @@ class SimpleBackgroundEngine(BaseBackgroundEngine):
        start = random.uniform(0, background_max_start)
        clip = background.subclip(start, start + self.ctx.duration)
        w, h = clip.size
-        return crop(
+        self.ctx.index_0.append(
            crop(
                clip,
                width=self.ctx.width,
                height=self.ctx.height,
                x_center=w / 2,
                y_center=h / 2,
            )
        )
    @classmethod
    def get_settings(cls) -> list:
--- a/src/engines/BackgroundEngine/init.py
+++ b/src/engines/BackgroundEngine/init.py
@@ -1,2 +1,2 @@
 from .BaseBackgroundEngine import BaseBackgroundEngine
-from .SimpleBackgroundEngine import SimpleBackgroundEngine
+from .VideoBackgroundEngine import VideoBackgroundEngine
--- a/src/engines/BaseEngine.py
+++ b/src/engines/BaseEngine.py
@@ -19,7 +19,7 @@ class BaseEngine(ABC):
    @classmethod
    @abstractmethod
-    def get_options():
+    def get_options(cls):
        ...
    def get_video_duration(self, path: str) -> float:
--- a/src/engines/CaptioningEngine/BaseCaptioningEngine.py
+++ b/src/engines/CaptioningEngine/BaseCaptioningEngine.py
@@ -6,5 +6,5 @@ from moviepy.editor import TextClip
 class BaseCaptioningEngine(BaseEngine):
    @abstractmethod
-    def get_captions(self) -> list[TextClip]:
+    def get_captions(self) -> None:
        ...
--- a/src/engines/CaptioningEngine/SimpleCaptioningEngine.py
+++ b/src/engines/CaptioningEngine/SimpleCaptioningEngine.py
@@ -39,7 +39,7 @@ class SimpleCaptioningEngine(BaseCaptioningEngine):
        punctuations = (".", "?", "!", ",", ":", ";")
        return text.strip().endswith(tuple(punctuations))
-    def get_captions(self) -> list[TextClip]:
+    def get_captions(self):
        # 3 words per 1000 px, we do the math
        max_words = int(self.ctx.width / 1000 * 3)
@@ -78,7 +78,7 @@ class SimpleCaptioningEngine(BaseCaptioningEngine):
                )
            )
-        return clips
+        self.ctx.index_7.extend(clips)
    @classmethod
    def get_options(cls) -> list:
--- a/src/engines/MetadataEngine/ShortsMetadataEngine.py
+++ b/src/engines/MetadataEngine/ShortsMetadataEngine.py
@@ -4,6 +4,11 @@ from ...utils.prompting import get_prompt
 class ShortsMetadataEngine(BaseMetadataEngine):
    name = "ShortsMetadata"
    description = "Generate metadata for YouTube Shorts / TikTok format videos"
    num_options = 0
    def __init__(self, **kwargs) -> None:
        ...
@@ -13,9 +18,12 @@ class ShortsMetadataEngine(BaseMetadataEngine):
        )
        chat_prompt = chat_prompt.replace("{script}", self.ctx.script)
-        return self.ctx.simplellmengine.generate(
+        result = self.ctx.simplellmengine.generate(
            chat_prompt=chat_prompt, system_prompt=sytsem_prompt, json_mode=True
        )
        self.ctx.title = result["title"]
        self.ctx.description = result["description"]
-    def get_options(self):
+    @classmethod
    def get_options(cls):
        return []
--- a/src/engines/ScriptEngine/BaseScriptEngine.py
+++ b/src/engines/ScriptEngine/BaseScriptEngine.py
@@ -6,7 +6,7 @@ class BaseScriptEngine(BaseEngine):
    pass
    @abstractmethod
-    def generate(self) -> str:
+    def generate(self) -> None:
        pass
    def time_script(self):
--- a/src/engines/ScriptEngine/CustomScriptEngine.py
+++ b/src/engines/ScriptEngine/CustomScriptEngine.py
@@ -12,7 +12,7 @@ class CustomScriptEngine(BaseScriptEngine):
        super().__init__()
    def generate(self, *args, **kwargs) -> str:
-        return self.script
+        self.ctx.script = self.script.strip().copy()
    @classmethod
    def get_options(cls) -> list:
--- a/src/engines/ScriptEngine/ShowerThoughtsScriptEngine.py
+++ b/src/engines/ScriptEngine/ShowerThoughtsScriptEngine.py
@@ -23,13 +23,17 @@ class ShowerThoughtsScriptEngine(BaseScriptEngine):
        )
        sys_prompt = sys_prompt.format(n_sentences=self.n_sentences)
        chat_prompt = chat_prompt.format(n_sentences=self.n_sentences)
-        return self.ctx.powerfulllmengine.generate(
+        self.ctx.script = (
            self.ctx.powerfulllmengine.generate(
                system_prompt=sys_prompt,
                chat_prompt=chat_prompt,
                max_tokens=20 * self.n_sentences,
                temperature=1.3,
                json_mode=False,
            )
            .strip()
            .copy()
        )
    @classmethod
    def get_options(cls) -> list:
--- a/src/engines/TTSEngine/BaseTTSEngine.py
+++ b/src/engines/TTSEngine/BaseTTSEngine.py
@@ -16,7 +16,7 @@ class Word(TypedDict):
 class BaseTTSEngine(BaseEngine):
    @abstractmethod
-    def synthesize(self, text: str, path: str) -> list[Word]:
+    def synthesize(self, text: str, path: str) -> None:
        pass
    def remove_punctuation(self, text: str) -> str:
--- a/src/engines/TTSEngine/CoquiTTSEngine.py
+++ b/src/engines/TTSEngine/CoquiTTSEngine.py
@@ -127,7 +127,7 @@ class CoquiTTSEngine(BaseTTSEngine):
        self.ctx.duration = self.get_audio_duration(path)
-        return self.time_with_whisper(path)
+        self.ctx.timed_script = self.time_with_whisper(path)
    @classmethod
    def get_options(cls) -> list:
--- a/src/engines/TTSEngine/ElevenLabsTTSEngine.py
+++ b/src/engines/TTSEngine/ElevenLabsTTSEngine.py
@@ -1,19 +0,0 @@
 from .BaseTTSEngine import BaseTTSEngine
 import gradio as gr
 class ElevenLabsTTSEngine(BaseTTSEngine):
    name = "ElevenLabs"
    description = "ElevenLabs TTS engine."
    num_options = 0
    def __init__(self, options: list[list | tuple | str | int | float | bool | None]):
        # self.voice = options[0][0]
        super().__init__()
    def synthesize(self, text: str, path: str) -> str:
        pass
    @classmethod
    def get_options(cls) -> list:
        return []
--- a/src/engines/init.py
+++ b/src/engines/init.py
@@ -50,7 +50,7 @@ ENGINES: dict[str, EngineDict] = {
        "multiple": True,
    },
    "BackgroundEngine": {
-        "classes": [BackgroundEngine.SimpleBackgroundEngine, NoneEngine],
+        "classes": [BackgroundEngine.VideoBackgroundEngine, NoneEngine],
        "multiple": False,
    },
    "MetadataEngine": {
--- a/ui/gradio_ui.py
+++ b/ui/gradio_ui.py
@@ -75,11 +75,8 @@ class GenerateUI:
                            inputs.append(engine_dropdown)
                            engine_rows = []
                            for i, engine in enumerate(engines):
-                                with gr.Group(visible=(i == 0)) as engine_row:
+                                with gr.Row(visible=(i == 0)) as engine_row:
-                                    gr.Markdown(
+                                    gr.Markdown(value=f"## {engine.name}")
                                        value=f"## {engine.name}",
                                        render=False,
                                    )
                                    engine_rows.append(engine_row)
                                    options = engine.get_options()
                                    inputs.extend(options)
`@@ -1,2 +1,2 @@`
	`from .BaseBackgroundEngine import BaseBackgroundEngine`	`from .BaseBackgroundEngine import BaseBackgroundEngine`
	`from .SimpleBackgroundEngine import SimpleBackgroundEngine`	`from .VideoBackgroundEngine import VideoBackgroundEngine`