From bd00a490637220a92628768c6ef701c8770b805d Mon Sep 17 00:00:00 2001 From: Paillat Date: Wed, 21 Feb 2024 09:06:36 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix(GenerationContext.py):=20fix?= =?UTF-8?q?=20indentation=20issue=20in=20process()=20method=20=E2=9C=A8=20?= =?UTF-8?q?feat(GenerationContext.py):=20add=20support=20for=20z-index=20o?= =?UTF-8?q?f=20moviepy=20clips=20to=20improve=20video=20rendering=20The=20?= =?UTF-8?q?indentation=20issue=20in=20the=20process()=20method=20has=20bee?= =?UTF-8?q?n=20fixed.=20The=20z-index=20of=20moviepy=20clips=20has=20been?= =?UTF-8?q?=20added=20to=20improve=20the=20rendering=20of=20the=20video.?= =?UTF-8?q?=20This=20allows=20the=20clips=20to=20be=20rendered=20in=20diff?= =?UTF-8?q?erent=20layers=20based=20on=20their=20index,=20resulting=20in?= =?UTF-8?q?=20a=20more=20visually=20appealing=20video.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chore/GenerationContext.py | 47 +++++++++++++------ .../AssetsEngine/AssetsEngineSelector.py | 2 +- .../BackgroundEngine/BaseBackgroundEngine.py | 2 +- ...oundEngine.py => VideoBackgroundEngine.py} | 16 ++++--- src/engines/BackgroundEngine/__init__.py | 2 +- src/engines/BaseEngine.py | 2 +- .../CaptioningEngine/BaseCaptioningEngine.py | 2 +- .../SimpleCaptioningEngine.py | 4 +- .../MetadataEngine/ShortsMetadataEngine.py | 12 ++++- src/engines/ScriptEngine/BaseScriptEngine.py | 2 +- .../ScriptEngine/CustomScriptEngine.py | 2 +- .../ShowerThoughtsScriptEngine.py | 16 ++++--- src/engines/TTSEngine/BaseTTSEngine.py | 2 +- src/engines/TTSEngine/CoquiTTSEngine.py | 2 +- src/engines/TTSEngine/ElevenLabsTTSEngine.py | 19 -------- src/engines/__init__.py | 2 +- ui/gradio_ui.py | 7 +-- 17 files changed, 75 insertions(+), 66 deletions(-) rename src/engines/BackgroundEngine/{SimpleBackgroundEngine.py => VideoBackgroundEngine.py} (90%) delete mode 100644 src/engines/TTSEngine/ElevenLabsTTSEngine.py diff --git a/src/chore/GenerationContext.py b/src/chore/GenerationContext.py index 4e740b7..f6e67b3 100644 --- a/src/chore/GenerationContext.py +++ b/src/chore/GenerationContext.py @@ -63,27 +63,33 @@ class GenerationContext: def process(self): # ⚠️ IMPORTANT NOTE: All methods called here are expected to be defined as abstract methods in the base classes, if not there is an issue with the engine implementation. - # we start by loading the settings + # Kinda like in css, we have a z-index of moviepy clips (any). Then the engines append some clips to this, and we render it all with index 0 below, and index 9 at the top. + self.index_0 = [] + self.index_1 = [] + self.index_2 = [] + self.index_3 = [] + self.index_4 = [] + self.index_5 = [] + self.index_6 = [] + self.index_7 = [] + self.index_8 = [] + self.index_9 = [] self.progress(0.1, "Loading settings...") + self.setup_dir() self.settingsengine.load() - self.setup_dir() - self.progress(0.2, "Generating script...") - self.script = self.scriptengine.generate() + self.scriptengine.generate() self.progress(0.3, "Generating synthtetizing voice...") - self.timed_script = self.ttsengine.synthesize( - self.script, self.get_file_path("tts.wav") - ) - - self.assets = [] + self.ttsengine.synthesize(self.script, self.get_file_path("tts.wav")) + self.duration: float #for type hinting if not isinstance(self.backgroundengine, engines.NoneEngine): self.progress(0.4, "Generating background...") - self.background = self.backgroundengine.get_background() - self.assets.append(self.background) + self.backgroundengine.get_background() + self.assetsengine = [ engine for engine in self.assetsengine @@ -91,21 +97,32 @@ class GenerationContext: ] if len(self.assetsengine) > 0: self.progress(0.5, "Generating assets...") - self.assets.extend(self.assetsengineselector.get_assets()) + self.assetsengineselector.get_assets() if not isinstance(self.captioningengine, engines.NoneEngine): self.progress(0.6, "Generating captions...") - self.captions = self.captioningengine.get_captions() + self.captioningengine.get_captions() else: self.captions = [] # add any other processing steps here # we render to a file called final.mp4 - # using moviepy CompositeVideoClip self.progress(0.7, "Rendering video...") - clips = [*self.assets, *self.captions] + clips = [ + *self.index_0, + *self.index_1, + *self.index_2, + *self.index_3, + *self.index_4, + *self.index_5, + *self.index_6, + *self.index_7, + *self.index_8, + *self.index_9, + ] clip = mp.CompositeVideoClip(clips, size=(self.width, self.height)) + clip.set_duration(self.duration) audio = mp.AudioFileClip(self.get_file_path("tts.wav")) clip = clip.set_audio(audio) clip.write_videofile(self.get_file_path("final.mp4"), fps=60) diff --git a/src/engines/AssetsEngine/AssetsEngineSelector.py b/src/engines/AssetsEngine/AssetsEngineSelector.py index 632caac..a259a43 100644 --- a/src/engines/AssetsEngine/AssetsEngineSelector.py +++ b/src/engines/AssetsEngine/AssetsEngineSelector.py @@ -33,4 +33,4 @@ class AssetsEngineSelector: assets_opts = [asset for asset in assets if asset["engine"] == engine.name] assets_opts = [asset["args"] for asset in assets_opts] clips.extend(engine.get_assets(assets_opts)) - return clips + self.ctx.index_3.extend(clips) diff --git a/src/engines/BackgroundEngine/BaseBackgroundEngine.py b/src/engines/BackgroundEngine/BaseBackgroundEngine.py index 662356e..68f9558 100644 --- a/src/engines/BackgroundEngine/BaseBackgroundEngine.py +++ b/src/engines/BackgroundEngine/BaseBackgroundEngine.py @@ -6,5 +6,5 @@ from moviepy.editor import VideoClip class BaseBackgroundEngine(BaseEngine): @abstractmethod - def get_background(self) -> VideoClip: + def get_background(self) -> None: ... diff --git a/src/engines/BackgroundEngine/SimpleBackgroundEngine.py b/src/engines/BackgroundEngine/VideoBackgroundEngine.py similarity index 90% rename from src/engines/BackgroundEngine/SimpleBackgroundEngine.py rename to src/engines/BackgroundEngine/VideoBackgroundEngine.py index 642a218..4b470dd 100644 --- a/src/engines/BackgroundEngine/SimpleBackgroundEngine.py +++ b/src/engines/BackgroundEngine/VideoBackgroundEngine.py @@ -10,7 +10,7 @@ from moviepy.video.fx.crop import crop from . import BaseBackgroundEngine -class SimpleBackgroundEngine(BaseBackgroundEngine): +class VideoBackgroundEngine(BaseBackgroundEngine): name = "SImple Background Engine" description = "A basic background engine to set the background of the video from a local file." num_options = 1 @@ -48,12 +48,14 @@ class SimpleBackgroundEngine(BaseBackgroundEngine): start = random.uniform(0, background_max_start) clip = background.subclip(start, start + self.ctx.duration) w, h = clip.size - return crop( - clip, - width=self.ctx.width, - height=self.ctx.height, - x_center=w / 2, - y_center=h / 2, + self.ctx.index_0.append( + crop( + clip, + width=self.ctx.width, + height=self.ctx.height, + x_center=w / 2, + y_center=h / 2, + ) ) @classmethod diff --git a/src/engines/BackgroundEngine/__init__.py b/src/engines/BackgroundEngine/__init__.py index a6b1c97..3d67f9b 100644 --- a/src/engines/BackgroundEngine/__init__.py +++ b/src/engines/BackgroundEngine/__init__.py @@ -1,2 +1,2 @@ from .BaseBackgroundEngine import BaseBackgroundEngine -from .SimpleBackgroundEngine import SimpleBackgroundEngine +from .VideoBackgroundEngine import VideoBackgroundEngine diff --git a/src/engines/BaseEngine.py b/src/engines/BaseEngine.py index fd589c5..41520d1 100644 --- a/src/engines/BaseEngine.py +++ b/src/engines/BaseEngine.py @@ -19,7 +19,7 @@ class BaseEngine(ABC): @classmethod @abstractmethod - def get_options(): + def get_options(cls): ... def get_video_duration(self, path: str) -> float: diff --git a/src/engines/CaptioningEngine/BaseCaptioningEngine.py b/src/engines/CaptioningEngine/BaseCaptioningEngine.py index b5731a8..bbadf88 100644 --- a/src/engines/CaptioningEngine/BaseCaptioningEngine.py +++ b/src/engines/CaptioningEngine/BaseCaptioningEngine.py @@ -6,5 +6,5 @@ from moviepy.editor import TextClip class BaseCaptioningEngine(BaseEngine): @abstractmethod - def get_captions(self) -> list[TextClip]: + def get_captions(self) -> None: ... diff --git a/src/engines/CaptioningEngine/SimpleCaptioningEngine.py b/src/engines/CaptioningEngine/SimpleCaptioningEngine.py index 60d7958..18e751e 100644 --- a/src/engines/CaptioningEngine/SimpleCaptioningEngine.py +++ b/src/engines/CaptioningEngine/SimpleCaptioningEngine.py @@ -39,7 +39,7 @@ class SimpleCaptioningEngine(BaseCaptioningEngine): punctuations = (".", "?", "!", ",", ":", ";") return text.strip().endswith(tuple(punctuations)) - def get_captions(self) -> list[TextClip]: + def get_captions(self): # 3 words per 1000 px, we do the math max_words = int(self.ctx.width / 1000 * 3) @@ -78,7 +78,7 @@ class SimpleCaptioningEngine(BaseCaptioningEngine): ) ) - return clips + self.ctx.index_7.extend(clips) @classmethod def get_options(cls) -> list: diff --git a/src/engines/MetadataEngine/ShortsMetadataEngine.py b/src/engines/MetadataEngine/ShortsMetadataEngine.py index 99b2622..357a302 100644 --- a/src/engines/MetadataEngine/ShortsMetadataEngine.py +++ b/src/engines/MetadataEngine/ShortsMetadataEngine.py @@ -4,6 +4,11 @@ from ...utils.prompting import get_prompt class ShortsMetadataEngine(BaseMetadataEngine): + name = "ShortsMetadata" + description = "Generate metadata for YouTube Shorts / TikTok format videos" + + num_options = 0 + def __init__(self, **kwargs) -> None: ... @@ -13,9 +18,12 @@ class ShortsMetadataEngine(BaseMetadataEngine): ) chat_prompt = chat_prompt.replace("{script}", self.ctx.script) - return self.ctx.simplellmengine.generate( + result = self.ctx.simplellmengine.generate( chat_prompt=chat_prompt, system_prompt=sytsem_prompt, json_mode=True ) + self.ctx.title = result["title"] + self.ctx.description = result["description"] - def get_options(self): + @classmethod + def get_options(cls): return [] diff --git a/src/engines/ScriptEngine/BaseScriptEngine.py b/src/engines/ScriptEngine/BaseScriptEngine.py index 3acb6a1..eecece4 100644 --- a/src/engines/ScriptEngine/BaseScriptEngine.py +++ b/src/engines/ScriptEngine/BaseScriptEngine.py @@ -6,7 +6,7 @@ class BaseScriptEngine(BaseEngine): pass @abstractmethod - def generate(self) -> str: + def generate(self) -> None: pass def time_script(self): diff --git a/src/engines/ScriptEngine/CustomScriptEngine.py b/src/engines/ScriptEngine/CustomScriptEngine.py index c6c052a..3660862 100644 --- a/src/engines/ScriptEngine/CustomScriptEngine.py +++ b/src/engines/ScriptEngine/CustomScriptEngine.py @@ -12,7 +12,7 @@ class CustomScriptEngine(BaseScriptEngine): super().__init__() def generate(self, *args, **kwargs) -> str: - return self.script + self.ctx.script = self.script.strip().copy() @classmethod def get_options(cls) -> list: diff --git a/src/engines/ScriptEngine/ShowerThoughtsScriptEngine.py b/src/engines/ScriptEngine/ShowerThoughtsScriptEngine.py index 92caefd..9290845 100644 --- a/src/engines/ScriptEngine/ShowerThoughtsScriptEngine.py +++ b/src/engines/ScriptEngine/ShowerThoughtsScriptEngine.py @@ -23,12 +23,16 @@ class ShowerThoughtsScriptEngine(BaseScriptEngine): ) sys_prompt = sys_prompt.format(n_sentences=self.n_sentences) chat_prompt = chat_prompt.format(n_sentences=self.n_sentences) - return self.ctx.powerfulllmengine.generate( - system_prompt=sys_prompt, - chat_prompt=chat_prompt, - max_tokens=20 * self.n_sentences, - temperature=1.3, - json_mode=False, + self.ctx.script = ( + self.ctx.powerfulllmengine.generate( + system_prompt=sys_prompt, + chat_prompt=chat_prompt, + max_tokens=20 * self.n_sentences, + temperature=1.3, + json_mode=False, + ) + .strip() + .copy() ) @classmethod diff --git a/src/engines/TTSEngine/BaseTTSEngine.py b/src/engines/TTSEngine/BaseTTSEngine.py index 3e7aa1e..39b9f5e 100644 --- a/src/engines/TTSEngine/BaseTTSEngine.py +++ b/src/engines/TTSEngine/BaseTTSEngine.py @@ -16,7 +16,7 @@ class Word(TypedDict): class BaseTTSEngine(BaseEngine): @abstractmethod - def synthesize(self, text: str, path: str) -> list[Word]: + def synthesize(self, text: str, path: str) -> None: pass def remove_punctuation(self, text: str) -> str: diff --git a/src/engines/TTSEngine/CoquiTTSEngine.py b/src/engines/TTSEngine/CoquiTTSEngine.py index 985ecfe..db499bf 100644 --- a/src/engines/TTSEngine/CoquiTTSEngine.py +++ b/src/engines/TTSEngine/CoquiTTSEngine.py @@ -127,7 +127,7 @@ class CoquiTTSEngine(BaseTTSEngine): self.ctx.duration = self.get_audio_duration(path) - return self.time_with_whisper(path) + self.ctx.timed_script = self.time_with_whisper(path) @classmethod def get_options(cls) -> list: diff --git a/src/engines/TTSEngine/ElevenLabsTTSEngine.py b/src/engines/TTSEngine/ElevenLabsTTSEngine.py deleted file mode 100644 index 3e34850..0000000 --- a/src/engines/TTSEngine/ElevenLabsTTSEngine.py +++ /dev/null @@ -1,19 +0,0 @@ -from .BaseTTSEngine import BaseTTSEngine -import gradio as gr - - -class ElevenLabsTTSEngine(BaseTTSEngine): - name = "ElevenLabs" - description = "ElevenLabs TTS engine." - num_options = 0 - - def __init__(self, options: list[list | tuple | str | int | float | bool | None]): - # self.voice = options[0][0] - super().__init__() - - def synthesize(self, text: str, path: str) -> str: - pass - - @classmethod - def get_options(cls) -> list: - return [] diff --git a/src/engines/__init__.py b/src/engines/__init__.py index 3126315..d1808e7 100644 --- a/src/engines/__init__.py +++ b/src/engines/__init__.py @@ -50,7 +50,7 @@ ENGINES: dict[str, EngineDict] = { "multiple": True, }, "BackgroundEngine": { - "classes": [BackgroundEngine.SimpleBackgroundEngine, NoneEngine], + "classes": [BackgroundEngine.VideoBackgroundEngine, NoneEngine], "multiple": False, }, "MetadataEngine": { diff --git a/ui/gradio_ui.py b/ui/gradio_ui.py index 7b9218c..d74c064 100644 --- a/ui/gradio_ui.py +++ b/ui/gradio_ui.py @@ -75,11 +75,8 @@ class GenerateUI: inputs.append(engine_dropdown) engine_rows = [] for i, engine in enumerate(engines): - with gr.Group(visible=(i == 0)) as engine_row: - gr.Markdown( - value=f"## {engine.name}", - render=False, - ) + with gr.Row(visible=(i == 0)) as engine_row: + gr.Markdown(value=f"## {engine.name}") engine_rows.append(engine_row) options = engine.get_options() inputs.extend(options)