🐛 fix(GenerationContext.py): fix indentation issue in process() method

 feat(GenerationContext.py): add support for z-index of moviepy clips to improve video rendering
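The indentation issue in the process() method has been fixed. A CSS-like z-index has been added for moviepy clips: the generation context now holds ten layer lists (index_0 to index_9), engines append their clips to a layer instead of returning them, and process() composites the layers in order, with index_0 at the bottom and index_9 on top. This allows the clips to be rendered in different layers based on their index, resulting in a more visually appealing video.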
This commit is contained in:
2024-02-21 09:06:36 +01:00
parent cced96d792
commit bd00a49063
17 changed files with 75 additions and 66 deletions

View File

@@ -63,27 +63,33 @@ class GenerationContext:
def process(self): def process(self):
# ⚠️ IMPORTANT NOTE: All methods called here are expected to be defined as abstract methods in the base classes, if not there is an issue with the engine implementation. # ⚠️ IMPORTANT NOTE: All methods called here are expected to be defined as abstract methods in the base classes, if not there is an issue with the engine implementation.
# we start by loading the settings # Kinda like in css, we have a z-index of moviepy clips (any). Then the engines append some clips to this, and we render it all with index 0 below, and index 9 at the top.
self.index_0 = []
self.index_1 = []
self.index_2 = []
self.index_3 = []
self.index_4 = []
self.index_5 = []
self.index_6 = []
self.index_7 = []
self.index_8 = []
self.index_9 = []
self.progress(0.1, "Loading settings...") self.progress(0.1, "Loading settings...")
self.setup_dir()
self.settingsengine.load() self.settingsengine.load()
self.setup_dir()
self.progress(0.2, "Generating script...") self.progress(0.2, "Generating script...")
self.script = self.scriptengine.generate() self.scriptengine.generate()
self.progress(0.3, "Generating synthtetizing voice...") self.progress(0.3, "Generating synthtetizing voice...")
self.timed_script = self.ttsengine.synthesize( self.ttsengine.synthesize(self.script, self.get_file_path("tts.wav"))
self.script, self.get_file_path("tts.wav") self.duration: float #for type hinting
)
self.assets = []
if not isinstance(self.backgroundengine, engines.NoneEngine): if not isinstance(self.backgroundengine, engines.NoneEngine):
self.progress(0.4, "Generating background...") self.progress(0.4, "Generating background...")
self.background = self.backgroundengine.get_background() self.backgroundengine.get_background()
self.assets.append(self.background)
self.assetsengine = [ self.assetsengine = [
engine engine
for engine in self.assetsengine for engine in self.assetsengine
@@ -91,21 +97,32 @@ class GenerationContext:
] ]
if len(self.assetsengine) > 0: if len(self.assetsengine) > 0:
self.progress(0.5, "Generating assets...") self.progress(0.5, "Generating assets...")
self.assets.extend(self.assetsengineselector.get_assets()) self.assetsengineselector.get_assets()
if not isinstance(self.captioningengine, engines.NoneEngine): if not isinstance(self.captioningengine, engines.NoneEngine):
self.progress(0.6, "Generating captions...") self.progress(0.6, "Generating captions...")
self.captions = self.captioningengine.get_captions() self.captioningengine.get_captions()
else: else:
self.captions = [] self.captions = []
# add any other processing steps here # add any other processing steps here
# we render to a file called final.mp4 # we render to a file called final.mp4
# using moviepy CompositeVideoClip
self.progress(0.7, "Rendering video...") self.progress(0.7, "Rendering video...")
clips = [*self.assets, *self.captions] clips = [
*self.index_0,
*self.index_1,
*self.index_2,
*self.index_3,
*self.index_4,
*self.index_5,
*self.index_6,
*self.index_7,
*self.index_8,
*self.index_9,
]
clip = mp.CompositeVideoClip(clips, size=(self.width, self.height)) clip = mp.CompositeVideoClip(clips, size=(self.width, self.height))
clip.set_duration(self.duration)
audio = mp.AudioFileClip(self.get_file_path("tts.wav")) audio = mp.AudioFileClip(self.get_file_path("tts.wav"))
clip = clip.set_audio(audio) clip = clip.set_audio(audio)
clip.write_videofile(self.get_file_path("final.mp4"), fps=60) clip.write_videofile(self.get_file_path("final.mp4"), fps=60)

View File

@@ -33,4 +33,4 @@ class AssetsEngineSelector:
assets_opts = [asset for asset in assets if asset["engine"] == engine.name] assets_opts = [asset for asset in assets if asset["engine"] == engine.name]
assets_opts = [asset["args"] for asset in assets_opts] assets_opts = [asset["args"] for asset in assets_opts]
clips.extend(engine.get_assets(assets_opts)) clips.extend(engine.get_assets(assets_opts))
return clips self.ctx.index_3.extend(clips)

View File

@@ -6,5 +6,5 @@ from moviepy.editor import VideoClip
class BaseBackgroundEngine(BaseEngine): class BaseBackgroundEngine(BaseEngine):
@abstractmethod @abstractmethod
def get_background(self) -> VideoClip: def get_background(self) -> None:
... ...

View File

@@ -10,7 +10,7 @@ from moviepy.video.fx.crop import crop
from . import BaseBackgroundEngine from . import BaseBackgroundEngine
class SimpleBackgroundEngine(BaseBackgroundEngine): class VideoBackgroundEngine(BaseBackgroundEngine):
name = "SImple Background Engine" name = "SImple Background Engine"
description = "A basic background engine to set the background of the video from a local file." description = "A basic background engine to set the background of the video from a local file."
num_options = 1 num_options = 1
@@ -48,13 +48,15 @@ class SimpleBackgroundEngine(BaseBackgroundEngine):
start = random.uniform(0, background_max_start) start = random.uniform(0, background_max_start)
clip = background.subclip(start, start + self.ctx.duration) clip = background.subclip(start, start + self.ctx.duration)
w, h = clip.size w, h = clip.size
return crop( self.ctx.index_0.append(
crop(
clip, clip,
width=self.ctx.width, width=self.ctx.width,
height=self.ctx.height, height=self.ctx.height,
x_center=w / 2, x_center=w / 2,
y_center=h / 2, y_center=h / 2,
) )
)
@classmethod @classmethod
def get_settings(cls) -> list: def get_settings(cls) -> list:

View File

@@ -1,2 +1,2 @@
from .BaseBackgroundEngine import BaseBackgroundEngine from .BaseBackgroundEngine import BaseBackgroundEngine
from .SimpleBackgroundEngine import SimpleBackgroundEngine from .VideoBackgroundEngine import VideoBackgroundEngine

View File

@@ -19,7 +19,7 @@ class BaseEngine(ABC):
@classmethod @classmethod
@abstractmethod @abstractmethod
def get_options(): def get_options(cls):
... ...
def get_video_duration(self, path: str) -> float: def get_video_duration(self, path: str) -> float:

View File

@@ -6,5 +6,5 @@ from moviepy.editor import TextClip
class BaseCaptioningEngine(BaseEngine): class BaseCaptioningEngine(BaseEngine):
@abstractmethod @abstractmethod
def get_captions(self) -> list[TextClip]: def get_captions(self) -> None:
... ...

View File

@@ -39,7 +39,7 @@ class SimpleCaptioningEngine(BaseCaptioningEngine):
punctuations = (".", "?", "!", ",", ":", ";") punctuations = (".", "?", "!", ",", ":", ";")
return text.strip().endswith(tuple(punctuations)) return text.strip().endswith(tuple(punctuations))
def get_captions(self) -> list[TextClip]: def get_captions(self):
# 3 words per 1000 px, we do the math # 3 words per 1000 px, we do the math
max_words = int(self.ctx.width / 1000 * 3) max_words = int(self.ctx.width / 1000 * 3)
@@ -78,7 +78,7 @@ class SimpleCaptioningEngine(BaseCaptioningEngine):
) )
) )
return clips self.ctx.index_7.extend(clips)
@classmethod @classmethod
def get_options(cls) -> list: def get_options(cls) -> list:

View File

@@ -4,6 +4,11 @@ from ...utils.prompting import get_prompt
class ShortsMetadataEngine(BaseMetadataEngine): class ShortsMetadataEngine(BaseMetadataEngine):
name = "ShortsMetadata"
description = "Generate metadata for YouTube Shorts / TikTok format videos"
num_options = 0
def __init__(self, **kwargs) -> None: def __init__(self, **kwargs) -> None:
... ...
@@ -13,9 +18,12 @@ class ShortsMetadataEngine(BaseMetadataEngine):
) )
chat_prompt = chat_prompt.replace("{script}", self.ctx.script) chat_prompt = chat_prompt.replace("{script}", self.ctx.script)
return self.ctx.simplellmengine.generate( result = self.ctx.simplellmengine.generate(
chat_prompt=chat_prompt, system_prompt=sytsem_prompt, json_mode=True chat_prompt=chat_prompt, system_prompt=sytsem_prompt, json_mode=True
) )
self.ctx.title = result["title"]
self.ctx.description = result["description"]
def get_options(self): @classmethod
def get_options(cls):
return [] return []

View File

@@ -6,7 +6,7 @@ class BaseScriptEngine(BaseEngine):
pass pass
@abstractmethod @abstractmethod
def generate(self) -> str: def generate(self) -> None:
pass pass
def time_script(self): def time_script(self):

View File

@@ -12,7 +12,7 @@ class CustomScriptEngine(BaseScriptEngine):
super().__init__() super().__init__()
def generate(self, *args, **kwargs) -> str: def generate(self, *args, **kwargs) -> str:
return self.script self.ctx.script = self.script.strip().copy()
@classmethod @classmethod
def get_options(cls) -> list: def get_options(cls) -> list:

View File

@@ -23,13 +23,17 @@ class ShowerThoughtsScriptEngine(BaseScriptEngine):
) )
sys_prompt = sys_prompt.format(n_sentences=self.n_sentences) sys_prompt = sys_prompt.format(n_sentences=self.n_sentences)
chat_prompt = chat_prompt.format(n_sentences=self.n_sentences) chat_prompt = chat_prompt.format(n_sentences=self.n_sentences)
return self.ctx.powerfulllmengine.generate( self.ctx.script = (
self.ctx.powerfulllmengine.generate(
system_prompt=sys_prompt, system_prompt=sys_prompt,
chat_prompt=chat_prompt, chat_prompt=chat_prompt,
max_tokens=20 * self.n_sentences, max_tokens=20 * self.n_sentences,
temperature=1.3, temperature=1.3,
json_mode=False, json_mode=False,
) )
.strip()
.copy()
)
@classmethod @classmethod
def get_options(cls) -> list: def get_options(cls) -> list:

View File

@@ -16,7 +16,7 @@ class Word(TypedDict):
class BaseTTSEngine(BaseEngine): class BaseTTSEngine(BaseEngine):
@abstractmethod @abstractmethod
def synthesize(self, text: str, path: str) -> list[Word]: def synthesize(self, text: str, path: str) -> None:
pass pass
def remove_punctuation(self, text: str) -> str: def remove_punctuation(self, text: str) -> str:

View File

@@ -127,7 +127,7 @@ class CoquiTTSEngine(BaseTTSEngine):
self.ctx.duration = self.get_audio_duration(path) self.ctx.duration = self.get_audio_duration(path)
return self.time_with_whisper(path) self.ctx.timed_script = self.time_with_whisper(path)
@classmethod @classmethod
def get_options(cls) -> list: def get_options(cls) -> list:

View File

@@ -1,19 +0,0 @@
from .BaseTTSEngine import BaseTTSEngine
import gradio as gr
class ElevenLabsTTSEngine(BaseTTSEngine):
name = "ElevenLabs"
description = "ElevenLabs TTS engine."
num_options = 0
def __init__(self, options: list[list | tuple | str | int | float | bool | None]):
# self.voice = options[0][0]
super().__init__()
def synthesize(self, text: str, path: str) -> str:
pass
@classmethod
def get_options(cls) -> list:
return []

View File

@@ -50,7 +50,7 @@ ENGINES: dict[str, EngineDict] = {
"multiple": True, "multiple": True,
}, },
"BackgroundEngine": { "BackgroundEngine": {
"classes": [BackgroundEngine.SimpleBackgroundEngine, NoneEngine], "classes": [BackgroundEngine.VideoBackgroundEngine, NoneEngine],
"multiple": False, "multiple": False,
}, },
"MetadataEngine": { "MetadataEngine": {

View File

@@ -75,11 +75,8 @@ class GenerateUI:
inputs.append(engine_dropdown) inputs.append(engine_dropdown)
engine_rows = [] engine_rows = []
for i, engine in enumerate(engines): for i, engine in enumerate(engines):
with gr.Group(visible=(i == 0)) as engine_row: with gr.Row(visible=(i == 0)) as engine_row:
gr.Markdown( gr.Markdown(value=f"## {engine.name}")
value=f"## {engine.name}",
render=False,
)
engine_rows.append(engine_row) engine_rows.append(engine_row)
options = engine.get_options() options = engine.get_options()
inputs.extend(options) inputs.extend(options)