From bfb742b41b67405c490c43e7a21cabfa47aea01f Mon Sep 17 00:00:00 2001
From: Paillat <me@paillat.dev>
Date: Mon, 22 Apr 2024 14:01:17 +0200
Subject: [PATCH] :recycle: Settings are now directly in pipelines - remove
 settings engine.

---
 src/chore/GenerationContext.py                |  4 -
 src/engines/LLMEngine/AnthropicLLMEngine.py   |  4 +-
 .../Pipelines/ScriptedVideoPipeline.py        | 89 +++++++++++++++++--
 3 files changed, 84 insertions(+), 13 deletions(-)

diff --git a/src/chore/GenerationContext.py b/src/chore/GenerationContext.py
index 84f798d..ec8cf2f 100644
--- a/src/chore/GenerationContext.py
+++ b/src/chore/GenerationContext.py
@@ -24,7 +24,6 @@ class GenerationContext:
     def __init__(
         self,
         pipeline,
-        settingsengine,
         simplellmengine,
         powerfulllmengine,
         ttsengine,
@@ -75,9 +74,6 @@ class GenerationContext:
         )
         self.stockimageengine.ctx = self
 
-        self.settingsengine: engines.SettingsEngine.SettingsEngine = settingsengine[0]
-        self.settingsengine.ctx = self
-
         self.backgroundengine: engines.BackgroundEngine.BaseBackgroundEngine = (
             backgroundengine[0]
         )
diff --git a/src/engines/LLMEngine/AnthropicLLMEngine.py b/src/engines/LLMEngine/AnthropicLLMEngine.py
index b093e5b..7bae4bf 100644
--- a/src/engines/LLMEngine/AnthropicLLMEngine.py
+++ b/src/engines/LLMEngine/AnthropicLLMEngine.py
@@ -61,9 +61,7 @@ class AnthropicLLMEngine(BaseLLMEngine):
                     returnable = fix_busted_json.repair_json(content)
                     returnable = orjson.loads(returnable)
                     return returnable
-                except ( # noqa wait for library to imlement pep https://peps.python.org/pep-0352/ (Required Superclass for Exceptions
-                    Exception
-                ) as e:
+                except fix_busted_json.JsonFixError as e:
                     tries += 1
             else:
                 return content
diff --git a/src/engines/Pipelines/ScriptedVideoPipeline.py b/src/engines/Pipelines/ScriptedVideoPipeline.py
index 1bf343d..c584dda 100644
--- a/src/engines/Pipelines/ScriptedVideoPipeline.py
+++ b/src/engines/Pipelines/ScriptedVideoPipeline.py
@@ -14,19 +14,23 @@ class ScriptedVideoPipeline(BasePipeline):
     description = (
         "A pipeline that generates a long form video based on a script instruction."
     )
-    num_options = 2
+    num_options = 5
 
     def __init__(self, options: list) -> None:
         self.user_instructions = options[0]
         self.assets_instructions = options[1]
+        # ratio = options[2] we don't need this
+        self.width = options[3]
+        self.height = options[4]
         super().__init__()
 
     def launch(self, ctx: GenerationContext) -> None:
 
         ctx.progress(0.1, "Loading settings...")
         ctx.setup_dir()
-        if not isinstance(ctx.settingsengine, engines.NoneEngine):
-            ctx.settingsengine.load()
+        ctx.width = self.width
+        ctx.height = self.height
+
         prompts = get_prompts("long_form", by_file_location=__file__)
         ctx.progress(0.2, "Generating chapters...")
         system = prompts["chapters"]["system"]
@@ -69,12 +73,38 @@ class ScriptedVideoPipeline(BasePipeline):
             ctx.get_file_path("tts.wav"), fast=False, words=True
         )
 
+        sentence_split_script = []
+        current_sentence = None
+
+        for word in ctx.timed_script.copy():
+            if current_sentence is None:
+                # Initialize the first sentence
+                current_sentence = {
+                    "text": word["text"],
+                    "end": word["end"],
+                    "start": word["start"],
+                }
+            elif word["text"].endswith((".", "!", "?")):
+                # Add the word to the current sentence and finalize it
+                current_sentence["text"] += f" {word['text']}"
+                current_sentence["end"] = word["end"]
+                sentence_split_script.append(current_sentence)
+                current_sentence = None  # Prepare to start a new sentence
+            else:
+                # Continue adding words to the current sentence
+                current_sentence["text"] += f" {word['text']}"
+                current_sentence["end"] = word["end"]
+
+        # If the last sentence didn't end with a punctuation mark
+        if current_sentence is not None:
+            sentence_split_script.append(current_sentence)
+
         ctx.progress(0.5, "Generating images...")
         system = prompts["imager"]["system"]
         chat = prompts["imager"]["chat"]
         chat = chat.replace("{user_instructions}", str(self.user_instructions))
         chat = chat.replace("{assets_instructions}", str(self.assets_instructions))
-        chat = chat.replace("{video_transcript}", str(ctx.timed_script))
+        chat = chat.replace("{video_transcript}", str(sentence_split_script))
         assets: list[dict[str, str | float]] = ctx.powerfulllmengine.generate(
             system_prompt=system,
             chat_prompt=chat,
@@ -82,14 +112,16 @@ class ScriptedVideoPipeline(BasePipeline):
             max_tokens=4096,
             json_mode=True,
         )["assets"]
-        for asset in assets:
+        for i, asset in enumerate(assets):
             if asset["type"] == "stock":
+                ctx.progress(0.5, f"Getting stock image {i + 1}...")
                 ctx.index_4.append(
                     ctx.stockimageengine.get(
                         asset["query"], asset["start"], asset["end"]
                     )
                 )
             elif asset["type"] == "ai":
+                ctx.progress(0.5, f"Generating AI image {i + 1}...")
                 ctx.index_5.append(
                     ctx.aiimageengine.generate(
                         asset["prompt"], asset["start"], asset["end"]
@@ -97,9 +129,13 @@ class ScriptedVideoPipeline(BasePipeline):
                 )
 
         if not isinstance(ctx.audiobackgroundengine, engines.NoneEngine):
-            ctx.progress(0.45, "Generating audio background...")
+            ctx.progress(0.6, "Generating audio background...")
             ctx.audio.append(ctx.audiobackgroundengine.get_background())
 
+        if not isinstance(ctx.backgroundengine, engines.NoneEngine):
+            ctx.progress(0.65, "Generating background...")
+            ctx.audio.append(ctx.backgroundengine.get_background())
+
         ctx.progress(0.7, "Rendering video...")
         clips = [
             *ctx.index_0,
@@ -151,6 +187,44 @@ class ScriptedVideoPipeline(BasePipeline):
 
     @classmethod
     def get_options(cls):
+        def change_resolution(chosen_ratio: str) -> list[gr.update]:
+            match chosen_ratio:
+                case "1920x1080":
+                    return [
+                        gr.update(value=1920, visible=False),
+                        gr.update(value=1080, visible=False),
+                    ]
+                case "1080x1920":
+                    return [
+                        gr.update(value=1080, visible=False),
+                        gr.update(value=1920, visible=False),
+                    ]
+                case "1280x720":
+                    return [
+                        gr.update(value=1280, visible=False),
+                        gr.update(value=720, visible=False),
+                    ]
+                case "720x1280":
+                    return [
+                        gr.update(value=720, visible=False),
+                        gr.update(value=1280, visible=False),
+                    ]
+                case "custom":
+                    return [gr.update(visible=True), gr.update(visible=True)]
+
+        with gr.Row():
+            ratio = gr.Dropdown(
+                choices=["1920x1080", "1080x1920", "1280x720", "720x1280", "custom"],
+                label="Resolution",
+            )
+            width = gr.Number(
+                value=1080, minimum=720, maximum=3840, label="Width", step=1
+            )
+            height = gr.Number(
+                value=1920, minimum=720, maximum=3840, label="Height", step=1
+            )
+            ratio.change(change_resolution, inputs=[ratio], outputs=[width, height])
+
         return [
             gr.Textbox(
                 lines=4,
@@ -162,4 +236,7 @@ class ScriptedVideoPipeline(BasePipeline):
                 max_lines=6,
                 label="Assets only instructions",
             ),
+            ratio,
+            width,
+            height,
         ]