diff --git a/src/engines/Pipelines/ScriptedVideoPipeline.py b/src/engines/Pipelines/ScriptedVideoPipeline.py
index c584dda..0d79b6e 100644
--- a/src/engines/Pipelines/ScriptedVideoPipeline.py
+++ b/src/engines/Pipelines/ScriptedVideoPipeline.py
@@ -45,7 +45,11 @@ class ScriptedVideoPipeline(BasePipeline):
         )["chapters"]
 
         ctx.script = ""
-        for chapter in chapters:
+        text_audio = []
+
+        ctx.duration = 0
+
+        for i, chapter in enumerate(chapters):
             ctx.progress(0.2, f"Generating chapter: {chapter['title']}...")
             system = prompts["writer"]["system"]
             chat = prompts["writer"]["chat"]
@@ -54,87 +58,96 @@ class ScriptedVideoPipeline(BasePipeline):
                 .replace("{chapter_title}", chapter["title"])
                 .replace("{chapter_instructions}", chapter["explanation"])
             )
-            ctx.script += ctx.powerfulllmengine.generate(
+            script = ctx.powerfulllmengine.generate(
                 system_prompt=system,
                 chat_prompt=chat,
                 temperature=1,
                 max_tokens=4096,
                 json_mode=True,
             )["chapter"]
+            ctx.script += script
             ctx.script += "\n"
 
-        ctx.progress(0.3, "Synthesizing voice...")
-        ctx.duration = ctx.ttsengine.synthesize(
-            ctx.script, ctx.get_file_path("tts.wav")
-        )
-        ctx.audio.append(mp.AudioFileClip(ctx.get_file_path("tts.wav")))
-        ctx.progress(0.4, "Transcribing audio...")
-        ctx.timed_script = ctx.transcriptionengine.transcribe(
-            ctx.get_file_path("tts.wav"), fast=False, words=True
-        )
+            ctx.progress(0.3, "Synthesizing voice...")
+            duration = ctx.ttsengine.synthesize(
+                script, ctx.get_file_path(f"tts_{i}.wav")
+            )
+            audioclip = mp.AudioFileClip(ctx.get_file_path(f"tts_{i}.wav"))
+            audioclip = audioclip.with_start(ctx.duration)
+            text_audio.append(audioclip)
+            ctx.progress(0.2, f"Transcribing chapter: {chapter['title']}...")
+            timed_script = ctx.transcriptionengine.transcribe(
+                ctx.get_file_path(f"tts_{i}.wav"), fast=False, words=True
+            )
 
-        sentence_split_script = []
-        current_sentence = None
+            sentence_split_script = []
+            current_sentence = None
 
-        for word in ctx.timed_script.copy():
-            if current_sentence is None:
-                # Initialize the first sentence
-                current_sentence = {
-                    "text": word["text"],
-                    "end": word["end"],
-                    "start": word["start"],
-                }
-            elif word["text"].endswith((".", "!", "?")):
-                # Add the word to the current sentence and finalize it
-                current_sentence["text"] += f" {word['text']}"
-                current_sentence["end"] = word["end"]
+            for word in timed_script.copy():
+                if current_sentence is None:
+                    # Initialize the first sentence
+                    current_sentence = {
+                        "text": word["text"],
+                        "end": word["end"],
+                        "start": word["start"],
+                    }
+                elif word["text"].endswith((".", "!", "?")):
+                    # Add the word to the current sentence and finalize it
+                    current_sentence["text"] += f" {word['text']}"
+                    current_sentence["end"] = word["end"]
+                    sentence_split_script.append(current_sentence)
+                    current_sentence = None  # Prepare to start a new sentence
+                else:
+                    # Continue adding words to the current sentence
+                    current_sentence["text"] += f" {word['text']}"
+                    current_sentence["end"] = word["end"]
+
+            # If the last sentence didn't end with a punctuation mark
+            if current_sentence is not None:
                 sentence_split_script.append(current_sentence)
-                current_sentence = None  # Prepare to start a new sentence
-            else:
-                # Continue adding words to the current sentence
-                current_sentence["text"] += f" {word['text']}"
-                current_sentence["end"] = word["end"]
-        # If the last sentence didn't end with a punctuation mark
-        if current_sentence is not None:
-            sentence_split_script.append(current_sentence)
-
-        ctx.progress(0.5, "Generating images...")
-        system = prompts["imager"]["system"]
-        chat = prompts["imager"]["chat"]
prompts["imager"]["chat"] - chat = chat.replace("{user_instructions}", str(self.user_instructions)) - chat = chat.replace("{assets_instructions}", str(self.assets_instructions)) - chat = chat.replace("{video_transcript}", str(sentence_split_script)) - assets: list[dict[str, str | float]] = ctx.powerfulllmengine.generate( - system_prompt=system, - chat_prompt=chat, - temperature=1, - max_tokens=4096, - json_mode=True, - )["assets"] - for i, asset in enumerate(assets): - if asset["type"] == "stock": - ctx.progress(0.5, f"Getting stock image {i + 1}...") - ctx.index_4.append( - ctx.stockimageengine.get( - asset["query"], asset["start"], asset["end"] + ctx.progress(0.2, f"Generating video for chapter: {chapter['title']}...") + system = prompts["imager"]["system"] + chat = prompts["imager"]["chat"] + chat = chat.replace("{user_instructions}", str(self.user_instructions)) + chat = chat.replace("{assets_instructions}", str(self.assets_instructions)) + chat = chat.replace("{video_transcript}", str(sentence_split_script)) + assets: list[dict[str, str | float]] = ctx.powerfulllmengine.generate( + system_prompt=system, + chat_prompt=chat, + temperature=1, + max_tokens=4096, + json_mode=True, + )["assets"] + for i, asset in enumerate(assets): + if asset["type"] == "stock": + ctx.progress(0.5, f"Getting stock image {i + 1}...") + ctx.index_4.append( + ctx.stockimageengine.get( + asset["query"], + asset["start"] + ctx.duration, + asset["end"] + ctx.duration, + ) ) - ) - elif asset["type"] == "ai": - ctx.progress(0.5, f"Generating AI image {i + 1}...") - ctx.index_5.append( - ctx.aiimageengine.generate( - asset["prompt"], asset["start"], asset["end"] + elif asset["type"] == "ai": + ctx.progress(0.5, f"Generating AI image {i + 1}...") + ctx.index_5.append( + ctx.aiimageengine.generate( + asset["prompt"], + asset["start"] + ctx.duration, + asset["end"] + ctx.duration, + ) ) - ) + ctx.duration += duration + 0.5 + ctx.audio.extend(text_audio) if not isinstance(ctx.audiobackgroundengine, engines.NoneEngine): ctx.progress(0.6, "Generating audio background...") ctx.audio.append(ctx.audiobackgroundengine.get_background()) if not isinstance(ctx.backgroundengine, engines.NoneEngine): ctx.progress(0.65, "Generating background...") - ctx.audio.append(ctx.backgroundengine.get_background()) + ctx.index_0.append(ctx.backgroundengine.get_background()) ctx.progress(0.7, "Rendering video...") clips = [ @@ -230,11 +243,13 @@ class ScriptedVideoPipeline(BasePipeline): lines=4, max_lines=6, label="Video instructions", + info="Explain what the video should be about, how many chapters, and any specific instructions.", ), gr.Textbox( lines=4, max_lines=6, label="Assets only instructions", + info="Explain how the assets should be used in the video. When, how many, and of what type (stock images, AI or both)", ), ratio, width,