fix(GenerationContext.py): fix typo in the powerfulllmengine variable name for better readability

feat(GenerationContext.py): add setup_dir method to create a directory for output files with a timestamp
feat(GenerationContext.py): call setup_dir method before generating script and synthesizing audio to ensure output directory exists
feat(prompts/fix_captions.yaml): add a new prompt file to provide instructions for fixing captions
fix(BaseTTSEngine.py): add force_duration method to adjust audio clip duration if it exceeds a specified duration
feat(CoquiTTSEngine.py): add options for forcing duration and specifying duration in the UI
feat(utils/prompting.py): add get_prompt function to load prompt files from a specified location
fix(gradio_ui.py): set equal_height=True for engine_rows to ensure consistent height for engine options
2026-01-02 17:24:54 +00:00 · 2024-02-15 12:27:13 +01:00
parent 9f88e6d069
commit 57bcf0af8e
7 changed files with 73 additions and 9 deletions
--- a/src/engines/TTSEngine/BaseTTSEngine.py
+++ b/src/engines/TTSEngine/BaseTTSEngine.py
@@ -1,10 +1,23 @@
+import moviepy.editor as mp
 from abc import ABC, abstractmethod
+# Assuming BaseEngine is defined elsewhere in your project
 from ..BaseEngine import BaseEngine


 class BaseTTSEngine(BaseEngine):
-    pass

    @abstractmethod
    def synthesize(self, text: str, path: str) -> str:
        pass
+    
+    def force_duration(self, duration: float, path: str):  # Speed up the audio file at `path` in place so it lasts at most `duration`; shorter clips are left untouched.
+        audio_clip = mp.AudioFileClip(path)  # open the existing audio file to read its length
+        
+        if audio_clip.duration > duration:  # only clips longer than the target are rewritten
+            speed_factor = audio_clip.duration / duration  # ratio is > 1 inside this branch, i.e. a speed-up
+            
+            new_audio = audio_clip.fx(mp.vfx.speedx, speed_factor, final_duration=duration)  # NOTE(review): both a factor and final_duration are passed -- confirm which one speedx honours
+            
+            new_audio.write_audiofile(path, codec='libmp3lame')  # NOTE(review): writes back to the same path audio_clip still has open, always with the MP3 codec -- confirm this is safe and matches the file extension
+            
+        audio_clip.close()  # NOTE(review): not reached if anything above raises, and new_audio is never closed
--- a/src/engines/TTSEngine/CoquiTTSEngine.py
+++ b/src/engines/TTSEngine/CoquiTTSEngine.py
@@ -90,13 +90,15 @@ class CoquiTTSEngine(BaseTTSEngine):
        "ko",  # Korean
        "hi",  # Hindi
    ]
-    num_options = 2
+    num_options = 4

    def __init__(self, options: list):
        super().__init__()

        self.voice = options[0][0]
        self.language = options[1][0]
+        self.to_force_duration = options[2][0]
+        self.duration = options[3]

        os.environ["COQUI_TOS_AGREED"] = "1"

@@ -106,11 +108,13 @@ class CoquiTTSEngine(BaseTTSEngine):

    def synthesize(self, text: str, path: str) -> str:
        #      self.tts.tts_to_file(text=text, file_path=path, lang=self.language, speaker=self.voice)
+        if self.to_force_duration:  # NOTE(review): the tts_to_file call above is commented out, so nothing visible here creates `path` before it is read back -- confirm intended
+            self.force_duration(float(self.duration), path)  # shorten the file at `path` in place when it runs longer than the configured duration
        return path

    @classmethod
    def get_options(cls) -> list:
-        return [
+        options = [
            gr.Dropdown(
                label="Voice",
                choices=cls.voices,
@@ -124,3 +128,13 @@ class CoquiTTSEngine(BaseTTSEngine):
                value=cls.languages[0],
            ),
        ]
+    
+        duration_checkbox = gr.Checkbox(value=False)
+        duration = gr.Number(label="Duration", value=57, step=1, minimum=10, visible=False)
+        duration_switch = lambda x: gr.update(visible=x)
+        duration_checkbox.change(duration_switch, inputs=[duration_checkbox], outputs=[duration])
+        duration_checkbox_group = gr.CheckboxGroup([duration_checkbox], label="Force duration")
+
+        options.append(duration_checkbox_group)
+        options.append(duration)
+        return options