Formatting

2026-03-03 02:14:54 +00:00 · 2024-02-15 17:54:13 +01:00
parent a32f339981
commit 45a48cfa49
1 changed file with 44 additions and 41 deletions
--- a/src/engines/TTSEngine/BaseTTSEngine.py
+++ b/src/engines/TTSEngine/BaseTTSEngine.py
@@ -7,52 +7,53 @@ from abc import ABC, abstractmethod
 from ..BaseEngine import BaseEngine
 class Word(TypedDict):
    start: str
    end: str
    text: str
 class BaseTTSEngine(BaseEngine):
 class BaseTTSEngine(BaseEngine):
    @abstractmethod
    def synthesize(self, text: str, path: str) -> str:
        pass
-    
+
    def time_with_whisper(self, path: str) -> list[Word]:
-            """
+        """
-            Transcribes the audio file at the given path using a pre-trained model and returns a list of words.
+        Transcribes the audio file at the given path using a pre-trained model and returns a list of words.
-            Args:
+        Args:
-                path (str): The path to the audio file.
+            path (str): The path to the audio file.
-            Returns:
+        Returns:
-                list[Word]: A list of Word objects representing the transcribed words.
+            list[Word]: A list of Word objects representing the transcribed words.
-                Example:
+            Example:
-                ```json
+            ```json
-                [
+            [
-                    {
+                {
-                        "start": "0.00",
+                    "start": "0.00",
-                        "end": "0.50",
+                    "end": "0.50",
-                        "text": "Hello"
+                    "text": "Hello"
-                    },
+                },
-                    {
+                {
-                        "start": "0.50",
+                    "start": "0.50",
-                        "end": "1.00",
+                    "end": "1.00",
-                        "text": "world"
+                    "text": "world"
-                    }
+                }
-                ]
+            ]
-                ```
+            ```
-            """
+        """
-            device = "cuda" if is_available() else "cpu"
+        device = "cuda" if is_available() else "cpu"
-            audio = wt.load_audio(path)
+        audio = wt.load_audio(path)
-            model = wt.load_model("tiny", device=device)
+        model = wt.load_model("tiny", device=device)
-            
+
-            result = wt.transcribe(model=model, audio=audio)
+        result = wt.transcribe(model=model, audio=audio)
-            results = [word for chunk in result for word in chunk["words"]]
+        results = [word for chunk in result for word in chunk["words"]]
-            for result in results:
+        for result in results:
-                # Not needed for the current use case
+            # Not needed for the current use case
-                del result["confidence"]
+            del result["confidence"]
-            return results
+        return results
    def force_duration(self, duration: float, path: str):
        """
@@ -66,12 +67,14 @@ class BaseTTSEngine(BaseEngine):
            None
        """
        audio_clip = mp.AudioFileClip(path)
-        
+
        if audio_clip.duration > duration:
            speed_factor = audio_clip.duration / duration
-            
+
-            new_audio = audio_clip.fx(mp.vfx.speedx, speed_factor, final_duration=duration)
+            new_audio = audio_clip.fx(
-            
+                mp.vfx.speedx, speed_factor, final_duration=duration
-            new_audio.write_audiofile(path, codec='libmp3lame')
+            )
-            
+
-        audio_clip.close()
+            new_audio.write_audiofile(path, codec="libmp3lame")
        audio_clip.close()