Formatting

This commit is contained in:
2024-02-15 17:54:13 +01:00
parent a32f339981
commit 45a48cfa49

View File

@@ -7,52 +7,53 @@ from abc import ABC, abstractmethod
from ..BaseEngine import BaseEngine from ..BaseEngine import BaseEngine
# One transcribed word together with its timing information.
# Keys (every value is annotated as str):
#   start - time at which the word begins
#   end   - time at which the word ends
#   text  - the transcribed word itself
Word = TypedDict("Word", {"start": str, "end": str, "text": str})
class BaseTTSEngine(BaseEngine):
class BaseTTSEngine(BaseEngine):
@abstractmethod
def synthesize(self, text: str, path: str) -> str:
    """
    Abstract hook that every concrete TTS engine has to override.

    Args:
        text (str): The text handed to the engine.
        path (str): A file path handed to the engine.

    Returns:
        str: Declared by the annotation; this base version has no body and
            yields nothing.
            NOTE(review): presumably the location of the generated audio -
            confirm against the concrete engines.
    """
    ...
def time_with_whisper(self, path: str) -> list[Word]:
    """
    Transcribes the audio file at the given path using a pre-trained model and returns a list of words.

    Args:
        path (str): The path to the audio file.

    Returns:
        list[Word]: The words of every transcribed segment, flattened into a
            single list, with the "confidence" entry removed from each word.
            NOTE(review): Word annotates start/end as str; the transcriber may
            emit numeric values - confirm against the library output.

    Example:
        ```json
        [
            {
                "start": "0.00",
                "end": "0.50",
                "text": "Hello"
            },
            {
                "start": "0.50",
                "end": "1.00",
                "text": "world"
            }
        ]
        ```
    """
    # Run on the GPU when one is available, otherwise fall back to the CPU.
    device = "cuda" if is_available() else "cpu"
    audio = wt.load_audio(path)
    model = wt.load_model("tiny", device=device)
    transcription = wt.transcribe(model=model, audio=audio)

    # The transcription is a mapping whose "segments" entry holds the chunks.
    # Iterating the mapping itself would only yield its key strings, and
    # indexing a string with "words" raises TypeError. A bare list of
    # segments is still accepted as-is.
    if isinstance(transcription, dict):
        segments = transcription["segments"]
    else:
        segments = transcription

    words = [word for segment in segments for word in segment["words"]]
    for word in words:
        # Not needed for the current use case; pop() tolerates words that
        # carry no confidence score.
        word.pop("confidence", None)
    return words
def force_duration(self, duration: float, path: str): def force_duration(self, duration: float, path: str):
""" """
@@ -66,12 +67,14 @@ class BaseTTSEngine(BaseEngine):
None None
""" """
audio_clip = mp.AudioFileClip(path) audio_clip = mp.AudioFileClip(path)
if audio_clip.duration > duration: if audio_clip.duration > duration:
speed_factor = audio_clip.duration / duration speed_factor = audio_clip.duration / duration
new_audio = audio_clip.fx(mp.vfx.speedx, speed_factor, final_duration=duration) new_audio = audio_clip.fx(
mp.vfx.speedx, speed_factor, final_duration=duration
new_audio.write_audiofile(path, codec='libmp3lame') )
audio_clip.close() new_audio.write_audiofile(path, codec="libmp3lame")
audio_clip.close()