Files
viralfactory/src/engines/TTSEngine/BaseTTSEngine.py

45 lines
1.0 KiB
Python
Raw Normal View History

2024-02-23 13:12:48 +01:00
from abc import abstractmethod
2024-02-23 09:50:43 +01:00
from typing import TypedDict
2024-03-02 15:19:30 +01:00
import moviepy as mp
2024-02-15 14:11:16 +01:00
import whisper_timestamped as wt
from torch.cuda import is_available
2024-02-13 14:15:27 +01:00
from ..BaseEngine import BaseEngine
2024-02-15 17:54:13 +01:00
2024-02-15 14:11:16 +01:00
class Word(TypedDict):
    """Typed dictionary describing a single word entry.

    All three fields are declared as strings.
    NOTE(review): ``start`` / ``end`` look like the word's timing boundaries;
    their unit and format are not visible in this file -- confirm against the
    code that produces these dictionaries.
    """

    # Beginning marker for the word.
    start: str
    # Ending marker for the word.
    end: str
    # The word's text.
    text: str
2024-02-13 14:15:27 +01:00
2024-02-15 17:54:13 +01:00
class BaseTTSEngine(BaseEngine):
    """Base class for TTS engines.

    Declares the abstract ``synthesize`` hook and provides ``force_duration``,
    a helper that speeds up an audio file that runs longer than a given limit.
    """

    @abstractmethod
    def synthesize(self, text: str, path: str) -> float:
        """Abstract hook that concrete engines must override.

        Args:
            text (str): Input text. Presumably the text to be spoken --
                confirm against concrete engines.
            path (str): A file path. Presumably where the generated audio is
                written -- confirm against concrete engines.

        Returns:
            float: NOTE(review): the meaning of the returned number is not
                visible in this file (possibly the audio duration) -- confirm.
        """

    def force_duration(self, duration: float, path: str):
        """
        Speed up the audio file at ``path`` so it lasts at most ``duration``.

        The file is only modified when it is longer than ``duration``; shorter
        or equal-length audio is left untouched. When it is modified, the
        result is re-encoded with the ``libmp3lame`` codec and replaces the
        original file.

        Args:
            duration (float): The desired maximum duration in seconds. Must be
                greater than zero.
            path (str): The path to the audio clip file (rewritten in place).

        Returns:
            None

        Raises:
            ValueError: If ``duration`` is not strictly positive.
        """
        # Function-scope imports: only this method needs them.
        import contextlib
        import os
        import shutil
        import tempfile

        # A zero duration would divide by zero below and a negative one would
        # yield a negative speed factor; reject both up front.
        if duration <= 0:
            raise ValueError(f"duration must be positive, got {duration!r}")

        audio_clip = mp.AudioFileClip(path)
        try:
            if audio_clip.duration <= duration:
                return

            speed_factor = audio_clip.duration / duration
            new_audio = audio_clip.fx(
                mp.vfx.speedx, speed_factor, final_duration=duration
            )

            # The source clip is still open on `path` at this point, so the
            # output must not be written onto the same file. Encode into a
            # sibling temporary file (same directory, same extension) and swap
            # it in once the source has been closed.
            target = os.path.abspath(path)
            fd, tmp_path = tempfile.mkstemp(
                suffix=os.path.splitext(target)[1],
                dir=os.path.dirname(target),
            )
            os.close(fd)
            try:
                new_audio.write_audiofile(tmp_path, codec="libmp3lame")
            except BaseException:
                # Do not leave a partial temp file behind on failure.
                with contextlib.suppress(OSError):
                    os.remove(tmp_path)
                raise
        finally:
            # Always release the reader, even if loading/encoding failed.
            audio_clip.close()

        # mkstemp creates the file with restrictive permissions; mirror the
        # original file's mode before replacing it (best effort).
        with contextlib.suppress(OSError):
            shutil.copymode(path, tmp_path)
        os.replace(tmp_path, path)