mirror of
https://github.com/Paillat-dev/viralfactory.git
synced 2026-01-02 09:16:19 +00:00
🐛 fix(GenerationContext.py): fix import statements and add support for captioning engine
✨ feat(GenerationContext.py): add support for captioning engine in the GenerationContext class
The import statement for the `moviepy.editor` module is changed to `moviepy.editor as mp` to improve code readability. Additionally, the `gradio` module is imported as `gr` to improve code readability. The `GenerationContext` class now includes a `captioningengine` parameter and initializes a `captioningengine` attribute. The `setup_dir` method is modified to include a call to create a directory for the output files. The `get_file_path` method is modified to return the file path based on the output directory. The `process` method is modified to include additional steps for captioning. The `timed_script` attribute is added to store the result of the `ttsengine.synthesize` method. The `captioningengine` is used to generate captions and store them in the `captions` attribute. The final video is rendered using the `moviepy` library and saved as "final.mp4" in the output directory.
This commit is contained in:
@@ -16,8 +16,22 @@ class Word(TypedDict):
|
||||
|
||||
class BaseTTSEngine(BaseEngine):
|
||||
@abstractmethod
def synthesize(self, text: str, path: str) -> list[Word]:
    """Synthesize speech for ``text``; concrete engines must override this.

    Args:
        text: The text to turn into speech.
        path: Presumably the location the generated audio is written to
            -- TODO confirm against the concrete engines.

    Returns:
        A list of ``Word`` entries for the synthesized speech.
    """
|
||||
|
||||
def remove_punctuation(self, text: str) -> str:
    """Return ``text`` with the characters ``. , ! ? ; :`` removed.

    Every other character, including whitespace, is left untouched.
    """
    # Mapping a character to None in a translation table deletes it.
    strip_table = str.maketrans(dict.fromkeys(".,!?;:"))
    return text.translate(strip_table)
|
||||
|
||||
def fix_captions(self, script: str, captions: list[Word]) -> list[Word]:
    """Restore the script's original spelling on transcribed captions.

    Script words and captions are paired by position. When the transcribed
    word (lower-cased, punctuation removed) is contained in the matching
    script word, the caption's ``"text"`` is replaced by the script word so
    that its original casing and punctuation are kept.

    Args:
        script: The original text, split on whitespace into words.
        captions: Transcribed words; each must have a ``"text"`` key.
            Matching entries are updated in place.

    Returns:
        The captions that could be paired with a script word, in order.
        Pairing stops at the shorter of the two sequences.
    """
    new_captions = []
    # split() without an argument drops empty tokens produced by repeated
    # spaces or newlines, which would otherwise shift the alignment.
    for word, caption in zip(script.split(), captions):
        original_word = self.remove_punctuation(word.lower())
        stt_word = self.remove_punctuation(caption["text"].strip().lower())
        if stt_word in original_word:
            caption["text"] = word
        # TODO: re-align when the transcription has more or fewer words
        # than the script; for now a non-matching caption is kept as
        # transcribed so its entry is not lost.
        new_captions.append(caption)
    return new_captions
|
||||
|
||||
def time_with_whisper(self, path: str) -> list[Word]:
|
||||
"""
|
||||
@@ -46,7 +60,7 @@ class BaseTTSEngine(BaseEngine):
|
||||
"""
|
||||
device = "cuda" if is_available() else "cpu"
|
||||
audio = wt.load_audio(path)
|
||||
model = wt.load_model("tiny", device=device)
|
||||
model = wt.load_model("small", device=device)
|
||||
|
||||
result = wt.transcribe(model=model, audio=audio)
|
||||
results = [word for chunk in result["segments"] for word in chunk["words"]]
|
||||
|
||||
Reference in New Issue
Block a user