🐛 fix(GenerationContext.py): fix import statements and add support for captioning engine

 feat(GenerationContext.py): add support for captioning engine in the GenerationContext class
The import statement for the `moviepy.editor` module is changed to `moviepy.editor as mp` to improve code readability. Additionally, the `gradio` module is imported as `gr` to improve code readability. The `GenerationContext` class now includes a `captioningengine` parameter and initializes a `captioningengine` attribute. The `setup_dir` method is modified to include a call to create a directory for the output files. The `get_file_path` method is modified to return the file path based on the output directory. The `process` method is modified to include additional steps for captioning. The `timed_script` attribute is added to store the result of the `ttsengine.synthesize` method. The `captioningengine` is used to generate captions and store them in the `captions` attribute. The final video is rendered using the `moviepy` library and saved as "final.mp4" in the output directory.
This commit is contained in:
2024-02-17 18:47:30 +01:00
parent eedbc99121
commit e3229518d4
12 changed files with 261 additions and 34 deletions

View File

@@ -0,0 +1,95 @@
import gradio as gr
from moviepy.editor import TextClip
from PIL import ImageFont
from . import BaseCaptioningEngine
class SimpleCaptioningEngine(BaseCaptioningEngine):
name = "SimpleCaptioningEngine"
description = "A basic captioning engine with nothing too fancy."
num_options = 5
def __init__(self, options: list[list | tuple | str | int | float | bool | None]):
self.font = options[0]
self.font_size = options[1]
self.stroke_width = options[2]
self.font_color = options[3]
self.stroke_color = options[4]
super().__init__()
def build_caption_object(self, text: str, start: float, end: float) -> TextClip:
return TextClip(
text,
fontsize=self.font_size,
color=self.font_color,
font=self.font,
method="caption",
size=(self.ctx.width /3 * 2, None),
).set_position(('center', 0.65), relative=True).set_start(start).set_duration(end - start)
def ends_with_punctuation(self, text: str) -> bool:
punctuations = (".", "?", "!", ",", ":", ";")
return text.strip().endswith(tuple(punctuations))
def get_captions(self) -> list[TextClip]:
#3 words per 1000 px, we do the math
max_words = int(self.ctx.width / 1000 * 3)
clips = []
words = (
self.ctx.timed_script.copy()
) # List of dicts with "start", "end", and "text"
current_line = ""
current_start = words[0]["start"]
current_end = words[0]["end"]
for i, word in enumerate(words):
# Use PIL to measure the text size
line_with_new_word = (
current_line + " " + word["text"] if current_line else word["text"]
)
pause = self.ends_with_punctuation(current_line.strip())
if len(line_with_new_word.split(" ")) > max_words or pause:
clips.append(self.build_caption_object(current_line.strip(), current_start, current_end))
current_line = word["text"] # Start a new line with the current word
current_start = word["start"]
current_end = word["end"]
else:
# If the line isn't too long, add the word to the current line
current_line = line_with_new_word
current_end = word["end"]
# Don't forget to add the last line if it exists
if current_line:
clips.append(
self.build_caption_object(current_line.strip(), current_start, words[-1]["end"])
)
return clips
@classmethod
def get_options(cls) -> list:
with gr.Column() as font_options:
with gr.Group():
font = gr.Dropdown(
label="Font",
choices=TextClip.list('font'),
value="Arial",
)
font_size = gr.Number(
label="Font Size",
minimum=70,
maximum=200,
step=1,
value=110,
)
font_color = gr.ColorPicker(label="Font Color", value="#ffffff")
with gr.Column() as font_stroke_options:
with gr.Group():
font_stroke_width = gr.Number(
label="Stroke Width",
minimum=0,
maximum=40,
step=1,
value=4,
)
font_stroke_color = gr.ColorPicker(label="Stroke Color", value="#000000")
return [font, font_size, font_stroke_width, font_color, font_stroke_color]