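"""Scripted long-form video pipeline.

Asks an LLM to outline chapters, then for each chapter writes a script,
narrates it with TTS, transcribes the narration for word-level timing, and
requests timed visual assets. Everything is composited into a single video,
after which metadata is generated and the result is uploaded.
"""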
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

import gradio as gr
import moviepy as mp

from . import BasePipeline
from ... import engines
from ...chore import GenerationContext
from ...utils.prompting import get_prompts


class ScriptedVideoPipeline(BasePipeline):
    name = "Scripted Long Form Pipeline"
    description = (
        "A pipeline that generates a long form video based on a script instruction."
    )
    num_options = 5

    def __init__(self, options: list) -> None:
        self.user_instructions = options[0]
        self.assets_instructions = options[1]
        # options[2] is the resolution-ratio dropdown; only the resolved
        # width and height below are needed here.
        self.width = options[3]
        self.height = options[4]
        super().__init__()
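
    # Asset dicts come from the LLM's JSON output and are expected to carry a
    # "type" of "stock" or "ai", a matching "query" or "prompt", and
    # "start"/"end" timestamps in seconds.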
    def get_asset(self, asset: dict[str, str | float], i: int) -> mp.VideoClip:
        if asset["type"] == "stock":
            return self.ctx.stockimageengine.get(
                asset["query"], asset["start"], asset["end"], i
            )
        elif asset["type"] == "ai":
            return self.ctx.aiimageengine.generate(
                asset["prompt"], asset["start"], asset["end"], i
            )
        # Fail loudly on an unexpected type instead of silently returning None
        raise ValueError(f"Unknown asset type: {asset['type']}")
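
    # Fetches all assets in parallel; a failed asset is surfaced as a Gradio
    # warning and skipped so one bad clip does not abort the whole render.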
    def get_assets_concurrent(
        self, assets: list[dict[str, str | float]]
    ) -> list[mp.VideoClip]:
        results = []
        with ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(self.get_asset, asset, i)
                for i, asset in enumerate(assets)
            ]
            for future in as_completed(futures):
                try:
                    results.append(future.result())
                except Exception as e:
                    gr.Warning(f"Failed to generate an asset: {e}")
        return results
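
    # Main entry point: write each chapter, narrate it, transcribe it for
    # timing, generate timed assets, then composite, render, and upload.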
    def launch(self, ctx: GenerationContext) -> None:
        ctx.progress(0.1, "Loading settings...")
        ctx.setup_dir()
        ctx.width = self.width
        ctx.height = self.height

        prompts = get_prompts("long_form", by_file_location=__file__)
        ctx.progress(0.2, "Generating chapters...")
        system = prompts["chapters"]["system"]
        chat = prompts["chapters"]["chat"]
        chat = chat.replace("{user_instructions}", str(self.user_instructions))
        chapters: list[dict[str, str]] = ctx.powerfulllmengine.generate(
            system_prompt=system,
            chat_prompt=chat,
            json_mode=True,
            temperature=1,
            max_tokens=4096,
        )["chapters"]
        ctx.script = ""

        text_audio = []

        ctx.duration = 0
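
        # ctx.duration tracks the global timeline position (in seconds) at
        # which the next chapter starts; narration clips and asset timestamps
        # are all offset by it.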
        for i, chapter in enumerate(chapters):
            ctx.progress(0.2, f"Generating chapter: {chapter['title']}...")
            system = prompts["writer"]["system"]
            chat = prompts["writer"]["chat"]
            chat = (
                chat.replace("{user_instructions}", str(self.user_instructions))
                .replace("{chapter_title}", chapter["title"])
                .replace("{chapter_instructions}", chapter["explanation"])
            )
            script = ctx.powerfulllmengine.generate(
                system_prompt=system,
                chat_prompt=chat,
                temperature=1,
                max_tokens=4096,
                json_mode=True,
            )["chapter"]
            ctx.script += script
            ctx.script += "\n"

            ctx.progress(0.3, "Synthesizing voice...")
            duration = ctx.ttsengine.synthesize(
                script, ctx.get_file_path(f"tts_{i}.wav")
            )
            audioclip = mp.AudioFileClip(ctx.get_file_path(f"tts_{i}.wav"))
            audioclip = audioclip.with_start(ctx.duration)
            text_audio.append(audioclip)
            ctx.progress(0.2, f"Transcribing chapter: {chapter['title']}...")
            timed_script = ctx.transcriptionengine.transcribe(
                ctx.get_file_path(f"tts_{i}.wav"), fast=False, words=True
            )
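
            # Regroup the word-level transcript into timed sentences so the
            # imager prompt sees readable sentences instead of single words.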
            sentence_split_script = []
            current_sentence = None

            for word in timed_script.copy():
                if current_sentence is None:
                    # Start a new sentence with this word
                    current_sentence = {
                        "text": word["text"],
                        "start": word["start"],
                        "end": word["end"],
                    }
                elif word["text"].endswith((".", "!", "?")):
                    # Add the word to the current sentence and finalize it
                    current_sentence["text"] += f" {word['text']}"
                    current_sentence["end"] = word["end"]
                    sentence_split_script.append(current_sentence)
                    current_sentence = None  # Prepare to start a new sentence
                else:
                    # Continue adding words to the current sentence
                    current_sentence["text"] += f" {word['text']}"
                    current_sentence["end"] = word["end"]

            # Flush the last sentence if it didn't end with a punctuation mark
            if current_sentence is not None:
                sentence_split_script.append(current_sentence)
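
            # Ask the LLM for a timed list of visual assets covering this
            # chapter's transcript.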
            ctx.progress(0.2, f"Generating video for chapter: {chapter['title']}...")
            system = prompts["imager"]["system"]
            chat = prompts["imager"]["chat"]
            chat = chat.replace("{user_instructions}", str(self.user_instructions))
            chat = chat.replace("{assets_instructions}", str(self.assets_instructions))
            chat = chat.replace("{video_transcript}", str(sentence_split_script))
            assets: list[dict[str, str | float]] = ctx.powerfulllmengine.generate(
                system_prompt=system,
                chat_prompt=chat,
                temperature=1,
                max_tokens=4096,
                json_mode=True,
            )["assets"]
            for asset in assets:
                asset["start"] += ctx.duration
                asset["end"] += ctx.duration
            ctx.progress(0.2, f"Generating assets for chapter: {chapter['title']}...")
            clips = self.get_assets_concurrent(assets)
            ctx.index_5.extend(clips)
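
            # Advance the timeline past this chapter's narration, plus a
            # short half-second gap before the next chapter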
            ctx.duration += duration + 0.5
        ctx.audio.extend(text_audio)
        if not isinstance(ctx.audiobackgroundengine, engines.NoneEngine):
            ctx.progress(0.6, "Generating audio background...")
            ctx.audio.append(ctx.audiobackgroundengine.get_background())

        if not isinstance(ctx.backgroundengine, engines.NoneEngine):
            ctx.progress(0.65, "Generating background...")
            ctx.index_0.append(ctx.backgroundengine.get_background())
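
        # ctx.index_0 through ctx.index_9 act as z-ordered layers: index_0 is
        # composited at the bottom (backgrounds) and index_9 on top; the
        # chapter assets were added to index_5 above.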
        ctx.progress(0.7, "Rendering video...")
        clips = [
            *ctx.index_0,
            *ctx.index_1,
            *ctx.index_2,
            *ctx.index_3,
            *ctx.index_4,
            *ctx.index_5,
            *ctx.index_6,
            *ctx.index_7,
            *ctx.index_8,
            *ctx.index_9,
        ]
        audio = mp.CompositeAudioClip(ctx.audio)
        clip = (
            mp.CompositeVideoClip(clips, size=(ctx.width, ctx.height))
            .with_duration(ctx.duration)
            .with_audio(audio)
        )
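        # NOTE: "av1_nvenc" assumes an NVIDIA GPU with AV1 hardware encoding
        # and an ffmpeg build that includes it; on other machines a software
        # codec such as "libx264" is a safer default.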
        clip.write_videofile(
            ctx.get_file_path("final.mp4"), fps=60, threads=16, codec="av1_nvenc"
        )
        system = prompts["description"]["system"]
        chat = prompts["description"]["chat"]
        chat = chat.replace("{script}", ctx.script)
        metadata = ctx.powerfulllmengine.generate(
            system_prompt=system, chat_prompt=chat, json_mode=True, temperature=1
        )
        ctx.title = metadata["title"]
        ctx.description = metadata["description"]

        ctx.description += "\n" + ctx.credits
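
        # Try every configured upload engine; a failure is reported as a
        # warning and does not stop the remaining uploads.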
        ctx.progress(0.9, "Uploading video...")
        for engine in ctx.uploadengine:
            try:
                engine.upload(
                    ctx.title, ctx.description, ctx.get_file_path("final.mp4")
                )
            except Exception as e:
                gr.Warning(f"{engine.name} failed to upload the video: {e}")

        ctx.progress(0.99, "Storing in database...")
        ctx.store_in_db()
        ctx.progress(1, "Done!")

        # Open the output directory in the system file browser; "start" covers
        # Windows and "open" covers macOS (Linux would need xdg-open instead)
        command = "start" if os.name == "nt" else "open"
        os.system(f"{command} {os.path.abspath(ctx.dir)}")
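
    # Builds the Gradio option widgets; the order of the returned list must
    # match the options list unpacked in __init__.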
    @classmethod
    def get_options(cls):
        def change_resolution(chosen_ratio: str) -> list[gr.update]:
            match chosen_ratio:
                case "1920x1080":
                    return [
                        gr.update(value=1920, visible=False),
                        gr.update(value=1080, visible=False),
                    ]
                case "1080x1920":
                    return [
                        gr.update(value=1080, visible=False),
                        gr.update(value=1920, visible=False),
                    ]
                case "1280x720":
                    return [
                        gr.update(value=1280, visible=False),
                        gr.update(value=720, visible=False),
                    ]
                case "720x1280":
                    return [
                        gr.update(value=720, visible=False),
                        gr.update(value=1280, visible=False),
                    ]
                case "custom":
                    return [gr.update(visible=True), gr.update(visible=True)]
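
        # Choosing a preset hides the width/height fields; "custom" reveals
        # them for manual entry.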
        with gr.Row():
            ratio = gr.Dropdown(
                choices=["1920x1080", "1080x1920", "1280x720", "720x1280", "custom"],
                label="Resolution",
            )
            width = gr.Number(
                value=1080, minimum=720, maximum=3840, label="Width", step=1
            )
            height = gr.Number(
                value=1920, minimum=720, maximum=3840, label="Height", step=1
            )
        ratio.change(change_resolution, inputs=[ratio], outputs=[width, height])

        return [
            gr.Textbox(
                lines=4,
                max_lines=6,
                label="Video instructions",
                info="Explain what the video should be about, how many chapters it should have, and any specific instructions.",
            ),
            gr.Textbox(
                lines=4,
                max_lines=6,
                label="Assets-only instructions",
                info="Explain how the assets should be used in the video: when, how many, and of what type (stock images, AI-generated, or both).",
            ),
            ratio,
            width,
            height,
        ]