mirror of https://github.com/Paillat-dev/FABLE.git
synced 2026-01-02 01:06:20 +00:00
feat(audio_prompts): add default audio prompts for narrator
feat(audio_prompts): add en_narrator_deep audio prompt for narrator
feat(audio_prompts): add en_narrator_light_bg audio prompt for narrator
fix(video.py): fix indentation and add prompt for generating thumbnail
fix(montage.py): fix indentation and add prompt for generating thumbnail
fix(montage.py): fix image download for wikimage slides
fix(speak.py): remove unused import statement
fix(speak.py): remove unused variable 'fakenames'
feat(speak.py): add function 'remove_blank_moments' to remove silent parts from audio file
feat(speak.py): add function 'optimize_string_groups' to optimize string groups for audio generation
fix(speak.py): fix comment indentation in 'generate_voice' function
fix(speak.py): remove unused imports in 'generate_voice' function
fix(speak.py): remove unused variable 'reduced_noise' in 'generate_voice' function
fix(speak.py): remove unused import statement for 'logging' module
fix(speak.py): remove unused print statements in 'main' function
fix(wiki_downloader.py): fix Google search URL to include correct query parameter
fix(wiki_downloader.py): reduce sleep time after page load to 1 second
fix(wiki_downloader.py): increase sleep time after image click to 5 seconds
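As a quick orientation before the diff: the sketch below shows how the two new speak.py helpers are meant to fit together. It is illustrative only; the import path, file name, and sample text are assumptions and not part of this commit (the repository may expose speak.py under a package such as utils).

# Minimal usage sketch of the new helpers (assumed names/paths; not from the commit itself).
import nltk
from speak import optimize_string_groups, remove_blank_moments  # assumed import path

nltk.download("punkt", quiet=True)  # sentence tokenizer also used inside generate_voice

script_text = "First sentence. Second one. A third, slightly longer sentence for the narrator."
sentences = nltk.sent_tokenize(script_text)

# Re-pack the sentences into groups of at most ~100 characters so each
# TTS call receives a reasonably sized chunk of text.
groups = optimize_string_groups(sentences)
print(groups)

# After a WAV file has been generated (e.g. by VoiceGenerator.generate_voice),
# strip long silent stretches in place, keeping ~0.5 s of padding around speech.
remove_blank_moments("audio0.wav", silence_thresh=-50, silence_chunk_len=500)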
@@ -10,7 +10,7 @@ from moviepy.editor import concatenate_videoclips, CompositeAudioClip, concatena
from moviepy.audio.io.AudioFileClip import AudioFileClip
from moviepy.audio.fx.all import volumex, audio_fadein, audio_fadeout # type: ignore
from utils.misc import getenv

from utils.wiki_downloader import download_image as wiki_download_image

unsplash_access = getenv("unsplash_access_key")
if not unsplash_access:
@@ -22,64 +22,82 @@ async def prepare(path):
script = json.load(f)
f.close()
if not os.path.exists(path + "/slides"): os.mkdir(path + "/slides")
fresh = False
if not os.path.exists(path + "/audio"):
os.mkdir(path + "/audio")
fresh = True
with open("prompts/marp.md", 'r', encoding='utf-8') as f:
if not os.path.exists(path + "/audio"): os.mkdir(path + "/audio")
choosen_voice = random.choice(voices)
with open(os.path.join(os.getcwd(), "prompts", "marp.md"), 'r', encoding='utf-8') as f:
marp = f.read()
f.close()
if fresh:
choosen_voice = random.choice(voices)
generator = VoiceGenerator(speaker=choosen_voice)
for i in range(len(script)):
audio_path = path + "/audio/audio" + str(i) + ".wav"
if not os.path.exists(audio_path):
generator.generate_voice(audio_path, script[i]['spoken'])
if "image" in script[i]:
if os.path.exists(path + "/slides/assets/slide" + str(i) + ".md"):
#skip this slide
f.close()
for i in range(len(script)):
audio_path = os.path.join(path, "audio", "audio" + str(i) + ".wav")
generator = None
if not os.path.exists(audio_path):
if not generator:
generator = VoiceGenerator(speaker=choosen_voice)
print("Generating audio for slide " + str(i))
generator.generate_voice(audio_path, script[i]['spoken'])
if "image" in script[i]:
if os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".md")) and os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".png")):
#skip this slide
continue
if not os.path.exists(path + "/slides/assets"):
os.mkdir(path + "/slides/assets")
url= unsplash_url + script[i]['image'].replace("+", ",")
r = requests.get(url)
real_url = r.json()['urls']['raw']
with open(path + "/slides/assets/slide" + str(i) + ".jpg", 'wb') as f:
f.write(requests.get(real_url).content)
f.close()
content = marp + f"\n\n"
with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
f.write(content)
elif "wikimage" in script[i]:
if os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".md")) and os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".png")):
#skip this slide
continue
if not os.path.exists(path + "/slides/assets"):
os.mkdir(path + "/slides/assets")
r = 0
while True:
try:
print("Trying to download image for slide " + str(i))
wiki_download_image(script[i]['wikimage'], os.path.abspath(os.path.join(path, "slides", "assets", "slide" + str(i) + ".jpg")))
print("Downloaded image for slide with wikiimage " + str(i))
break
except:
r += 1
if r > 5:
break
continue
if not os.path.exists(path + "/slides/assets"):
os.mkdir(path + "/slides/assets")
url= unsplash_url + script[i]['image']
r = requests.get(url)
real_url = r.json()['urls']['raw']
with open(path + "/slides/assets/slide" + str(i) + ".jpg", 'wb') as f:
f.write(requests.get(real_url).content)
f.close()
content = marp + f"\n\n"
with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
f.write(content)
elif "markdown" in script[i]:
if os.path.exists(path + "/slides/slide" + str(i) + ".md"):
#skip this slide
continue
with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
f.write(marp + "\n\n" + script[i]['markdown'])
elif "huge" in script[i]:
#use fit
if os.path.exists(path + "/slides/slide" + str(i) + ".md"):
#skip this slide
continue
with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
f.write(marp + "\n\n# <!-- fit --> " + script[i]['huge'])
else:
if os.path.exists(path + "/slides/slide" + str(i) + ".md"):
#skip this slide
continue
with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
f.write(marp + "\n\n") # blank slide
content = marp + f"\n\n"
with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
f.write(content)
elif "markdown" in script[i]:
if os.path.exists(path + "/slides/slide" + str(i) + ".md") and os.path.exists(path + "/slides/slide" + str(i) + ".png"):
#skip this slide
continue
with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
f.write(marp + "\n\n" + script[i]['markdown'])
elif "huge" in script[i]:
#use fit
if os.path.exists(path + "/slides/slide" + str(i) + ".md") and os.path.exists(path + "/slides/slide" + str(i) + ".png"):
#skip this slide
continue
with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
f.write(marp + "\n\n# <!-- fit --> " + script[i]['huge'])
else:
if os.path.exists(path + "/slides/slide" + str(i) + ".md") and os.path.exists(path + "/slides/slide" + str(i) + ".png"):
#skip this slide
continue
with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
f.write(marp + "\n\n") # blank slide
for i in range(len(script)):
markdown_path = os.path.join(path, f"slides/slide{i}.md")
markdown_path = os.path.abspath(markdown_path)
image_path = os.path.join(path, f"slides/slide{i}.png")
image_path = os.path.abspath(image_path)
if os.path.exists(markdown_path):
#skip this slide
continue
command = f'marp.exe "{markdown_path}" -o "{image_path}" --allow-local-files'
os.system(command)
if not os.path.exists(image_path):
command = f'marp.exe "{markdown_path}" -o "{image_path}" --allow-local-files'
os.system(command)
return script

def convert_seconds_to_time_string(seconds):
@@ -113,8 +131,6 @@ async def mount(path, script):
])
length = complete_audio.duration
total_length += length
print(script[i])
print(script[i]['spoken'])
srt = subs(length, total_length, script[i]['spoken'], srt, i)
slide = ImageClip(path + "/slides/slide" + str(i) + ".png").set_duration(length)
slide = slide.set_audio(complete_audio)
@@ -122,7 +138,7 @@ async def mount(path, script):
randmusic = random.choice(os.listdir("musics"))
while randmusic.endswith(".txt"): randmusic = random.choice(os.listdir("musics"))
randpath = "musics/" + randmusic
music = AudioFileClip(randpath).set_duration(total_length)
music = AudioFileClip(randpath)
music = audio_fadein(music, 20)
music = audio_fadeout(music, 20)
music = volumex(music, 0.2)
@@ -131,6 +147,7 @@ async def mount(path, script):
for i in range(int(total_length / music.duration)):
musics.append(music)
music = concatenate_audioclips(musics)
music = music.set_duration(total_length)
final_clip = concatenate_videoclips(clips, method="compose")
existing_audio = final_clip.audio
final_audio = CompositeAudioClip([existing_audio, music])
@@ -142,4 +159,4 @@ async def mount(path, script):
f.close()
return music_credit or ""
else:
return None
return ""
@@ -1,5 +1,5 @@

import os
from pydub import AudioSegment, silence

fakenames = {
"Alexander": "p230",
@@ -11,16 +11,70 @@ fakenames = {

voices = ["Alexander", "Benjamin", "Amelia", "Katherine", "Johanne"]


def remove_blank_moments(file_path, silence_thresh= -50, silence_chunk_len=500):
# Load audio file
audio = AudioSegment.from_wav(file_path)

# Detect non-silent parts
nonsilent_data = silence.detect_nonsilent(audio, min_silence_len=silence_chunk_len, silence_thresh=silence_thresh)

# Create new audio file
final_audio = AudioSegment.empty()

# Iterate over non-silent parts and append to the final_audio with 0.5 seconds before and after each segment
for idx, (start_i, end_i) in enumerate(nonsilent_data):
start_i = max(0, start_i - 500) # 0.5 seconds before
end_i += 500 # 0.5 seconds after

segment = audio[start_i:end_i]

# Only append silence after the first segment
if idx > 0:
final_audio += AudioSegment.silent(duration=500)

final_audio += segment
# Save the result
if not os.path.exists(os.path.abspath(os.path.join(os.getcwd(), "temp"))):
os.mkdir(os.path.abspath(os.path.join(os.getcwd(), "temp")))
tempfile_path = os.path.abspath(os.path.join(os.getcwd(), "temp", "temp.wav"))
final_audio.export(tempfile_path, format="wav")
os.remove(file_path)
os.rename(tempfile_path, file_path)


def optimize_string_groups(strings):
optimized_groups = []
current_group = []
current_length = 0

for string in strings:
string_length = len(string) + len(current_group) # Account for spaces between strings
if current_length + string_length <= 100:
current_group.append(string)
current_length += string_length
else:
optimized_groups.append(' '.join(current_group)) # Join strings with spaces
current_group = [string]
current_length = len(string)

if current_group:
optimized_groups.append(' '.join(current_group))

return optimized_groups

class VoiceGenerator:
def __init__(self, mode="Bark", speaker=""):
self.mode = mode
self.speaker = speaker
if mode == "Bark":
os.environ["XDG_CACHE_HOME"] = os.path.join(os.getcwd(), "bark_cache")
from bark import preload_models, generation

from bark import preload_models
print("Loading Bark voice generator")
preload_models()
self.speaker = "v2/en_speaker_6"
#self.speaker = os.path.abspath(os.path.join(os.getcwd(), "audio_prompts", "en_male_professional_reader.npz"))
self.speaker = os.path.join(os.getcwd(), "audio_prompts", "en_narrator_light_bg.npz")
print(f"Generating voice for Bark with speaker {self.speaker}")
else:
from TTS.api import TTS
model = "tts_models/en/vctk/vits"
@@ -43,20 +97,27 @@ class VoiceGenerator:
import numpy as np
import nltk
sentences = nltk.sent_tokenize(text)
sentences = optimize_string_groups(sentences)
print(sentences)
pieces = []
silence = np.zeros(int(0.25 * SAMPLE_RATE)) # quarter second of silence
for sentence in sentences:
audio_array = generate_audio(sentence, history_prompt=self.speaker)
pieces += [audio_array, silence.copy()]
if not sentence == "":
audio_array = generate_audio(sentence, history_prompt=self.speaker)
pieces += [audio_array, silence.copy()]
audio_array = np.concatenate(pieces)
soundfile.write(path, audio_array, SAMPLE_RATE, format="WAV", subtype="PCM_16")
rate, data = wavread(path)
reduced_noise = nr.reduce_noise(y=data, sr=rate)
os.remove(path)
wavwrite(path, rate, reduced_noise)
'''
remove silence
'''
remove_blank_moments(path)
else:
self.tts.tts_to_file(text=text, file_path=path, speaker=self.speaker, speed=1, emotion="Happy")
if __name__ == "__main__":
import logging
logging.basicConfig(level=logging.INFO)
print("Testing voice generator")
generator = VoiceGenerator()
generator.generate_voice("test/test_r.wav", "Hello there!")
generator.generate_voice("test/teste_r.wav", "This is a test. I like the words python, django and flask. Betty bought a bit of butter but the butter was bitter. So she bought some better butter to make the bitter butter better.")
print("Loaded voice generator")
# generator.generate_voice("test/test_r.wav", "Hello there!")
generator.generate_voice("test/tast_timbernerslee.wav", "But his greatest claim to fame is undoubtedly his invention of the World Wide Web back in 1989. Can you imagine a world without the internet? [Laughs] No, thank you!")
@@ -28,6 +28,11 @@ Answer without anything else, just with the 2 textes. Answer with text1 on the f
Here is the title of the video: [TITLE]
Here is the description of the video: [DESCRIPTION]'''


# TODO: make jpg qith 90% quality default when generating the image to avoid having to convert it later



async def rand_gradient(image):
randr = random.SystemRandom().randint(1, 20)
randg = random.SystemRandom().randint(1, 20)
@@ -110,11 +115,20 @@ async def generate_image(path, text1, text2):
drawtext2.text((imgtext2.size[0]//8*2.5, imgtext2.size[1]//5*2), text2def, font=font2, fill=(textcolor2[0], textcolor2[1], textcolor2[2]))
imgtext2 = imgtext2.rotate(5, expand=True)
#paste the textes on the image
img.paste(bcg, (0, 0), bcg)
bcg = bcg.convert('RGBA')
#also set the bcg size to the image size
bcg = bcg.resize((1920, 1080))
img.paste(bcg, (0, 0), bcg) # TODO: make it work with standard pngs (non rgba)
img.paste(imgtext1, (0, 0-img.size[1]//8), imgtext1)
if len(text1def.split("\n")) > 2: #if the text is too long, put the second text on the third line
img.paste(imgtext2, (0, img.size[1]//8), imgtext2)
else:
img.paste(imgtext2, (0, 0), imgtext2)
img.save(path + "/miniature.png")
return path + "/miniature.png"
#disable the alpha channel
img = img.convert('RGB')
img_path = os.path.abspath(os.path.join(path, "thumbnail.jpg"))
for quality in range(100, 0, -1):
img.save(img_path, quality=quality)
if os.path.getsize(img_path) < 2000000:
break
return img_path