mirror of
https://github.com/Paillat-dev/FABLE.git
synced 2026-01-02 01:06:20 +00:00
chore(.gitignore): add bark_cache directory to gitignore
feat(video.py): use wav format instead of mp3 for generated audio files
feat(montage.py): use Bark TTS instead of 🐸TTS
feat(speak.py): add support for Bark TTS
fix(speak.py): remove unused 🐸TTS import and variable
fix(main.py): fix asyncio.run() call placement
docs: update requirements.txt with new dependencies
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -162,3 +162,4 @@ ideas/
|
|||||||
montageTEMP_MPY_wvf_snd.mp3
|
montageTEMP_MPY_wvf_snd.mp3
|
||||||
marp.exe
|
marp.exe
|
||||||
channels/
|
channels/
|
||||||
|
bark_cache/
|
||||||
@@ -31,7 +31,7 @@ class Video:
|
|||||||
os.makedirs( self.path)
|
os.makedirs( self.path)
|
||||||
script = None
|
script = None
|
||||||
if os.path.exists(os.path.join( self.path, "script.json")):
|
if os.path.exists(os.path.join( self.path, "script.json")):
|
||||||
if input("Video script already exists. Do you want to overwrite it ? (y/N) : ") == "y":
|
if input("Video script already exists. Do you want to overwrite it ? (y/N) : ").lower() == "y":
|
||||||
os.remove(os.path.join( self.path, "script.json"))
|
os.remove(os.path.join( self.path, "script.json"))
|
||||||
|
|
||||||
if not os.path.exists(os.path.join( self.path, "script.json")):
|
if not os.path.exists(os.path.join( self.path, "script.json")):
|
||||||
@@ -41,14 +41,17 @@ class Video:
|
|||||||
script_prompt = f.read()
|
script_prompt = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
if script_prompt:
|
if script_prompt:
|
||||||
|
printm("Using custom script prompt")
|
||||||
script = await generate_script(self.idea['title'], self.idea['description'], script_prompt)
|
script = await generate_script(self.idea['title'], self.idea['description'], script_prompt)
|
||||||
else:
|
else:
|
||||||
|
printm("Using default script prompt")
|
||||||
script = await generate_script(self.idea['title'], self.idea['description'])
|
script = await generate_script(self.idea['title'], self.idea['description'])
|
||||||
|
script = json.loads(script)
|
||||||
with open(os.path.join( self.path, "script.json"), "w") as f:
|
with open(os.path.join( self.path, "script.json"), "w") as f:
|
||||||
json.dump(json.loads(script), f)
|
json.dump(script, f)
|
||||||
f.close()
|
f.close()
|
||||||
else:
|
else:
|
||||||
with open(os.path.join( self.path, "script.json"), "r") as f:
|
with open(os.path.join(self.path, "script.json"), "r") as f:
|
||||||
script = json.load(f)
|
script = json.load(f)
|
||||||
f.close()
|
f.close()
|
||||||
await prepare( self.path)
|
await prepare( self.path)
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import requests
|
|||||||
import pysrt
|
import pysrt
|
||||||
import random
|
import random
|
||||||
|
|
||||||
from generators.speak import generate_voice, voices
|
from generators.speak import VoiceGenerator, voices
|
||||||
from moviepy.video.VideoClip import ImageClip
|
from moviepy.video.VideoClip import ImageClip
|
||||||
from moviepy.editor import concatenate_videoclips, CompositeAudioClip, concatenate_audioclips
|
from moviepy.editor import concatenate_videoclips, CompositeAudioClip, concatenate_audioclips
|
||||||
from moviepy.audio.io.AudioFileClip import AudioFileClip
|
from moviepy.audio.io.AudioFileClip import AudioFileClip
|
||||||
@@ -31,10 +31,11 @@ async def prepare(path):
|
|||||||
f.close()
|
f.close()
|
||||||
if fresh:
|
if fresh:
|
||||||
choosen_voice = random.choice(voices)
|
choosen_voice = random.choice(voices)
|
||||||
|
generator = VoiceGenerator(speaker=choosen_voice)
|
||||||
for i in range(len(script)):
|
for i in range(len(script)):
|
||||||
audio_path = path + "/audio/audio" + str(i) + ".mp3"
|
audio_path = path + "/audio/audio" + str(i) + ".wav"
|
||||||
if not os.path.exists(audio_path):
|
if not os.path.exists(audio_path):
|
||||||
generate_voice(audio_path, script[i]['spoken'], choosen_voice)
|
generator.generate_voice(audio_path, script[i]['spoken'])
|
||||||
if "image" in script[i]:
|
if "image" in script[i]:
|
||||||
if os.path.exists(path + "/slides/assets/slide" + str(i) + ".md"):
|
if os.path.exists(path + "/slides/assets/slide" + str(i) + ".md"):
|
||||||
#skip this slide
|
#skip this slide
|
||||||
@@ -70,11 +71,14 @@ async def prepare(path):
|
|||||||
with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
|
with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
|
||||||
f.write(marp + "\n\n") # blank slide
|
f.write(marp + "\n\n") # blank slide
|
||||||
for i in range(len(script)):
|
for i in range(len(script)):
|
||||||
marrkdown_path = os.path.join(path, f"slides/slide{i}.md")
|
markdown_path = os.path.join(path, f"slides/slide{i}.md")
|
||||||
if os.path.exists(f"./{path}/slides/slide{i}.png"):
|
markdown_path = os.path.abspath(markdown_path)
|
||||||
|
image_path = os.path.join(path, f"slides/slide{i}.png")
|
||||||
|
image_path = os.path.abspath(image_path)
|
||||||
|
if os.path.exists(markdown_path):
|
||||||
#skip this slide
|
#skip this slide
|
||||||
continue
|
continue
|
||||||
command = f"marp.exe {marrkdown_path} -o {path}/slides/slide{i}.png --allow-local-files"
|
command = f'marp.exe "{markdown_path}" -o "{image_path}" --allow-local-files'
|
||||||
os.system(command)
|
os.system(command)
|
||||||
return script
|
return script
|
||||||
|
|
||||||
@@ -101,7 +105,7 @@ async def mount(path, script):
|
|||||||
srt = pysrt.SubRipFile()
|
srt = pysrt.SubRipFile()
|
||||||
total_length = 0
|
total_length = 0
|
||||||
for i in range(num_slides):
|
for i in range(num_slides):
|
||||||
audio = AudioFileClip(path + "/audio/audio" + str(i) + ".mp3")
|
audio = AudioFileClip(path + "/audio/audio" + str(i) + ".wav")
|
||||||
complete_audio = CompositeAudioClip([
|
complete_audio = CompositeAudioClip([
|
||||||
AudioFileClip("silence.mp3").set_duration(1),
|
AudioFileClip("silence.mp3").set_duration(1),
|
||||||
audio,
|
audio,
|
||||||
@@ -109,6 +113,8 @@ async def mount(path, script):
|
|||||||
])
|
])
|
||||||
length = complete_audio.duration
|
length = complete_audio.duration
|
||||||
total_length += length
|
total_length += length
|
||||||
|
print(script[i])
|
||||||
|
print(script[i]['spoken'])
|
||||||
srt = subs(length, total_length, script[i]['spoken'], srt, i)
|
srt = subs(length, total_length, script[i]['spoken'], srt, i)
|
||||||
slide = ImageClip(path + "/slides/slide" + str(i) + ".png").set_duration(length)
|
slide = ImageClip(path + "/slides/slide" + str(i) + ".png").set_duration(length)
|
||||||
slide = slide.set_audio(complete_audio)
|
slide = slide.set_audio(complete_audio)
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
from TTS.api import TTS
|
|
||||||
|
|
||||||
# Running a multi-speaker and multi-lingual model
|
import os
|
||||||
|
|
||||||
# List available 🐸TTS models and choose the first one
|
|
||||||
model_best_multi = "tts_models/en/vctk/vits"
|
|
||||||
fakenames = {
|
fakenames = {
|
||||||
"Alexander": "p230",
|
"Alexander": "p230",
|
||||||
"Benjamin": "p240",
|
"Benjamin": "p240",
|
||||||
@@ -14,17 +11,52 @@ fakenames = {
|
|||||||
|
|
||||||
voices = ["Alexander", "Benjamin", "Amelia", "Katherine", "Johanne"]
|
voices = ["Alexander", "Benjamin", "Amelia", "Katherine", "Johanne"]
|
||||||
|
|
||||||
# Init TTS
|
class VoiceGenerator:
|
||||||
|
def __init__(self, mode="Bark", speaker=""):
|
||||||
|
self.mode = mode
|
||||||
|
self.speaker = speaker
|
||||||
|
if mode == "Bark":
|
||||||
|
os.environ["XDG_CACHE_HOME"] = os.path.join(os.getcwd(), "bark_cache")
|
||||||
|
from bark import preload_models, generation
|
||||||
|
|
||||||
def generate_voice(path, text, speaker="Alexander"):
|
preload_models()
|
||||||
model = model_best_multi
|
self.speaker = "v2/en_speaker_6"
|
||||||
speaker = fakenames[speaker] if speaker in fakenames else speaker
|
else:
|
||||||
print(f"Generating voice for {model} with speaker {speaker}")
|
from TTS.api import TTS
|
||||||
try:
|
model = "tts_models/en/vctk/vits"
|
||||||
tts = TTS(model, gpu=True)
|
self.speaker = fakenames[speaker] if speaker in fakenames else speaker
|
||||||
except:
|
print(f"Generating voice for {model} with speaker {speaker}")
|
||||||
tts = TTS(model, gpu=False)
|
try:
|
||||||
tts.tts_to_file(text=text, file_path=path, speaker=speaker, speed=1, emotion="Happy")
|
self.tts = TTS(model, gpu=True)
|
||||||
|
except:
|
||||||
|
self.tts = TTS(model, gpu=False)
|
||||||
|
if self.speaker == "": self.speaker = "p230"
|
||||||
|
else:
|
||||||
|
self.speaker = fakenames[self.speaker] if self.speaker in fakenames else fakenames["Alexander"]
|
||||||
|
|
||||||
|
def generate_voice(self, path, text):
|
||||||
|
if self.mode == "Bark":
|
||||||
|
from bark import SAMPLE_RATE, generate_audio, preload_models
|
||||||
|
from scipy.io.wavfile import read as wavread, write as wavwrite
|
||||||
|
import noisereduce as nr
|
||||||
|
import soundfile
|
||||||
|
import numpy as np
|
||||||
|
import nltk
|
||||||
|
sentences = nltk.sent_tokenize(text)
|
||||||
|
pieces = []
|
||||||
|
silence = np.zeros(int(0.25 * SAMPLE_RATE)) # quarter second of silence
|
||||||
|
for sentence in sentences:
|
||||||
|
audio_array = generate_audio(sentence, history_prompt=self.speaker)
|
||||||
|
pieces += [audio_array, silence.copy()]
|
||||||
|
audio_array = np.concatenate(pieces)
|
||||||
|
soundfile.write(path, audio_array, SAMPLE_RATE, format="WAV", subtype="PCM_16")
|
||||||
|
rate, data = wavread(path)
|
||||||
|
reduced_noise = nr.reduce_noise(y=data, sr=rate)
|
||||||
|
os.remove(path)
|
||||||
|
wavwrite(path, rate, reduced_noise)
|
||||||
|
else:
|
||||||
|
self.tts.tts_to_file(text=text, file_path=path, speaker=self.speaker, speed=1, emotion="Happy")
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
generate_voice("test/test.mp3", "This is a test. I like the words python, django and flask. Betty bought a bit of butter but the butter was bitter. So she bought some better butter to make the bitter butter better.")
|
generator = VoiceGenerator()
|
||||||
|
generator.generate_voice("test/test_r.wav", "Hello there!")
|
||||||
|
generator.generate_voice("test/teste_r.wav", "This is a test. I like the words python, django and flask. Betty bought a bit of butter but the butter was bitter. So she bought some better butter to make the bitter butter better.")
|
||||||
4
main.py
4
main.py
@@ -64,12 +64,12 @@ async def main():
|
|||||||
video = await channel.generate_video(idea)
|
video = await channel.generate_video(idea)
|
||||||
printm("Done!")
|
printm("Done!")
|
||||||
printm("Here is the video:")
|
printm("Here is the video:")
|
||||||
printm(video)
|
printm(video.url)
|
||||||
input("Press enter to continue...")
|
input("Press enter to continue...")
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
while True:
|
while True:
|
||||||
asyncio.run(main())
|
|
||||||
try:
|
try:
|
||||||
|
asyncio.run(main())
|
||||||
input("Press enter to continue or type ctrl+c to quit : ")
|
input("Press enter to continue or type ctrl+c to quit : ")
|
||||||
clear_screen()
|
clear_screen()
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
|
|||||||
@@ -6,3 +6,5 @@ openai
|
|||||||
pillow
|
pillow
|
||||||
python-dotenv
|
python-dotenv
|
||||||
google-api-python-client
|
google-api-python-client
|
||||||
|
git+https://github.com/suno-ai/bark.git
|
||||||
|
noisereduce
|
||||||
Reference in New Issue
Block a user