From 866f1b413850f37672382d8c50b7bf60aa9692e8 Mon Sep 17 00:00:00 2001 From: Paillat Date: Sun, 25 Jun 2023 21:59:52 +0200 Subject: [PATCH] =?UTF-8?q?chore(.gitignore):=20add=20bark=5Fcache=20direc?= =?UTF-8?q?tory=20to=20gitignore=20feat(video.py):=20use=20wav=20format=20?= =?UTF-8?q?instead=20of=20mp3=20for=20generated=20audio=20files=20feat(mon?= =?UTF-8?q?tage.py):=20use=20Bark=20TTS=20instead=20of=20=F0=9F=90=B8TTS?= =?UTF-8?q?=20feat(speak.py):=20add=20support=20for=20Bark=20TTS=20fix(spe?= =?UTF-8?q?ak.py):=20remove=20unused=20=F0=9F=90=B8TTS=20import=20and=20va?= =?UTF-8?q?riable=20fix(main.py):=20fix=20asyncio.run()=20call=20placement?= =?UTF-8?q?=20docs:=20update=20requirements.txt=20with=20new=20dependencie?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 +- classes/video.py | 9 ++++-- generators/montage.py | 20 +++++++++----- generators/speak.py | 64 ++++++++++++++++++++++++++++++++----------- main.py | 4 +-- requirements.txt | 4 ++- 6 files changed, 74 insertions(+), 30 deletions(-) diff --git a/.gitignore b/.gitignore index 10b250d..d58ab6f 100644 --- a/.gitignore +++ b/.gitignore @@ -161,4 +161,5 @@ test/ ideas/ montageTEMP_MPY_wvf_snd.mp3 marp.exe -channels/ \ No newline at end of file +channels/ +bark_cache/ \ No newline at end of file diff --git a/classes/video.py b/classes/video.py index 0bfe838..736b53c 100644 --- a/classes/video.py +++ b/classes/video.py @@ -31,7 +31,7 @@ class Video: os.makedirs( self.path) script = None if os.path.exists(os.path.join( self.path, "script.json")): - if input("Video script already exists. Do you want to overwrite it ? (y/N) : ") == "y": + if input("Video script already exists. Do you want to overwrite it ? 
(y/N) : ").lower() == "y": os.remove(os.path.join( self.path, "script.json")) if not os.path.exists(os.path.join( self.path, "script.json")): @@ -41,14 +41,17 @@ class Video: script_prompt = f.read() f.close() if script_prompt: + printm("Using custom script prompt") script = await generate_script(self.idea['title'], self.idea['description'], script_prompt) else: + printm("Using default script prompt") script = await generate_script(self.idea['title'], self.idea['description']) + script = json.loads(script) with open(os.path.join( self.path, "script.json"), "w") as f: - json.dump(json.loads(script), f) + json.dump(script, f) f.close() else: - with open(os.path.join( self.path, "script.json"), "r") as f: + with open(os.path.join(self.path, "script.json"), "r") as f: script = json.load(f) f.close() await prepare( self.path) diff --git a/generators/montage.py b/generators/montage.py index e92dce3..bb24a15 100644 --- a/generators/montage.py +++ b/generators/montage.py @@ -4,7 +4,7 @@ import requests import pysrt import random -from generators.speak import generate_voice, voices +from generators.speak import VoiceGenerator, voices from moviepy.video.VideoClip import ImageClip from moviepy.editor import concatenate_videoclips, CompositeAudioClip, concatenate_audioclips from moviepy.audio.io.AudioFileClip import AudioFileClip @@ -31,10 +31,11 @@ async def prepare(path): f.close() if fresh: choosen_voice = random.choice(voices) + generator = VoiceGenerator(speaker=choosen_voice) for i in range(len(script)): - audio_path = path + "/audio/audio" + str(i) + ".mp3" + audio_path = path + "/audio/audio" + str(i) + ".wav" if not os.path.exists(audio_path): - generate_voice(audio_path, script[i]['spoken'], choosen_voice) + generator.generate_voice(audio_path, script[i]['spoken']) if "image" in script[i]: if os.path.exists(path + "/slides/assets/slide" + str(i) + ".md"): #skip this slide @@ -70,11 +71,14 @@ async def prepare(path): with open(path + "/slides/slide" + str(i) + ".md", 
'w', encoding='utf-8') as f: f.write(marp + "\n\n") # blank slide for i in range(len(script)): - marrkdown_path = os.path.join(path, f"slides/slide{i}.md") - if os.path.exists(f"./{path}/slides/slide{i}.png"): + markdown_path = os.path.join(path, f"slides/slide{i}.md") + markdown_path = os.path.abspath(markdown_path) + image_path = os.path.join(path, f"slides/slide{i}.png") + image_path = os.path.abspath(image_path) + if os.path.exists(image_path): #skip this slide continue - command = f"marp.exe {marrkdown_path} -o {path}/slides/slide{i}.png --allow-local-files" + command = f'marp.exe "{markdown_path}" -o "{image_path}" --allow-local-files' os.system(command) return script @@ -101,7 +105,7 @@ async def mount(path, script): srt = pysrt.SubRipFile() total_length = 0 for i in range(num_slides): - audio = AudioFileClip(path + "/audio/audio" + str(i) + ".mp3") + audio = AudioFileClip(path + "/audio/audio" + str(i) + ".wav") complete_audio = CompositeAudioClip([ AudioFileClip("silence.mp3").set_duration(1), audio, @@ -109,6 +113,8 @@ ]) length = complete_audio.duration total_length += length + print(script[i]) + print(script[i]['spoken']) srt = subs(length, total_length, script[i]['spoken'], srt, i) slide = ImageClip(path + "/slides/slide" + str(i) + ".png").set_duration(length) slide = slide.set_audio(complete_audio) diff --git a/generators/speak.py b/generators/speak.py index fc70347..9b06176 100644 --- a/generators/speak.py +++ b/generators/speak.py @@ -1,9 +1,6 @@ -from TTS.api import TTS -# Running a multi-speaker and multi-lingual model +import os -# List available 🐸TTS models and choose the first one -model_best_multi = "tts_models/en/vctk/vits" fakenames = { "Alexander": "p230", "Benjamin": "p240", @@ -14,17 +11,52 @@ fakenames = { voices = ["Alexander", "Benjamin", "Amelia", "Katherine", "Johanne"] -# Init TTS - -def generate_voice(path, text, speaker="Alexander"): - model = model_best_multi - speaker = fakenames[speaker] if 
speaker in fakenames else speaker - print(f"Generating voice for {model} with speaker {speaker}") - try: - tts = TTS(model, gpu=True) - except: - tts = TTS(model, gpu=False) - tts.tts_to_file(text=text, file_path=path, speaker=speaker, speed=1, emotion="Happy") +class VoiceGenerator: + def __init__(self, mode="Bark", speaker=""): + self.mode = mode + self.speaker = speaker + if mode == "Bark": + os.environ["XDG_CACHE_HOME"] = os.path.join(os.getcwd(), "bark_cache") + from bark import preload_models, generation + preload_models() + self.speaker = "v2/en_speaker_6" + else: + from TTS.api import TTS + model = "tts_models/en/vctk/vits" + self.speaker = fakenames[speaker] if speaker in fakenames else speaker + print(f"Generating voice for {model} with speaker {speaker}") + try: + self.tts = TTS(model, gpu=True) + except Exception: + self.tts = TTS(model, gpu=False) + if self.speaker == "": self.speaker = "p230" + else: + self.speaker = fakenames.get(self.speaker, self.speaker) + + def generate_voice(self, path, text): + if self.mode == "Bark": + from bark import SAMPLE_RATE, generate_audio, preload_models + from scipy.io.wavfile import read as wavread, write as wavwrite + import noisereduce as nr + import soundfile + import numpy as np + import nltk + sentences = nltk.sent_tokenize(text) + pieces = [] + silence = np.zeros(int(0.25 * SAMPLE_RATE)) # quarter second of silence + for sentence in sentences: + audio_array = generate_audio(sentence, history_prompt=self.speaker) + pieces += [audio_array, silence.copy()] + audio_array = np.concatenate(pieces) + soundfile.write(path, audio_array, SAMPLE_RATE, format="WAV", subtype="PCM_16") + rate, data = wavread(path) + reduced_noise = nr.reduce_noise(y=data, sr=rate) + os.remove(path) + wavwrite(path, rate, reduced_noise) + else: + self.tts.tts_to_file(text=text, file_path=path, speaker=self.speaker, speed=1, emotion="Happy") if __name__ == "__main__": - generate_voice("test/test.mp3", "This is a 
test. I like the words python, django and flask. Betty bought a bit of butter but the butter was bitter. So she bought some better butter to make the bitter butter better.") \ No newline at end of file + generator = VoiceGenerator() + generator.generate_voice("test/test_r.wav", "Hello there!") + generator.generate_voice("test/teste_r.wav", "This is a test. I like the words python, django and flask. Betty bought a bit of butter but the butter was bitter. So she bought some better butter to make the bitter butter better.") \ No newline at end of file diff --git a/main.py b/main.py index 04af359..55c3cff 100644 --- a/main.py +++ b/main.py @@ -64,12 +64,12 @@ async def main(): video = await channel.generate_video(idea) printm("Done!") printm("Here is the video:") - printm(video) + printm(video.url) input("Press enter to continue...") if __name__ == "__main__": while True: - asyncio.run(main()) try: + asyncio.run(main()) input("Press enter to continue or type ctrl+c to quit : ") clear_screen() except KeyboardInterrupt: diff --git a/requirements.txt b/requirements.txt index e856c8f..c59c1b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,6 @@ deepl openai pillow python-dotenv -google-api-python-client \ No newline at end of file +google-api-python-client +git+https://github.com/suno-ai/bark.git +noisereduce \ No newline at end of file