From 866f1b413850f37672382d8c50b7bf60aa9692e8 Mon Sep 17 00:00:00 2001 From: Paillat Date: Sun, 25 Jun 2023 21:59:52 +0200 Subject: [PATCH] =?UTF-8?q?chore(.gitignore):=20add=20bark=5Fcache=20direc?= =?UTF-8?q?tory=20to=20gitignore=20feat(video.py):=20use=20wav=20format=20?= =?UTF-8?q?instead=20of=20mp3=20for=20generated=20audio=20files=20feat(mon?= =?UTF-8?q?tage.py):=20use=20Bark=20TTS=20instead=20of=20=F0=9F=90=B8TTS?= =?UTF-8?q?=20feat(speak.py):=20add=20support=20for=20Bark=20TTS=20fix(spe?= =?UTF-8?q?ak.py):=20remove=20unused=20=F0=9F=90=B8TTS=20import=20and=20va?= =?UTF-8?q?riable=20fix(main.py):=20fix=20asyncio.run()=20call=20placement?= =?UTF-8?q?=20docs:=20update=20requirements.txt=20with=20new=20dependencie?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 +- classes/video.py | 9 ++++-- generators/montage.py | 20 +++++++++----- generators/speak.py | 64 ++++++++++++++++++++++++++++++++----------- main.py | 4 +-- requirements.txt | 4 ++- 6 files changed, 74 insertions(+), 30 deletions(-) diff --git a/.gitignore b/.gitignore index 10b250d..d58ab6f 100644 --- a/.gitignore +++ b/.gitignore @@ -161,4 +161,5 @@ test/ ideas/ montageTEMP_MPY_wvf_snd.mp3 marp.exe -channels/ \ No newline at end of file +channels/ +bark_cache/ \ No newline at end of file diff --git a/classes/video.py b/classes/video.py index 0bfe838..736b53c 100644 --- a/classes/video.py +++ b/classes/video.py @@ -31,7 +31,7 @@ class Video: os.makedirs( self.path) script = None if os.path.exists(os.path.join( self.path, "script.json")): - if input("Video script already exists. Do you want to overwrite it ? (y/N) : ") == "y": + if input("Video script already exists. Do you want to overwrite it ? 
(y/N) : ").lower() == "y": os.remove(os.path.join( self.path, "script.json")) if not os.path.exists(os.path.join( self.path, "script.json")): @@ -41,14 +41,17 @@ class Video: script_prompt = f.read() f.close() if script_prompt: + printm("Using custom script prompt") script = await generate_script(self.idea['title'], self.idea['description'], script_prompt) else: + printm("Using default script prompt") script = await generate_script(self.idea['title'], self.idea['description']) + script = json.loads(script) with open(os.path.join( self.path, "script.json"), "w") as f: - json.dump(json.loads(script), f) + json.dump(script, f) f.close() else: - with open(os.path.join( self.path, "script.json"), "r") as f: + with open(os.path.join(self.path, "script.json"), "r") as f: script = json.load(f) f.close() await prepare( self.path) diff --git a/generators/montage.py b/generators/montage.py index e92dce3..bb24a15 100644 --- a/generators/montage.py +++ b/generators/montage.py @@ -4,7 +4,7 @@ import requests import pysrt import random -from generators.speak import generate_voice, voices +from generators.speak import VoiceGenerator, voices from moviepy.video.VideoClip import ImageClip from moviepy.editor import concatenate_videoclips, CompositeAudioClip, concatenate_audioclips from moviepy.audio.io.AudioFileClip import AudioFileClip @@ -31,10 +31,11 @@ async def prepare(path): f.close() if fresh: choosen_voice = random.choice(voices) + generator = VoiceGenerator(speaker=choosen_voice) for i in range(len(script)): - audio_path = path + "/audio/audio" + str(i) + ".mp3" + audio_path = path + "/audio/audio" + str(i) + ".wav" if not os.path.exists(audio_path): - generate_voice(audio_path, script[i]['spoken'], choosen_voice) + generator.generate_voice(audio_path, script[i]['spoken']) if "image" in script[i]: if os.path.exists(path + "/slides/assets/slide" + str(i) + ".md"): #skip this slide @@ -70,11 +71,14 @@ async def prepare(path): with open(path + "/slides/slide" + str(i) + ".md", 
'w', encoding='utf-8') as f: f.write(marp + "\n\n") # blank slide for i in range(len(script)): - marrkdown_path = os.path.join(path, f"slides/slide{i}.md") - if os.path.exists(f"./{path}/slides/slide{i}.png"): + markdown_path = os.path.join(path, f"slides/slide{i}.md") + markdown_path = os.path.abspath(markdown_path) + image_path = os.path.join(path, f"slides/slide{i}.png") + image_path = os.path.abspath(image_path) + if os.path.exists(image_path): #skip this slide continue - command = f"marp.exe {marrkdown_path} -o {path}/slides/slide{i}.png --allow-local-files" + command = f'marp.exe "{markdown_path}" -o "{image_path}" --allow-local-files' os.system(command) return script @@ -101,7 +105,7 @@ async def mount(path, script): srt = pysrt.SubRipFile() total_length = 0 for i in range(num_slides): - audio = AudioFileClip(path + "/audio/audio" + str(i) + ".mp3") + audio = AudioFileClip(path + "/audio/audio" + str(i) + ".wav") complete_audio = CompositeAudioClip([ AudioFileClip("silence.mp3").set_duration(1), audio, @@ -109,6 +113,8 @@ ]) length = complete_audio.duration total_length += length + print(script[i]) + print(script[i]['spoken']) srt = subs(length, total_length, script[i]['spoken'], srt, i) slide = ImageClip(path + "/slides/slide" + str(i) + ".png").set_duration(length) slide = slide.set_audio(complete_audio) diff --git a/generators/speak.py b/generators/speak.py index fc70347..9b06176 100644 --- a/generators/speak.py +++ b/generators/speak.py @@ -1,9 +1,6 @@ -from TTS.api import TTS -# Running a multi-speaker and multi-lingual model +import os -# List available 🐸TTS models and choose the first one -model_best_multi = "tts_models/en/vctk/vits" fakenames = { "Alexander": "p230", "Benjamin": "p240", @@ -14,17 +11,52 @@ fakenames = { voices = ["Alexander", "Benjamin", "Amelia", "Katherine", "Johanne"] -# Init TTS - -def generate_voice(path, text, speaker="Alexander"): - model = model_best_multi - speaker = fakenames[speaker] if 
speaker in fakenames else speaker - print(f"Generating voice for {model} with speaker {speaker}") - try: - tts = TTS(model, gpu=True) - except: - tts = TTS(model, gpu=False) - tts.tts_to_file(text=text, file_path=path, speaker=speaker, speed=1, emotion="Happy") +class VoiceGenerator: + def __init__(self, mode="Bark", speaker=""): + self.mode = mode + self.speaker = speaker + if mode == "Bark": + os.environ["XDG_CACHE_HOME"] = os.path.join(os.getcwd(), "bark_cache") + from bark import preload_models, generation + preload_models() + self.speaker = "v2/en_speaker_6" + else: + from TTS.api import TTS + model = "tts_models/en/vctk/vits" + self.speaker = fakenames[speaker] if speaker in fakenames else speaker + print(f"Generating voice for {model} with speaker {speaker}") + try: + self.tts = TTS(model, gpu=True) + except Exception: + self.tts = TTS(model, gpu=False) + if self.speaker == "": self.speaker = "p230" + else: + self.speaker = fakenames.get(self.speaker, self.speaker) + + def generate_voice(self, path, text): + if self.mode == "Bark": + from bark import SAMPLE_RATE, generate_audio, preload_models + from scipy.io.wavfile import read as wavread, write as wavwrite + import noisereduce as nr + import soundfile + import numpy as np + import nltk + sentences = nltk.sent_tokenize(text) + pieces = [] + silence = np.zeros(int(0.25 * SAMPLE_RATE)) # quarter second of silence + for sentence in sentences: + audio_array = generate_audio(sentence, history_prompt=self.speaker) + pieces += [audio_array, silence.copy()] + audio_array = np.concatenate(pieces) + soundfile.write(path, audio_array, SAMPLE_RATE, format="WAV", subtype="PCM_16") + rate, data = wavread(path) + reduced_noise = nr.reduce_noise(y=data, sr=rate) + os.remove(path) + wavwrite(path, rate, reduced_noise) + else: + self.tts.tts_to_file(text=text, file_path=path, speaker=self.speaker, speed=1, emotion="Happy") if __name__ == "__main__": - generate_voice("test/test.mp3", "This is a 
test. I like the words python, django and flask. Betty bought a bit of butter but the butter was bitter. So she bought some better butter to make the bitter butter better.") \ No newline at end of file + generator = VoiceGenerator() + generator.generate_voice("test/test_r.wav", "Hello there!") + generator.generate_voice("test/teste_r.wav", "This is a test. I like the words python, django and flask. Betty bought a bit of butter but the butter was bitter. So she bought some better butter to make the bitter butter better.") \ No newline at end of file diff --git a/main.py b/main.py index 04af359..55c3cff 100644 --- a/main.py +++ b/main.py @@ -64,12 +64,12 @@ async def main(): video = await channel.generate_video(idea) printm("Done!") printm("Here is the video:") - printm(video) + printm(video.url) input("Press enter to continue...") if __name__ == "__main__": while True: - asyncio.run(main()) try: + asyncio.run(main()) input("Press enter to continue or type ctrl+c to quit : ") clear_screen() except KeyboardInterrupt: diff --git a/requirements.txt b/requirements.txt index e856c8f..c59c1b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,6 @@ deepl openai pillow python-dotenv -google-api-python-client \ No newline at end of file +google-api-python-client +git+https://github.com/suno-ai/bark.git +noisereduce \ No newline at end of file