diff --git a/audio_prompts/default.npz b/audio_prompts/default.npz new file mode 100644 index 0000000..930d10d Binary files /dev/null and b/audio_prompts/default.npz differ diff --git a/audio_prompts/en_narrator_deep.npz b/audio_prompts/en_narrator_deep.npz new file mode 100644 index 0000000..93f7080 Binary files /dev/null and b/audio_prompts/en_narrator_deep.npz differ diff --git a/audio_prompts/en_narrator_light_bg.npz b/audio_prompts/en_narrator_light_bg.npz new file mode 100644 index 0000000..930d10d Binary files /dev/null and b/audio_prompts/en_narrator_light_bg.npz differ diff --git a/classes/video.py b/classes/video.py index 736b53c..afb5518 100644 --- a/classes/video.py +++ b/classes/video.py @@ -48,7 +48,7 @@ class Video: script = await generate_script(self.idea['title'], self.idea['description']) script = json.loads(script) with open(os.path.join( self.path, "script.json"), "w") as f: - json.dump(script, f) + json.dump(script, f, indent=4) f.close() else: with open(os.path.join(self.path, "script.json"), "r") as f: @@ -60,7 +60,8 @@ class Video: "title": self.idea['title'], "description": self.idea['description'] + "\n\n" + credits, } - await generate_thumbnail( self.path, self.idea['title'], self.idea['description']) + if input("Do you want to generate a thumbnail ? (y/N) : ").lower() == "y": + await generate_thumbnail( self.path, self.idea['title'], self.idea['description']) videoid = await upload_video( self.path, self.idea['title'], self.metadata['description'], 28, "", "private", self.parent.path) printm(f"Your video is ready! You can find it in { self.path}") video_meta_file = { diff --git a/generators/montage.py b/generators/montage.py index bb24a15..02ddd7d 100644 --- a/generators/montage.py +++ b/generators/montage.py @@ -10,7 +10,7 @@ from moviepy.editor import concatenate_videoclips, CompositeAudioClip, concatena from moviepy.audio.io.AudioFileClip import AudioFileClip from moviepy.audio.fx.all import volumex, audio_fadein, audio_fadeout # type: ignore from utils.misc import getenv - +from utils.wiki_downloader import download_image as wiki_download_image unsplash_access = getenv("unsplash_access_key") if not unsplash_access: @@ -22,64 +22,82 @@ async def prepare(path): script = json.load(f) f.close() if not os.path.exists(path + "/slides"): os.mkdir(path + "/slides") - fresh = False - if not os.path.exists(path + "/audio"): - os.mkdir(path + "/audio") - fresh = True - with open("prompts/marp.md", 'r', encoding='utf-8') as f: + if not os.path.exists(path + "/audio"): os.mkdir(path + "/audio") + choosen_voice = random.choice(voices) + with open(os.path.join(os.getcwd(), "prompts", "marp.md"), 'r', encoding='utf-8') as f: marp = f.read() - f.close() - if fresh: - choosen_voice = random.choice(voices) - generator = VoiceGenerator(speaker=choosen_voice) - for i in range(len(script)): - audio_path = path + "/audio/audio" + str(i) + ".wav" - if not os.path.exists(audio_path): - generator.generate_voice(audio_path, script[i]['spoken']) - if "image" in script[i]: - if os.path.exists(path + "/slides/assets/slide" + str(i) + ".md"): - #skip this slide + f.close() + for i in range(len(script)): + audio_path = os.path.join(path, "audio", "audio" + str(i) + ".wav") + generator = None + if not os.path.exists(audio_path): + if not generator: + generator = VoiceGenerator(speaker=choosen_voice) + print("Generating audio for slide " + str(i)) + generator.generate_voice(audio_path, script[i]['spoken']) + if "image" in script[i]: + if os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".md")) and os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".png")): + #skip this slide + continue + if not os.path.exists(path + "/slides/assets"): + os.mkdir(path + "/slides/assets") + url= unsplash_url + script[i]['image'].replace("+", ",") + r = requests.get(url) + real_url = r.json()['urls']['raw'] + with open(path + "/slides/assets/slide" + str(i) + ".jpg", 'wb') as f: + f.write(requests.get(real_url).content) + f.close() + content = marp + f"\n\n![bg 70%](assets/slide{i}.jpg)" + with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: + f.write(content) + elif "wikimage" in script[i]: + if os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".md")) and os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".png")): + #skip this slide + continue + if not os.path.exists(path + "/slides/assets"): + os.mkdir(path + "/slides/assets") + r = 0 + while True: + try: + print("Trying to download image for slide " + str(i)) + wiki_download_image(script[i]['wikimage'], os.path.abspath(os.path.join(path, "slides", "assets", "slide" + str(i) + ".jpg"))) + print("Downloaded image for slide with wikiimage " + str(i)) + break + except: + r += 1 + if r > 5: + break continue - if not os.path.exists(path + "/slides/assets"): - os.mkdir(path + "/slides/assets") - url= unsplash_url + script[i]['image'] - r = requests.get(url) - real_url = r.json()['urls']['raw'] - with open(path + "/slides/assets/slide" + str(i) + ".jpg", 'wb') as f: - f.write(requests.get(real_url).content) - f.close() - content = marp + f"\n\n![bg 70%](assets/slide{i}.jpg)" - with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: - f.write(content) - elif "markdown" in script[i]: - if os.path.exists(path + "/slides/slide" + str(i) + ".md"): - #skip this slide - continue - with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: - f.write(marp + "\n\n" + script[i]['markdown']) - elif "huge" in script[i]: - #use fit - if os.path.exists(path + "/slides/slide" + str(i) + ".md"): - #skip this slide - continue - with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: - f.write(marp + "\n\n# " + script[i]['huge']) - else: - if os.path.exists(path + "/slides/slide" + str(i) + ".md"): - #skip this slide - continue - with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: - f.write(marp + "\n\n") # blank slide + content = marp + f"\n\n![bg 70%](assets/slide{i}.jpg)" + with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: + f.write(content) + elif "markdown" in script[i]: + if os.path.exists(path + "/slides/slide" + str(i) + ".md") and os.path.exists(path + "/slides/slide" + str(i) + ".png"): + #skip this slide + continue + with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: + f.write(marp + "\n\n" + script[i]['markdown']) + elif "huge" in script[i]: + #use fit + if os.path.exists(path + "/slides/slide" + str(i) + ".md") and os.path.exists(path + "/slides/slide" + str(i) + ".png"): + #skip this slide + continue + with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: + f.write(marp + "\n\n# " + script[i]['huge']) + else: + if os.path.exists(path + "/slides/slide" + str(i) + ".md") and os.path.exists(path + "/slides/slide" + str(i) + ".png"): + #skip this slide + continue + with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: + f.write(marp + "\n\n") # blank slide for i in range(len(script)): markdown_path = os.path.join(path, f"slides/slide{i}.md") markdown_path = os.path.abspath(markdown_path) image_path = os.path.join(path, f"slides/slide{i}.png") image_path = os.path.abspath(image_path) - if os.path.exists(markdown_path): - #skip this slide - continue - command = f'marp.exe "{markdown_path}" -o "{image_path}" --allow-local-files' - os.system(command) + if not os.path.exists(image_path): + command = f'marp.exe "{markdown_path}" -o "{image_path}" --allow-local-files' + os.system(command) return script def convert_seconds_to_time_string(seconds): @@ -113,8 +131,6 @@ async def mount(path, script): ]) length = complete_audio.duration total_length += length - print(script[i]) - print(script[i]['spoken']) srt = subs(length, total_length, script[i]['spoken'], srt, i) slide = ImageClip(path + "/slides/slide" + str(i) + ".png").set_duration(length) slide = slide.set_audio(complete_audio) @@ -122,7 +138,7 @@ async def mount(path, script): randmusic = random.choice(os.listdir("musics")) while randmusic.endswith(".txt"): randmusic = random.choice(os.listdir("musics")) randpath = "musics/" + randmusic - music = AudioFileClip(randpath).set_duration(total_length) + music = AudioFileClip(randpath) music = audio_fadein(music, 20) music = audio_fadeout(music, 20) music = volumex(music, 0.2) @@ -131,6 +147,7 @@ async def mount(path, script): for i in range(int(total_length / music.duration)): musics.append(music) music = concatenate_audioclips(musics) + music = music.set_duration(total_length) final_clip = concatenate_videoclips(clips, method="compose") existing_audio = final_clip.audio final_audio = CompositeAudioClip([existing_audio, music]) @@ -142,4 +159,4 @@ async def mount(path, script): f.close() return music_credit or "" else: - return None \ No newline at end of file + return "" \ No newline at end of file diff --git a/generators/speak.py b/generators/speak.py index 9b06176..83d8604 100644 --- a/generators/speak.py +++ b/generators/speak.py @@ -1,5 +1,5 @@ - import os +from pydub import AudioSegment, silence fakenames = { "Alexander": "p230", @@ -11,16 +11,70 @@ fakenames = { voices = ["Alexander", "Benjamin", "Amelia", "Katherine", "Johanne"] + +def remove_blank_moments(file_path, silence_thresh= -50, silence_chunk_len=500): + # Load audio file + audio = AudioSegment.from_wav(file_path) + + # Detect non-silent parts + nonsilent_data = silence.detect_nonsilent(audio, min_silence_len=silence_chunk_len, silence_thresh=silence_thresh) + + # Create new audio file + final_audio = AudioSegment.empty() + + # Iterate over non-silent parts and append to the final_audio with 0.5 seconds before and after each segment + for idx, (start_i, end_i) in enumerate(nonsilent_data): + start_i = max(0, start_i - 500) # 0.5 seconds before + end_i += 500 # 0.5 seconds after + + segment = audio[start_i:end_i] + + # Only append silence after the first segment + if idx > 0: + final_audio += AudioSegment.silent(duration=500) + + final_audio += segment + # Save the result + if not os.path.exists(os.path.abspath(os.path.join(os.getcwd(), "temp"))): + os.mkdir(os.path.abspath(os.path.join(os.getcwd(), "temp"))) + tempfile_path = os.path.abspath(os.path.join(os.getcwd(), "temp", "temp.wav")) + final_audio.export(tempfile_path, format="wav") + os.remove(file_path) + os.rename(tempfile_path, file_path) + + +def optimize_string_groups(strings): + optimized_groups = [] + current_group = [] + current_length = 0 + + for string in strings: + string_length = len(string) + len(current_group) # Account for spaces between strings + if current_length + string_length <= 100: + current_group.append(string) + current_length += string_length + else: + optimized_groups.append(' '.join(current_group)) # Join strings with spaces + current_group = [string] + current_length = len(string) + + if current_group: + optimized_groups.append(' '.join(current_group)) + + return optimized_groups + class VoiceGenerator: def __init__(self, mode="Bark", speaker=""): self.mode = mode self.speaker = speaker if mode == "Bark": os.environ["XDG_CACHE_HOME"] = os.path.join(os.getcwd(), "bark_cache") - from bark import preload_models, generation - + from bark import preload_models + print("Loading Bark voice generator") preload_models() - self.speaker = "v2/en_speaker_6" + #self.speaker = os.path.abspath(os.path.join(os.getcwd(), "audio_prompts", "en_male_professional_reader.npz")) + self.speaker = os.path.join(os.getcwd(), "audio_prompts", "en_narrator_light_bg.npz") + print(f"Generating voice for Bark with speaker {self.speaker}") else: from TTS.api import TTS model = "tts_models/en/vctk/vits" @@ -43,20 +97,27 @@ class VoiceGenerator: import numpy as np import nltk sentences = nltk.sent_tokenize(text) + sentences = optimize_string_groups(sentences) + print(sentences) pieces = [] silence = np.zeros(int(0.25 * SAMPLE_RATE)) # quarter second of silence for sentence in sentences: - audio_array = generate_audio(sentence, history_prompt=self.speaker) - pieces += [audio_array, silence.copy()] + if not sentence == "": + audio_array = generate_audio(sentence, history_prompt=self.speaker) + pieces += [audio_array, silence.copy()] audio_array = np.concatenate(pieces) soundfile.write(path, audio_array, SAMPLE_RATE, format="WAV", subtype="PCM_16") - rate, data = wavread(path) - reduced_noise = nr.reduce_noise(y=data, sr=rate) - os.remove(path) - wavwrite(path, rate, reduced_noise) + ''' + remove silence + ''' + remove_blank_moments(path) else: self.tts.tts_to_file(text=text, file_path=path, speaker=self.speaker, speed=1, emotion="Happy") if __name__ == "__main__": + import logging + logging.basicConfig(level=logging.INFO) + print("Testing voice generator") generator = VoiceGenerator() - generator.generate_voice("test/test_r.wav", "Hello there!") - generator.generate_voice("test/teste_r.wav", "This is a test. I like the words python, django and flask. Betty bought a bit of butter but the butter was bitter. So she bought some better butter to make the bitter butter better.") \ No newline at end of file + print("Loaded voice generator") +# generator.generate_voice("test/test_r.wav", "Hello there!") + generator.generate_voice("test/tast_timbernerslee.wav", "But his greatest claim to fame is undoubtedly his invention of the World Wide Web back in 1989. Can you imagine a world without the internet? [Laughs] No, thank you!") \ No newline at end of file diff --git a/generators/thumbnail.py b/generators/thumbnail.py index 64afedc..101452a 100644 --- a/generators/thumbnail.py +++ b/generators/thumbnail.py @@ -28,6 +28,11 @@ Answer without anything else, just with the 2 textes. Answer with text1 on the f Here is the title of the video: [TITLE] Here is the description of the video: [DESCRIPTION]''' + +# TODO: make jpg qith 90% quality default when generating the image to avoid having to convert it later + + + async def rand_gradient(image): randr = random.SystemRandom().randint(1, 20) randg = random.SystemRandom().randint(1, 20) @@ -110,11 +115,20 @@ async def generate_image(path, text1, text2): drawtext2.text((imgtext2.size[0]//8*2.5, imgtext2.size[1]//5*2), text2def, font=font2, fill=(textcolor2[0], textcolor2[1], textcolor2[2])) imgtext2 = imgtext2.rotate(5, expand=True) #paste the textes on the image - img.paste(bcg, (0, 0), bcg) + bcg = bcg.convert('RGBA') + #also set the bcg size to the image size + bcg = bcg.resize((1920, 1080)) + img.paste(bcg, (0, 0), bcg) # TODO: make it work with standard pngs (non rgba) img.paste(imgtext1, (0, 0-img.size[1]//8), imgtext1) if len(text1def.split("\n")) > 2: #if the text is too long, put the second text on the third line img.paste(imgtext2, (0, img.size[1]//8), imgtext2) else: img.paste(imgtext2, (0, 0), imgtext2) - img.save(path + "/miniature.png") - return path + "/miniature.png" \ No newline at end of file + #disable the alpha channel + img = img.convert('RGB') + img_path = os.path.abspath(os.path.join(path, "thumbnail.jpg")) + for quality in range(100, 0, -1): + img.save(img_path, quality=quality) + if os.path.getsize(img_path) < 2000000: + break + return img_path \ No newline at end of file diff --git a/main.py b/main.py index 55c3cff..828eaad 100644 --- a/main.py +++ b/main.py @@ -11,15 +11,15 @@ from utils.openaicaller import openai logging.basicConfig(level=logging.INFO) async def main(): - printm("Loading...") - await asyncio.sleep(1) - clear_screen() + #printm("Loading...") + #await asyncio.sleep(1) + #clear_screen() printm(loadingmessage) - await asyncio.sleep(4) - clear_screen() - await asyncio.sleep(1) + #await asyncio.sleep(4) + #clear_screen() + await asyncio.sleep(0.5) printm("Welcome in FABLE, the Film and Artistic Bot for Lively Entertainment!") - await asyncio.sleep(1) + await asyncio.sleep(0.5) printm(f"This program will generate for you complete {bcolors.FAIL}{bcolors.BOLD}YouTube{bcolors.ENDC} videos, as well as uploading them to YouTube.") if not os.path.exists('env.yaml'): printm("It looks like you don't have an OpenAI API key yet. Please paste it here:") @@ -57,9 +57,17 @@ async def main(): await channel.load(channel_name) printm("Now, let's create a video!") printm("Here are all the ideas you have:") + printm("0. Generate new ideas") for i, idea in enumerate(channel.ideas): printm(f"{i+1}. {idea['title']}") index = input("Which idea do you want to create a video for : ") + if index == "0": + printm("Generating new ideas...") + await channel.generate_ideas() + printm("Here are your new ideas:") + for i, idea in enumerate(channel.ideas): + printm(f"{i+1}. {idea['title']}") + index = input("Which idea do you want to create a video for : ") idea = channel.ideas[int(index)-1] video = await channel.generate_video(idea) printm("Done!") diff --git a/musics/Ghostrifter-Official-Lost-In-Thought.txt b/musics/Ghostrifter-Official-Lost-In-Thought.txt index d07e286..68bf08d 100644 --- a/musics/Ghostrifter-Official-Lost-In-Thought.txt +++ b/musics/Ghostrifter-Official-Lost-In-Thought.txt @@ -1,3 +1,3 @@ -Lost In Thought by Ghostrifter bit.ly/ghostrifter-yt +Lost In Thought by Ghostrifter Creative Commons — Attribution-NoDerivs 3.0 Unported — CC BY-ND 3.0 -Music promoted by https://www.chosic.com/free-music/all/ \ No newline at end of file +Music promoted by chosic \ No newline at end of file diff --git a/musics/When-I-Was-A-Boy.txt b/musics/When-I-Was-A-Boy.txt index ffcde35..b239dd2 100644 --- a/musics/When-I-Was-A-Boy.txt +++ b/musics/When-I-Was-A-Boy.txt @@ -1,4 +1,3 @@ -When I Was A Boy by Tokyo Music Walker | https://soundcloud.com/user-356546060 -Music promoted by https://www.chosic.com/free-music/all/ -Creative Commons CC BY 3.0 -https://creativecommons.org/licenses/by/3.0/ \ No newline at end of file +When I Was A Boy by Tokyo Music Walker +Music promoted by free-stock-music +Creative Commons CC BY 3.0 \ No newline at end of file diff --git a/musics/aila-scott-sin-and-sensitivity-rendition-of-bachs-air.txt b/musics/aila-scott-sin-and-sensitivity-rendition-of-bachs-air.txt index acff10d..07f6267 100644 --- a/musics/aila-scott-sin-and-sensitivity-rendition-of-bachs-air.txt +++ b/musics/aila-scott-sin-and-sensitivity-rendition-of-bachs-air.txt @@ -1,4 +1,3 @@ -Sin and Sensitivity (Rendition of Bach’s "Air") by Aila Scott • Johann Sebastian Bach | https://ailascott.com -Music promoted by https://www.free-stock-music.com -Creative Commons / Attribution 4.0 International (CC BY 4.0) -https://creativecommons.org/licenses/by/4.0/ \ No newline at end of file +Sin and Sensitivity (Rendition of Bach’s "Air") by Aila Scott • Johann Sebastian Bach +Music promoted by free-stock-music +Creative Commons / Attribution 4.0 International (CC BY 4.0) \ No newline at end of file diff --git a/utils/uploader.py b/utils/uploader.py index 53ecc1d..6908c4c 100644 --- a/utils/uploader.py +++ b/utils/uploader.py @@ -41,7 +41,10 @@ VALID_PRIVACY_STATUSES = ('public', 'private', 'unlisted') async def get_authenticated_service(credentialsPath="", force_refresh=False): CLIENT_SECRETS_FILE = "" try: - CLIENT_SECRETS_FILE=os.path.join(credentialsPath, "client_secret.json") + if os.path.exists(os.path.join(credentialsPath, "client_secret.json")): + CLIENT_SECRETS_FILE=os.path.join(credentialsPath, "client_secret.json") + else: + raise FileNotFoundError("No client_secret.json file found in the specified path !") except: listdir = os.listdir(credentialsPath) for file in listdir: @@ -146,25 +149,16 @@ async def upload_video(path, title, description, category, keywords, privacyStat 'keywords': keywords, 'privacyStatus': privacyStatus } - refresh = False - while True: - try: - youtube = await get_authenticated_service(credentials_path, force_refresh=refresh) - videoid = await initialize_upload(youtube, options) - await upload_thumbnail(videoid, path + "/miniature.png", credentials_path, youtube) - return videoid - except HttpError as e: - print('An HTTP error %d occurred:\n%s' % (e.resp.status, e.content)) - #escape the loop - break - except: - #refresh the token - if not refresh: - refresh = True - else: - #escape the loop - break - + youtube = await get_authenticated_service(credentials_path, force_refresh=False) + print("Uploading video...") + try: + videoid = await initialize_upload(youtube, options) + except: + youtube = await get_authenticated_service(credentials_path, force_refresh=True) + videoid = await initialize_upload(youtube, options) + thumb_path = os.path.abspath(os.path.join(path, "thumbnail.jpg")) + await upload_thumbnail(videoid, thumb_path, credentials_path, youtube) + return videoid async def upload_thumbnail(video_id, file, credentials_path="", youtube=None): diff --git a/utils/wiki_downloader.py b/utils/wiki_downloader.py index ab07663..86b08bc 100644 --- a/utils/wiki_downloader.py +++ b/utils/wiki_downloader.py @@ -13,7 +13,7 @@ def download_image(query, download_path): driver = uc.Chrome(options=options) try: - driver.get(f"https://www.google.com/search?site=&tbm=isch&source=hp&biw=1873&bih=990&tbs=isz:l&q=site:wikipedia.org+{query.replace(' ', '+')}") + driver.get(f"https://www.google.com/search?site=&tbm=isch&source=hp&biw=1873&bih=99&q=site:wikipedia.org+{query.replace(' ', '+')}") time.sleep(2) tos = driver.find_elements(By.CLASS_NAME, "VfPpkd-vQzf8d") @@ -21,11 +21,10 @@ def download_image(query, download_path): if to.text.lower() == "tout refuser": to.click() break - - time.sleep(10) + time.sleep(1) image = driver.find_element(By.CLASS_NAME, "rg_i") image.click() - time.sleep(2) + time.sleep(5) image = driver.find_element(By.CLASS_NAME, "r48jcc").get_attribute("src") or "" image_content = None