From a82c6446bac9a31d4add623b84d59f199bdbd6ac Mon Sep 17 00:00:00 2001 From: Paillat Date: Sat, 8 Jul 2023 20:58:45 +0200 Subject: [PATCH] feat(FABLE.cmd): add FABLE.cmd script to run FABLE.ps1 and pause execution feat(FABLE.ps1): add script to activate environment and run main.py fix(montage.py): fix issue with downloading images and creating slide assets fix(montage.py): fix issue with generating markdown slides fix(montage.py): fix issue with generating huge slides fix(montage.py): fix issue with generating blank slides fix(thumbnail.py): remove TODO comment fix(marp.md): remove unused CSS styles fix(wiki_downloader.py): add user agent header to requests --- FABLE.cmd | 4 ++ FABLE.ps1 | 7 +++ generators/montage.py | 119 ++++++++++++++++++++++----------------- generators/thumbnail.py | 1 - prompts/marp.md | 25 -------- utils/wiki_downloader.py | 17 ++++-- 6 files changed, 92 insertions(+), 81 deletions(-) create mode 100644 FABLE.cmd create mode 100644 FABLE.ps1 diff --git a/FABLE.cmd b/FABLE.cmd new file mode 100644 index 0000000..04d734e --- /dev/null +++ b/FABLE.cmd @@ -0,0 +1,4 @@ +powershell ./FABLE.ps1 + +pause +echo HEHE \ No newline at end of file diff --git a/FABLE.ps1 b/FABLE.ps1 new file mode 100644 index 0000000..46ede22 --- /dev/null +++ b/FABLE.ps1 @@ -0,0 +1,7 @@ +d:/09._AI_projects/FABLE/youtuber/Scripts/Activate.ps1 + +python main.py + +pause + +echo This line will be executed after you press any key. diff --git a/generators/montage.py b/generators/montage.py index 3547e72..a50e2df 100644 --- a/generators/montage.py +++ b/generators/montage.py @@ -17,6 +17,34 @@ if not unsplash_access: raise Exception("UNSPLASH_ACCESS_KEY is not set in .env file") unsplash_url = "https://source.unsplash.com/random/?" + +marp_image = """ + + +
+ +
+""" async def prepare(path): with open(os.path.join(path, "script.json"), 'r', encoding='utf-8') as f: script = json.load(f) @@ -35,72 +63,61 @@ async def prepare(path): generator = VoiceGenerator(speaker=choosen_voice) print("Generating audio for slide " + str(i)) generator.generate_voice(audio_path, script[i]['spoken']) + if os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".md")): + #skip this slide + #continue + # TODO: Do not skip for now, add support for also checking for assets + pass if "image" in script[i]: - if os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".md")) and os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".png")): - #skip this slide - continue if not os.path.exists(path + "/slides/assets"): os.mkdir(path + "/slides/assets") - url= unsplash_url + script[i]['image'].replace("+", ",") - #r = requests.get(url) - #real_url = r.json()['urls']['raw'] - real_url = url - with open(path + "/slides/assets/slide" + str(i) + ".jpg", 'wb') as f: - f.write(requests.get(real_url, allow_redirects=True).content) - f.close() - content = marp.replace("[imagesrc]", "assets/slide" + str(i) + ".jpg") - with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: - f.write(content) + slide_asset_path = os.path.abspath(os.path.join(path, "slides", "assets", "slide" + str(i) + ".jpg")) + w = 0 + while (not os.path.exists(slide_asset_path) or w < 5) and not os.path.exists(path + "/slides/slide" + str(i) + ".md"): + url= unsplash_url + script[i]['image'].replace("+", ",") + real_url = url + with open(slide_asset_path, 'wb') as f: + f.write(requests.get(real_url, allow_redirects=True).content) + f.close() + content = marp + content += "\n\n" + marp_image + content = content.replace("[imagesrc]", "assets/slide" + str(i) + ".jpg") + with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: + f.write(content) + w += 1 elif "wikimage" in script[i]: - if os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".md")) and os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".png")): - #skip this slide - continue if not os.path.exists(path + "/slides/assets"): os.mkdir(path + "/slides/assets") - r = 0 - while True: - try: - print("Trying to download image for slide " + str(i)) - wiki_download_image(script[i]['wikimage'], os.path.abspath(os.path.join(path, "slides", "assets", "slide" + str(i) + ".jpg"))) - print("Downloaded image for slide with wikiimage " + str(i)) - if not os.path.exists(os.path.join(path, "slides", "assets", "slide" + str(i) + ".jpg")): - raise FileNotFoundError - else: - break - except: - r += 1 - if r > 5: - break - continue - content = marp + f"\n\n![bg 70%](assets/slide{i}.jpg)" - with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: - f.write(content) + w = 0 + slide_asset_path = os.path.abspath(os.path.join(path, "slides", "assets", "slide" + str(i) + ".jpg")) + while not (os.path.exists(os.path.join(path, "slides", "assets", "slide" + str(i) + ".jpg")) and os.path.exists(os.path.abspath(os.path.join(path, "slides", "slide" + str(i) + ".md")))): + print("Trying to download image for slide " + str(i)) + wiki_download_image(script[i]['wikimage'], slide_asset_path) + content = marp + content += "\n\n" + marp_image + content = content.replace("[imagesrc]", "assets/slide" + str(i) + ".jpg") + with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: + f.write(content) + w += 1 elif "markdown" in script[i]: - if os.path.exists(path + "/slides/slide" + str(i) + ".md") and os.path.exists(path + "/slides/slide" + str(i) + ".png"): - #skip this slide - continue - with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: - f.write(marp + "\n\n" + script[i]['markdown']) + while not os.path.exists(path + "/slides/slide" + str(i) + ".md"): + with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: + f.write(marp + "\n\n" + script[i]['markdown']) elif "huge" in script[i]: - #use fit - if os.path.exists(path + "/slides/slide" + str(i) + ".md") and os.path.exists(path + "/slides/slide" + str(i) + ".png"): - #skip this slide - continue - with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: - f.write(marp + "\n\n# " + script[i]['huge']) + while not os.path.exists(path + "/slides/slide" + str(i) + ".md"): + with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: + f.write(marp + "\n\n# " + script[i]['huge']) else: - if os.path.exists(path + "/slides/slide" + str(i) + ".md") and os.path.exists(path + "/slides/slide" + str(i) + ".png"): - #skip this slide - continue - with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: - f.write(marp + "\n\n") # blank slide + while not os.path.exists(path + "/slides/slide" + str(i) + ".md"): + with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f: + f.write(marp + "\n\n") # blank slide for i in range(len(script)): markdown_path = os.path.join(path, f"slides/slide{i}.md") markdown_path = os.path.abspath(markdown_path) image_path = os.path.join(path, f"slides/slide{i}.png") image_path = os.path.abspath(image_path) if not os.path.exists(image_path): - command = f'marp.exe "{markdown_path}" -o "{image_path}" --allow-local-files' + command = f'marp.exe --html "{markdown_path}" -o "{image_path}" --allow-local-files' os.system(command) return script diff --git a/generators/thumbnail.py b/generators/thumbnail.py index 101452a..8eac339 100644 --- a/generators/thumbnail.py +++ b/generators/thumbnail.py @@ -29,7 +29,6 @@ Here is the title of the video: [TITLE] Here is the description of the video: [DESCRIPTION]''' -# TODO: make jpg qith 90% quality default when generating the image to avoid having to convert it later diff --git a/prompts/marp.md b/prompts/marp.md index 4f3c028..ce4c2dc 100644 --- a/prompts/marp.md +++ b/prompts/marp.md @@ -6,28 +6,3 @@ class: - invert backgroundImage: url(https://images.unsplash.com/photo-1651604454911-fdfb0edde727) --- - - -
- -
\ No newline at end of file diff --git a/utils/wiki_downloader.py b/utils/wiki_downloader.py index 412d2c5..1018943 100644 --- a/utils/wiki_downloader.py +++ b/utils/wiki_downloader.py @@ -13,8 +13,8 @@ def download_image(query, download_path): driver = uc.Chrome(options=options) try: - driver.get(f"https://www.google.com/search?site=&tbm=isch&source=hp&biw=1873&bih=99&q=site:wikipedia.org+{query.replace(' ', '+')}") time.sleep(2) + driver.get(f"https://www.google.com/search?site=&tbm=isch&source=hp&biw=1873&bih=99&q=site:wikipedia.org+{query.replace(' ', '+')}") tos = driver.find_elements(By.CLASS_NAME, "VfPpkd-vQzf8d") for to in tos: @@ -22,14 +22,20 @@ def download_image(query, download_path): to.click() break time.sleep(1) - while True: + f = 0 + while f < 10: try: + driver.get(f"https://www.google.com/search?site=&tbm=isch&source=hp&biw=1873&bih=99&q=site:wikipedia.org+{query.replace(' ', '+')}") + image = driver.find_element(By.CLASS_NAME, "rg_i").click() + break except: - pass + f += 1 finally: time.sleep(1) + if f == 10: + raise Exception("No image found") time.sleep(5) while True: try: @@ -46,7 +52,10 @@ def download_image(query, download_path): if image.startswith("data:"): image_content = base64.b64decode(image.split(",")[1]) else: - response = requests.get(image, stream=True) + #define a common user agent for all requests + headers = {'User-Agent': 'FABLE/1.2 (Website coming soon; me@paillat.dev)'} + + response = requests.get(image, stream=True, headers=headers) response.raise_for_status() image_content = response.content