From a82c6446bac9a31d4add623b84d59f199bdbd6ac Mon Sep 17 00:00:00 2001
From: Paillat <git@electronique.cc>
Date: Sat, 8 Jul 2023 20:58:45 +0200
Subject: [PATCH] feat: add FABLE launcher scripts; fix slide generation and
 wiki image download

- feat(FABLE.cmd): add FABLE.cmd script to run FABLE.ps1 and pause execution
- feat(FABLE.ps1): add script to activate environment and run main.py
- fix(montage.py): fix issue with downloading images and creating slide assets
- fix(montage.py): fix issue with generating markdown slides
- fix(montage.py): fix issue with generating huge slides
- fix(montage.py): fix issue with generating blank slides
- fix(thumbnail.py): remove TODO comment
- fix(marp.md): remove unused CSS styles
- fix(wiki_downloader.py): add user agent header to requests

---
 FABLE.cmd                |   4 ++
 FABLE.ps1                |   7 +++
 generators/montage.py    | 119 ++++++++++++++++++++++-----------------
 generators/thumbnail.py  |   1 -
 prompts/marp.md          |  25 --------
 utils/wiki_downloader.py |  17 ++++--
 6 files changed, 92 insertions(+), 81 deletions(-)
 create mode 100644 FABLE.cmd
 create mode 100644 FABLE.ps1
diff --git a/FABLE.cmd b/FABLE.cmd
new file mode 100644
index 0000000..04d734e
--- /dev/null
+++ b/FABLE.cmd
@@ -0,0 +1,4 @@
+powershell ./FABLE.ps1
+
+pause
+echo HEHE
\ No newline at end of file
diff --git a/FABLE.ps1 b/FABLE.ps1
new file mode 100644
index 0000000..46ede22
--- /dev/null
+++ b/FABLE.ps1
@@ -0,0 +1,7 @@
+d:/09._AI_projects/FABLE/youtuber/Scripts/Activate.ps1
+
+python main.py
+
+pause
+
+echo This line will be executed after you press any key.
diff --git a/generators/montage.py b/generators/montage.py
index 3547e72..a50e2df 100644
--- a/generators/montage.py
+++ b/generators/montage.py
@@ -17,6 +17,34 @@ if not unsplash_access:
     raise Exception("UNSPLASH_ACCESS_KEY is not set in .env file")
 unsplash_url = "https://source.unsplash.com/random/?"
 
+
+marp_image = """
+<style>
+    section {
+        display: flex;
+        justify-content: center;
+        align-items: center;
+    }
+
+    .image-container {
+        width: 90%;
+        max-height: 90%;
+        display: flex;
+        justify-content: center;
+        align-items: center;
+    }
+
+    .image-container img {
+        object-fit: contain;
+        width: 100%;
+        height: 100%;
+    }
+</style>
+
+<div class="image-container">
+    <img src="[imagesrc]"/>
+</div>
+"""
 async def prepare(path):
     with open(os.path.join(path, "script.json"), 'r', encoding='utf-8') as f:
         script = json.load(f)
@@ -35,72 +63,61 @@ async def prepare(path):
                     generator = VoiceGenerator(speaker=choosen_voice)
             print("Generating audio for slide " + str(i))
             generator.generate_voice(audio_path, script[i]['spoken'])
+        if os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".md")):
+            # The slide's markdown already exists, but it is intentionally not skipped yet
+            # (a `continue` here would skip it).
+            # TODO: skip the slide once its assets are also checked for existence.
+            pass
         if "image" in script[i]:
-            if os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".md")) and os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".png")):
-                #skip this slide
-                continue
             if not os.path.exists(path + "/slides/assets"):
                 os.mkdir(path + "/slides/assets")
-            url= unsplash_url + script[i]['image'].replace("+", ",")
-            #r = requests.get(url)
-            #real_url = r.json()['urls']['raw']
-            real_url = url
-            with open(path + "/slides/assets/slide" + str(i) + ".jpg", 'wb') as f:
-                f.write(requests.get(real_url, allow_redirects=True).content)
-                f.close()
-            content = marp.replace("[imagesrc]", "assets/slide" + str(i) + ".jpg")
-            with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
-                f.write(content)
+            slide_asset_path = os.path.abspath(os.path.join(path, "slides", "assets", "slide" + str(i) + ".jpg"))
+            w = 0
+            while (not os.path.exists(slide_asset_path) or w < 5) and not os.path.exists(path + "/slides/slide" + str(i) + ".md"):
+                url= unsplash_url + script[i]['image'].replace("+", ",")
+                real_url = url
+                with open(slide_asset_path, 'wb') as f:
+                    f.write(requests.get(real_url, allow_redirects=True).content)
+                    f.close()
+                content = marp
+                content += "\n\n" + marp_image
+                content = content.replace("[imagesrc]", "assets/slide" + str(i) + ".jpg")
+                with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
+                    f.write(content)
+                w += 1
         elif "wikimage" in script[i]:
-            if os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".md")) and os.path.exists(os.path.join(path, "slides", "slide" + str(i) + ".png")):
-                #skip this slide
-                continue
             if not os.path.exists(path + "/slides/assets"):
                 os.mkdir(path + "/slides/assets")
-            r = 0
-            while True:
-                try: 
-                    print("Trying to download image for slide " + str(i))
-                    wiki_download_image(script[i]['wikimage'], os.path.abspath(os.path.join(path, "slides", "assets", "slide" + str(i) + ".jpg")))
-                    print("Downloaded image for slide with wikiimage " + str(i))
-                    if not os.path.exists(os.path.join(path, "slides", "assets", "slide" + str(i) + ".jpg")):
-                        raise FileNotFoundError
-                    else:
-                        break
-                except:
-                    r += 1
-                    if r > 5:
-                        break
-                    continue
-            content = marp + f"\n\n![bg 70%](assets/slide{i}.jpg)"
-            with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
-                f.write(content)
+            w = 0
+            slide_asset_path = os.path.abspath(os.path.join(path, "slides", "assets", "slide" + str(i) + ".jpg"))
+            while not (os.path.exists(os.path.join(path, "slides", "assets", "slide" + str(i) + ".jpg")) and os.path.exists(os.path.abspath(os.path.join(path, "slides", "slide" + str(i) + ".md")))):
+                print("Trying to download image for slide " + str(i))
+                wiki_download_image(script[i]['wikimage'], slide_asset_path)
+                content = marp
+                content += "\n\n" + marp_image
+                content = content.replace("[imagesrc]", "assets/slide" + str(i) + ".jpg")
+                with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
+                    f.write(content)
+                w += 1
         elif "markdown" in script[i]:
-            if os.path.exists(path + "/slides/slide" + str(i) + ".md") and os.path.exists(path + "/slides/slide" + str(i) + ".png"):
-                #skip this slide
-                continue
-            with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
-                f.write(marp + "\n\n" + script[i]['markdown'])
+            while not os.path.exists(path + "/slides/slide" + str(i) + ".md"):
+                with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
+                    f.write(marp + "\n\n" + script[i]['markdown'])
         elif "huge" in script[i]:
-            #use fit
-            if os.path.exists(path + "/slides/slide" + str(i) + ".md") and os.path.exists(path + "/slides/slide" + str(i) + ".png"):
-                #skip this slide
-                continue
-            with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
-                f.write(marp + "\n\n# <!-- fit --> " + script[i]['huge'])
+            while not os.path.exists(path + "/slides/slide" + str(i) + ".md"):
+                with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
+                    f.write(marp + "\n\n# <!-- fit --> " + script[i]['huge'])
         else:
-            if os.path.exists(path + "/slides/slide" + str(i) + ".md") and os.path.exists(path + "/slides/slide" + str(i) + ".png"):
-                #skip this slide
-                continue
-            with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
-                f.write(marp + "\n\n") # blank slide
+            while not os.path.exists(path + "/slides/slide" + str(i) + ".md"):
+                with open(path + "/slides/slide" + str(i) + ".md", 'w', encoding='utf-8') as f:
+                    f.write(marp + "\n\n") # blank slide
     for i in range(len(script)):
         markdown_path = os.path.join(path, f"slides/slide{i}.md")
         markdown_path = os.path.abspath(markdown_path)
         image_path = os.path.join(path, f"slides/slide{i}.png")
         image_path = os.path.abspath(image_path)
         if not os.path.exists(image_path):
-            command = f'marp.exe "{markdown_path}" -o "{image_path}" --allow-local-files'
+            command = f'marp.exe --html "{markdown_path}" -o "{image_path}" --allow-local-files'
             os.system(command)
     return script
 
diff --git a/generators/thumbnail.py b/generators/thumbnail.py
index 101452a..8eac339 100644
--- a/generators/thumbnail.py
+++ b/generators/thumbnail.py
@@ -29,7 +29,6 @@ Here is the title of the video: [TITLE]
 Here is the description of the video: [DESCRIPTION]'''
 
 
-# TODO: make jpg qith 90% quality default when generating the image to avoid having to convert it later
 
 
 
diff --git a/prompts/marp.md b/prompts/marp.md
index 4f3c028..ce4c2dc 100644
--- a/prompts/marp.md
+++ b/prompts/marp.md
@@ -6,28 +6,3 @@ class:
     - invert
 backgroundImage: url(https://images.unsplash.com/photo-1651604454911-fdfb0edde727)
 ---
-<style>
-    section {
-        display: flex;
-        justify-content: center;
-        align-items: center;
-    }
-
-    .image-container {
-        width: 90%;
-        max-height: 90%;
-        display: flex;
-        justify-content: center;
-        align-items: center;
-    }
-
-    .image-container img {
-        object-fit: contain;
-        width: 100%;
-        height: 100%;
-    }
-</style>
-
-<div class="image-container">
-    <img src="[imagesrc]"/>
-</div>
\ No newline at end of file
diff --git a/utils/wiki_downloader.py b/utils/wiki_downloader.py
index 412d2c5..1018943 100644
--- a/utils/wiki_downloader.py
+++ b/utils/wiki_downloader.py
@@ -13,8 +13,8 @@ def download_image(query, download_path):
     driver = uc.Chrome(options=options)
 
     try:
-        driver.get(f"https://www.google.com/search?site=&tbm=isch&source=hp&biw=1873&bih=99&q=site:wikipedia.org+{query.replace(' ', '+')}")
         time.sleep(2)
+        driver.get(f"https://www.google.com/search?site=&tbm=isch&source=hp&biw=1873&bih=99&q=site:wikipedia.org+{query.replace(' ', '+')}")
 
         tos = driver.find_elements(By.CLASS_NAME, "VfPpkd-vQzf8d")
         for to in tos:
@@ -22,14 +22,20 @@ def download_image(query, download_path):
                 to.click()
                 break
         time.sleep(1)
-        while True:
+        f = 0
+        while f < 10:
             try:
+                driver.get(f"https://www.google.com/search?site=&tbm=isch&source=hp&biw=1873&bih=99&q=site:wikipedia.org+{query.replace(' ', '+')}")
+
                 image = driver.find_element(By.CLASS_NAME, "rg_i").click()
+                
                 break
             except:
-                pass
+                f += 1
             finally:
                 time.sleep(1)
+        if f == 10:
+                raise Exception("No image found")
         time.sleep(5)
         while True:
             try:
@@ -46,7 +52,10 @@ def download_image(query, download_path):
         if image.startswith("data:"):
             image_content = base64.b64decode(image.split(",")[1])
         else:
-            response = requests.get(image, stream=True)
+            # Send an identifying User-Agent header with the image download request
+            headers = {'User-Agent': 'FABLE/1.2 (Website coming soon; me@paillat.dev)'}
+
+            response = requests.get(image, stream=True, headers=headers)
             response.raise_for_status()
             image_content = response.content