Fixed some things and added DALL·E option

This commit is contained in:
Paillat
2023-01-20 13:30:45 +01:00
parent f4d47b1ce5
commit a33f95f024
3 changed files with 57 additions and 44 deletions

View File

@@ -12,13 +12,13 @@
- We send the pdf and html files to the user
# How to install
**IMPORTANT** Linux installation isn't documented yet, if anyone with a nvidia gpu wants to complete the steps for linux, feel free to pull request.
**IMPORTANT** Linux and macOS installation isn't documented yet; if anyone wants to complete it, feel free to open a pull request.
## Requirements
- Python 3.8
- Pip
- A Discord bot token
- An openai api key
- (Optional) An Nvidia GPU (for image generation)
- (Optional) An Nvidia GPU (for local image generation)
## Installation
- Clone the repository
@@ -42,7 +42,8 @@ scoop install marp
use_images = False
```
### Image generation (optional)
## Image generation (optional)
### With Stable Diffusion UI (powerful gpu option)
- Install [Stable Diffusion UI](https://github.com/cmdr2/stable-diffusion-ui) and switch to the `beta` branch.
- Copy the `./image_gen_api/main.py` file to the `stable-diffusion-ui` folder
- Open the file called `Dev Console.cmd` in the `stable-diffusion-ui` folder and run the following commands:
@@ -50,6 +51,9 @@ use_images = False
pip install uvicorn
pip install fastapi
```
- In the file `main.py`, at the first line, enable or disable the `sd` image generation.
### With DALL·E 2 (costs dalle credits)
- In the file `main.py`, at the first line, enable or disable the dalle image generation.
# Running
- Run the `main.py` file with :
@@ -57,7 +61,7 @@ pip install fastapi
python main.py
```
### Image generation (optional)
### Local image generation (optional, only if you use the local image generation option)
- Open the file called `Dev Console.cmd` in the `stable-diffusion-ui` folder and run the following commands:
```
uvicorn main:app --reload

View File

@@ -1,7 +1,16 @@
import requests
import os
async def generate(prompt,path):
r = requests.get(f"http://localhost:8000/generate_image?prompt={prompt}&path={path}")
return r.json()
#print the current working directory
print(os.getcwd())
import openai


async def generate(prompt, path, mode, openai_key):
    """Generate one illustration image for *prompt* with the selected backend.

    Parameters:
        prompt: text description of the image to generate (also used by the
            caller as the image file name).
        path: directory path where the local backend writes the image file.
        mode: "sd" to call the local Stable Diffusion UI API, "dalle" to call
            OpenAI's hosted DALL·E 2 API.
        openai_key: OpenAI API key; only used when mode == "dalle".

    Returns:
        The string "image generated" in "sd" mode, the OpenAI image response
        (with the image URL under ['data'][0]['url']) in "dalle" mode, or
        None for any other mode value.
    """
    if mode == "sd":
        # The local Stable Diffusion UI API serves plain HTTP on port 8000;
        # was "https://", which cannot connect to the non-TLS local server.
        requests.get(
            f"http://localhost:8000/generate_image?prompt={prompt}&path={path}"
        )
        return "image generated"
    if mode == "dalle":
        openai.api_key = openai_key
        img = await openai.Image.acreate(
            prompt=prompt,
            n=1,
            size="1024x1024",
        )
        return img

View File

@@ -1,4 +1,7 @@
use_images = True
# Uncomment your preferred image generation method
#use_images = "dalle" # generate images remotely with dalle 2 in your openai account
use_images = "sd" # generate images locally with stable diffusion sdkit ui (instructions in the readme)
#use_images = "No" # no images
import openai
# from openai import api_key
import discord
@@ -10,10 +13,12 @@ import asyncio
import logging
import datetime
import base64
if use_images: import imagesGeneration
import requests
from dotenv import load_dotenv
if use_images != "No": import imagesGeneration
logging.basicConfig(level=logging.INFO)
imageint = ""
if use_images: imageint = "To add an image illustration , use ![bg left:50% 70%](a-description-of-the-image.png) at the beginning of the slide, just after "---"-It's not possible to add technical images but only illustrations. The images are generated by an ai, the name of the file should be a detailed description of the image wanted. For example \" ![bg left:50% 100%](a-man-wearing-a hat-ryding-a-bicicle.png)\" but don't need to show a person necessairly."
if use_images != "No": imageint = "To add an image illustration , use ![bg left:50% 70%](a-description-of-the-image.png) at the beginning of the slide, just after \"---\". Use only .png. It's not possible to add technical images but only illustrations. The images are generated by an ai, the name of the file should be a detailed description of the image wanted. For example \" ![bg left:50% 100%](a-man-wearing-a hat-ryding-a-bicicle.png)\" but don't need to show a person necessairly."
intstructions = f'''Here is a presentation with marp. It's not possible to make slides longer than 200 characters. to separate slides,
"
@@ -43,24 +48,30 @@ async def present(ctx: discord.ApplicationContext, subject: str, style: str = "d
await ctx.defer()
date = datetime.datetime.now()
date = date.strftime("%Y-%m-%d-%H-%M-%S")
prompt = f"{intstructions} {indications} The subject of the presentation is: {subject} The Language is: {language} <|endofprompt|> \n"
# replace the spaces in the subject with dashes
marp = f'''---
marp: true
theme: {styles[styles.index(style)]}
class:
- lead
'''
if style in darkstyles: marp = marp + f" - invert\n---"
else: marp = marp + "\n---"
prompt = f"{intstructions} {indications} The subject of the presentation is: {subject} The Language is: {language} <|endofprompt|> \n {marp}"
subject2 = subject
subject = subject.replace(" ", "-")
# we save the subject in base64 in a variable
b64 = base64.urlsafe_b64encode(subject.encode("utf-8"))
# if it doesn't exist, create a directory named after the id of the user who called the command
uid = str(ctx.author.id)
if not os.path.exists("data/"+uid):
os.mkdir("data/"+uid)
if not os.path.exists("./data/"+uid):
os.mkdir("./data/"+uid)
datenow = datetime.datetime.now()
datenow = datenow.strftime("%Y-%m-%d-%H-%M-%S")
os.mkdir(f"data/{uid}/{b64}{datenow}")
os.mkdir(f"./data/{uid}/{b64}{datenow}")
response = await openai.Completion.acreate(
engine="text-davinci-003",
prompt=prompt,
temperature=0.7,
temperature=0.6,
max_tokens=1024,
top_p=1,
frequency_penalty=0,
@@ -69,19 +80,6 @@ async def present(ctx: discord.ApplicationContext, subject: str, style: str = "d
)
#we save the output in a variable
output = response["choices"][0]["text"]
#if the output dosent start with --- or with \n--- or with \n\n--- we add it at the beginning of the output
#we add the marp header
marp = f'''---
marp: true
theme: {styles[styles.index(style)]}
class:
- lead
'''
if style in darkstyles:
marp = marp + f" - invert\n"
# if not output.startswith("---") and not output.startswith("\n---") and not output.startswith("\n\n---"):
# output = "---\n" + output
present = marp + output
##we save the output in a file called "subject.md"
matches = re.finditer(r'!\[.*?\]\((.*?)\)', present)
@@ -89,23 +87,25 @@ class:
for match in matches:
image_filenames.append(match.group(1))
#we create a text file with the image names and a md file for the presentation with utf8 encoding
with open(f"./data/{uid}/{b64}{datenow}/{subject}.md", "w", encoding="utf8") as f:
f.write(present)
with open(f"./data/{uid}/{b64}{datenow}/{subject}-images.txt", "w", encoding="utf8") as f:
for image in image_filenames:
f.write(image + "\n")
#now we generate the images, if there are any
if len(image_filenames) > 0 and use_images:
if len(image_filenames) > 0 and use_images!="no":
#now we first remove the extension from the image filenames by removing the last 4 characters
image_filenames = [image[:-4] for image in image_filenames]
print(image_filenames)
for images in image_filenames:
#we download the image
await imagesGeneration.generate(images, f"{os.getcwd()}\\data\\{uid}\\{b64}{datenow}\\")
#now we rename the image to remove the _0 from the end of the filename
os.rename(f"{os.getcwd()}\\data\\{uid}\\{b64}{datenow}\\{images}_0.png", f"{os.getcwd()}\\data\\{uid}\\{b64}{datenow}\\{images}.png")
#now we whait 10 seconds for discord to resume the websocket connection
# await asyncio.sleep(10)
print ("generating image" + images)
r = await imagesGeneration.generate(images, f"{os.getcwd()}\\data\\{uid}\\{b64}{datenow}\\", use_images, apikey)
if use_images == "sd": os.rename(f"{os.getcwd()}\\.\\data\\{uid}\\{b64}{datenow}\\{images}_0.png", f"{os.getcwd()}\\data\\{uid}\\{b64}{datenow}\\{images}.png")
if use_images == "dalle":
image_url = r['data'][0]['url']
img_data = requests.get(image_url).content
with open(f'./data/{uid}/{b64}{datenow}/{images}.png', 'wb') as handler:
handler.write(img_data)
with open(f"./data/{uid}/{b64}{datenow}/{subject}.md", "w", encoding="utf8") as f: f.write(present)
#we execute the command to convert the markdown file to a pdf and html file and also generate the first slide image
cmd = f"marp --pdf --allow-local-files ./data/{uid}/{b64}{datenow}/{subject}.md"
os.system(cmd)
@@ -162,9 +162,9 @@ async def on_ready():
if not os.path.exists("data"):
os.mkdir("data")
# get the openai key from the environment (loaded from the .env file)
with open("key.env", "r") as f:
apikey = f.read()
load_dotenv()
token = os.getenv("TOKEN")
apikey = os.getenv("OPENAI")
openai.api_key = apikey
with open("token.env", "r") as f:
token = f.read()
print(token)
bot.run(token)