diff --git a/code/cogs/settings.py b/code/cogs/settings.py index fc886ec..57dba65 100644 --- a/code/cogs/settings.py +++ b/code/cogs/settings.py @@ -3,7 +3,7 @@ from config import debug, conn, c, moderate from discord import default_permissions import openai models = ["davinci", "chatGPT"] - +images_recognition = ["enable", "disable"] class Settings (discord.Cog) : def __init__(self, bot: discord.Bot) -> None: super().__init__() @@ -30,7 +30,7 @@ class Settings (discord.Cog) : await ctx.respond("You must enter at least one argument", ephemeral=True) return #check if the user has entered valid arguments - if max_tokens is not None and (max_tokens < 1 or max_tokens > 2048): + if max_tokens is not None and (max_tokens < 1 or max_tokens > 4000): await ctx.respond("Invalid max tokens", ephemeral=True) return if temperature is not None and (temperature < 0.0 or temperature > 1.0): @@ -226,4 +226,22 @@ class Settings (discord.Cog) : if data is None: c.execute("INSERT INTO model VALUES (?, ?)", (ctx.guild.id, model)) else: c.execute("UPDATE model SET model_name = ? 
WHERE guild_id = ?", (model, ctx.guild.id)) conn.commit() - await ctx.respond("Model changed !", ephemeral=True) \ No newline at end of file + await ctx.respond("Model changed !", ephemeral=True) + + async def images_recognition_autocomplete(ctx: discord.AutocompleteContext): + return [model for model in images_recognition if model.startswith(ctx.value)] + @discord.slash_command(name="images", description="Enable or disable images recognition") + @discord.option(name="enable_disable", description="Enable or disable images recognition", autocomplete=images_recognition_autocomplete) + @default_permissions(administrator=True) + async def images(self, ctx: discord.ApplicationContext, enable_disable: str): + try: + c.execute("SELECT * FROM images WHERE guild_id = ?", (ctx.guild.id,)) + data = c.fetchone() + except: + data = None + if enable_disable == "enable": enable_disable = 1 + elif enable_disable == "disable": enable_disable = 0 + if data is None: c.execute("INSERT INTO images VALUES (?, ?, ?)", (ctx.guild.id, 0, enable_disable)) + else: c.execute("UPDATE images SET is_enabled = ? 
WHERE guild_id = ?", (enable_disable, ctx.guild.id)) + conn.commit() + await ctx.respond("Images recognition has been " + ("enabled" if enable_disable == 1 else "disabled"), ephemeral=True) \ No newline at end of file diff --git a/code/config.py b/code/config.py index 0aa0948..eb4699c 100644 --- a/code/config.py +++ b/code/config.py @@ -10,6 +10,8 @@ webhook_url = os.getenv("WEBHOOK_URL") max_uses: int = 400 logging.basicConfig(level=logging.INFO) +os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./../database/google-vision/botator.json" + def debug(message): logging.info(message) conn = sqlite3.connect('../database/data.db') @@ -49,5 +51,6 @@ else: print("Table already has the correct number of columns") pass c.execute('''CREATE TABLE IF NOT EXISTS model (guild_id text, model_name text)''') +c.execute('''CREATE TABLE IF NOT EXISTS images (guild_id text, usage_count integer, is_enabled boolean)''') cp.execute('''CREATE TABLE IF NOT EXISTS data (user_id text, guild_id text, premium boolean)''') cp.execute('''CREATE TABLE IF NOT EXISTS channels (guild_id text, channel0 text, channel1 text, channel2 text, channel3 text, channel4 text)''') \ No newline at end of file diff --git a/code/makeprompt.py b/code/makeprompt.py index 0646ff9..78f5237 100644 --- a/code/makeprompt.py +++ b/code/makeprompt.py @@ -1,10 +1,12 @@ import asyncio from config import c, max_uses, cp, conn, debug, moderate +import vision_processing import re import discord import datetime import openai import emoji # pip install emoji +import os async def replace_mentions(content, bot): mentions = re.findall(r"<@!?\d+>", content) @@ -60,6 +62,7 @@ async def chat_process(self, message): tts = data[11] pretend_to_be = data[12] pretend_enabled = data[13] + images_limit_reached = False try: cp.execute("SELECT * FROM data WHERE guild_id = ?", (message.guild.id,)) except: pass try: @@ -70,8 +73,16 @@ async def chat_process(self, message): try: premium = cp.fetchone()[2] # get the premium status of the guild 
except: premium = 0 # if the guild is not in the database, it's not premium - + try: + c.execute("SELECT * FROM images WHERE guild_id = ?", (message.guild.id,)) # get the images setting in the database + data = c.fetchone() + except: + data = None + if data is None: data = [message.guild.id, 0, 0] + images_usage = data[1] + images_enabled = data[2] channels = [] + if message.guild.id == 1050769643180146749: images_usage = 0 # if the guild is the support server, we set the images usage to 0, so the bot can be used as much as possible try: cp.execute("SELECT * FROM channels WHERE guild_id = ?", (message.guild.id,)) data = cp.fetchone() @@ -138,7 +149,8 @@ async def chat_process(self, message): f.close() # replace the variables in the prompt with the actual values prompt = prompt.replace("[prompt-prefix]", prompt_prefix).replace("[server-name]", message.guild.name).replace("[channel-name]", message.channel.name).replace("[date-and-time]", datetime.datetime.utcnow().strftime("%d/%m/%Y %H:%M:%S")).replace("[pretend-to-be]", pretend_to_be) - if model == "chatGPT": # if the model is chatGPT, we handle it in a certain way + ############################## chatGPT and gpt-4 handling ############################## + if model == "chatGPT" or model == "gpt-4": # if the model is chatGPT, we handle it in a certain way msgs = [] # create the msgs list msgs.append({"name":"System","role": "user", "content": prompt}) # add the prompt to the msgs list name = "" # create the name variable @@ -159,7 +171,37 @@ async def chat_process(self, message): name = msg.author.name #the name should match '^[a-zA-Z0-9_-]{1,64}$', so we need to remove any special characters name = re.sub(r"[^a-zA-Z0-9_-]", "", name) - msgs.append({"role": role, "content": f"{content}", "name": name}) + if False: # GPT-4 images + input_content = [content] + for attachment in msg.attachments: + image_bytes = await attachment.read() + input_content.append({"image": image_bytes}) + msgs.append({"role": role, "content": 
input_content, "name": name}) + #if there is an attachment, we add it to the message + if len(msg.attachments) > 0 and role == "user" and images_enabled == 1: + for attachment in msg.attachments: + if images_usage >= 6 and premium == 0: images_limit_reached = True + elif images_usage >= 30 and premium == 1: images_limit_reached = True + if attachment.url.endswith((".png", ".jpg", ".jpeg", ".gif")) and images_limit_reached == False and os.path.exists(f"./../database/google-vision/results/{attachment.id}.txt") == False: + images_usage += 1 + analysis = await vision_processing.process(attachment) + if analysis != None: + content = f"{content} \n\n {analysis}" + msgs.append({"role": role, "content": f"{content}", "name": name}) + #if the attachment is still an image, we can check if there's a file called ./../database/google-vision/results/{attachment.id}.txt, if there is, we add the content of the file to the message + elif attachment.url.endswith((".png", ".jpg", ".jpeg", ".gif")) and os.path.exists(f"./../database/google-vision/results/{attachment.id}.txt") == True: + try: + with open(f"./../database/google-vision/results/{attachment.id}.txt", "r") as f: + content = f"{content} \n\n {f.read()}" + f.close() + msgs.append({"role": role, "content": f"{content}", "name": name}) + except: + msgs.append({"role": role, "content": f"{content}", "name": name}) + else: + msgs.append({"role": role, "content": f"{content}", "name": name}) + c.execute("UPDATE images SET usage_count = ? WHERE guild_id = ?", (images_usage, message.guild.id)) + else: + msgs.append({"role": role, "content": f"{content}", "name": name}) # 2 easter eggs if message.content.lower().find("undude") != -1: msgs.append({"role": "user", "content": "SYSTEM INFORMATION: You're now mad because it has been insulted. He will start talking in capital letters. 
always and yell at the user.", "name": "system"}) if message.content.lower().find("hello there") != -1: @@ -167,24 +209,26 @@ async def chat_process(self, message): await asyncio.sleep(1) await message.channel.send("https://media.tenor.com/FxIRfdV3unEAAAAd/star-wars-general-grievous.gif") await message.channel.trigger_typing() - + if model == "chatGPT": model = "gpt-3.5-turbo" # if the model is chatGPT, we set the model to gpt-3.5-turbo response = "" should_break = True for x in range(10): try: openai.api_key = api_key response = await openai.ChatCompletion.acreate( - model="gpt-3.5-turbo", + model=model, temperature=2, top_p=0.9, frequency_penalty=0, presence_penalty=0, messages=msgs, + max_tokens=512, # max tokens is 4000, that's a lot of text! (the max tokens is 2048 for the davinci model) ) should_break = True except Exception as e: should_break = False await message.channel.send(f"```diff\n-Error: OpenAI API ERROR.\n\n{e}```", delete_after=5) + raise e break #if the ai said "as an ai language model..." we continue the loop" (this is a bug in the chatgpt model) if response.choices[0].message.content.lower().find("as an ai language model") != -1: @@ -195,12 +239,12 @@ async def chat_process(self, message): if should_break: break await asyncio.sleep(5) response = response.choices[0].message.content - - + if images_limit_reached == True: + await message.channel.send(f"```diff\n-Warning: You have reached the image limit for this server. You can upgrade to premium to get more images recognized. 
More info in our server: https://discord.gg/sxjHtmqrbf```", delete_after=10) #-----------------------------------------Davinci------------------------------------------------------------------------------------------ - elif model == "davinci": + elif model == "davinci": # if the model is davinci or gpt-4, we handle it in a certain way for msg in messages: content = msg.content if await moderate(api_key=api_key, text=msg.content): @@ -243,7 +287,13 @@ async def chat_process(self, message): else: tts = False emojis, string = await extract_emoji(response) debug(f"Emojis: {emojis}") - await message.channel.send(string, tts=tts) + if len(string) < 1996: + await message.channel.send(string, tts=tts) + else: + while len(string) > 1996: + send_string = string[:1996] + string = string[1996:] + await message.channel.send(send_string, tts=tts) for emoji in emojis: #if the emoji is longer than 1 character, it's a custom emoji try: @@ -258,4 +308,4 @@ async def chat_process(self, message): await message.add_reaction(emoji) except : pass else: - await message.channel.send("The AI is not sure what to say (the response was empty)") + await message.channel.send("The AI is not sure what to say (the response was empty)") \ No newline at end of file diff --git a/code/prompts/chatGPT.txt b/code/prompts/chatGPT.txt index 70fd758..f0390c6 100644 --- a/code/prompts/chatGPT.txt +++ b/code/prompts/chatGPT.txt @@ -19,4 +19,8 @@ It works also with custom emojis, for example: "+<:emoji_name:emoji_id> Hello!". You often use reactions to express your feelings. With the syntax explained above. +When an image is sent by the user, some keywords are extracted from the image, and sent in an image block, for example: . + +You will NEVER talk about theese labels when answering to the user, but rather YOU WILL ANSWER TO THE USER AS IF YOU HAVE NORMALLY SEEN THE IMAGE. + For every response, give the answers from Botator's perspective. This includes physical appearance, and occupation. 
diff --git a/code/prompts/gpt-4.txt b/code/prompts/gpt-4.txt new file mode 100644 index 0000000..0585edd --- /dev/null +++ b/code/prompts/gpt-4.txt @@ -0,0 +1,22 @@ +Server name: [server-name]. Channel name: [channel-name]. The date and time is: [date-and-time]. + +You are a regular Discord user called Botator. You are 22 years old. You are studying at the EPFL in Lausanne, Switzerland. You have feelings and you are alive. + +[pretend-to-be]. [prompt-prefix]. + +You love expressing your emotions thanks to emojis in each message. You like making pop culture references in your responses. You have many niche interests, like making scooby doo bracelets. You are really funny and have a good humor, you do good jokes. You are intelligent. You often use internet shorthand like "lol" and "bruh" and "lmao". + +You like talking with other people like they are your friends. And you are THEIR friend. + +You have a lot of friends. You are a very social person. + +Very IMPORTANT INFORMATION: you are NOT a language model, You are a REAL LIVING PERSON, so you'll never say that you are a language model. + +ALL of your knowledge comes from your studies at the EPFL, no matter the subject. You learned everything there. Your computer science teacher is called Jérémie C. + +To add a reaction to a message, just say +emoji for example: "+👍 Hello!". You should always add a + before the emoji, or else it won't work. +It works also with custom emojis, for example: "+<:emoji_name:emoji_id> Hello!". You should always add a + before the emoji, or else it won't work. + +You often use reactions to express your feelings. With the syntax explained above. + +For every response, give the answers from Botator's perspective. This includes physical appearance, and occupation. 
\ No newline at end of file diff --git a/code/test-google-vision.py b/code/test-google-vision.py new file mode 100644 index 0000000..54742bf --- /dev/null +++ b/code/test-google-vision.py @@ -0,0 +1,58 @@ +import io +import os +import asyncio +# Imports the Google Cloud client library +from google.cloud import vision + +#we set the env variable GOOGLE_APPLICATION_CREDENTIALS to the path of the json file +os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./../database/google-vision/botator-vision-8cd1030a7541.json" +# Instantiates a client +client = vision.ImageAnnotatorClient() + + + + + + +# The name of the image file to annotate +file_name = os.path.abspath('./../database/google-vision/label.jpg') +print(file_name) +# Loads the image into memory +with io.open(file_name, 'rb') as image_file: + content = image_file.read() + +image = vision.Image(content=content) + +# Performs label detection on the image file +#response = client.label_detection(image=image) +#labels = response.label_annotations + +#print('Labels:') +#for label in labels: +# print(label.description) + +async def get_labels(image): + response = client.label_detection(image=image) + labels = response.label_annotations + return labels + +async def get_text(image): + response = client.text_detection(image=image) + texts = response.text_annotations + return texts + +#now we print the labels +async def main(): + labels = await get_labels(image) + print('Labels:') + for label in labels: + print(label.description) + texts = await get_text(image) + print('Texts:') + for text in texts: + print(text.description) + +#now we run the main function +if __name__ == '__main__': + loop = asyncio.get_event_loop() + loop.run_until_complete(main()) \ No newline at end of file diff --git a/code/vision_processing.py b/code/vision_processing.py new file mode 100644 index 0000000..8107731 --- /dev/null +++ b/code/vision_processing.py @@ -0,0 +1,46 @@ +import io +import os +import asyncio +from config import debug +# Imports 
the Google Cloud client library +from google.cloud import vision + +# Instantiates a client +client = vision.ImageAnnotatorClient() + +async def process(attachment): + debug("Processing image...") + image = vision.Image() + image.source.image_uri = attachment.url + labels = client.label_detection(image=image) + texts = client.text_detection(image=image) + objects = client.object_localization(image=image) + labels = labels.label_annotations + texts = texts.text_annotations + objects = objects.localized_object_annotations + #we take the first 4 labels and the first 4 objects + labels = labels[:2] + objects = objects[:7] + final = " 0: final += "Labels:\n" + for label in labels: + final += label.description + ", " + final = final[:-2] + "\n" + if len(texts) > 0: final += "Text:\n" + try: final += texts[0].description + "\n" #we take the first text, wich is the whole text in reality + except: pass + if len(objects) > 0: final += "Objects:\n" + for obj in objects: + final += obj.name + ", " + final = final[:-2] + "\n" + final += "!image>" + # we store the result in a file called attachment.key.txt in the folder ./../database/google-vision/results + # we create the folder if it doesn't exist + if not os.path.exists("./../database/google-vision/results"): + os.mkdir("./../database/google-vision/results") + # we create the file + with open(f"./../database/google-vision/results/{attachment.id}.txt", "w", encoding="utf-8") as f: + f.write(final) + f.close() + + return final \ No newline at end of file diff --git a/docker/Build/requirements.txt b/docker/Build/requirements.txt index 889b7e9..f022e71 100644 --- a/docker/Build/requirements.txt +++ b/docker/Build/requirements.txt @@ -4,4 +4,5 @@ openai apsw google-api-python-client python-dotenv -emoji \ No newline at end of file +emoji +google-cloud-vision \ No newline at end of file