🚀 Maaany things

This commit is contained in:
2024-02-15 14:11:16 +01:00
parent 57bcf0af8e
commit 5be7937ffa
7 changed files with 272 additions and 50 deletions

View File

@@ -1,66 +1,174 @@
absl-py==2.1.0
aiofiles==23.2.1
aiohttp==3.9.3
aiosignal==1.3.1
altair==5.2.0
annotated-types==0.6.0
anyascii==0.3.2
anyio==4.2.0
async-timeout==4.0.3
attrs==23.2.0
audioread==3.0.1
Babel==2.14.0
bangla==0.0.2
blinker==1.7.0
blis==0.7.11
bnnumerizer==0.0.2
bnunicodenormalizer==0.1.6
catalogue==2.0.10
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
cloudpathlib==0.16.0
colorama==0.4.6
confection==0.1.4
contourpy==1.2.0
coqpit==0.0.17
cycler==0.12.1
cymem==2.0.8
Cython==3.0.8
dateparser==1.1.8
decorator==4.4.2
distro==1.9.0
docopt==0.6.2
dtw-python==1.3.1
einops==0.7.0
encodec==0.1.1
exceptiongroup==1.2.0
fastapi==0.109.2
ffmpy==0.3.2
filelock==3.13.1
Flask==3.0.2
fonttools==4.48.1
frozenlist==1.4.1
fsspec==2024.2.0
gradio==4.18.0
g2pkk==0.1.2
gradio==4.19.0
gradio_client==0.10.0
grpcio==1.60.1
gruut==2.2.3
gruut-ipa==0.13.0
gruut-lang-de==2.0.0
gruut-lang-en==2.0.0
gruut-lang-es==2.0.0
gruut-lang-fr==2.0.2
h11==0.14.0
httpcore==1.0.2
hangul-romanize==0.1.0
httpcore==1.0.3
httpx==0.26.0
huggingface-hub==0.20.3
idna==3.6
imageio==2.34.0
imageio-ffmpeg==0.4.9
importlib-resources==6.1.1
inflect==7.0.0
itsdangerous==2.1.2
jamo==0.4.1
jieba==0.42.1
Jinja2==3.1.3
joblib==1.3.2
jsonlines==1.2.0
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
kiwisolver==1.4.5
langcodes==3.3.0
lazy_loader==0.3
librosa==0.10.0
llvmlite==0.42.0
Markdown==3.5.2
markdown-it-py==3.0.0
MarkupSafe==2.1.5
matplotlib==3.8.2
matplotlib==3.8.3
mdurl==0.1.2
numpy==1.26.4
orjson==3.9.13
more-itertools==10.2.0
moviepy==1.0.3
mpmath==1.3.0
msgpack==1.0.7
multidict==6.0.5
murmurhash==1.0.10
networkx==2.8.8
nltk==3.8.1
num2words==0.5.13
numba==0.59.0
numpy==1.22.0
openai==1.12.0
openai-whisper==20231117
orjson==3.9.14
packaging==23.2
pandas==2.2.0
pandas==1.5.3
pillow==10.2.0
platformdirs==4.2.0
pooch==1.8.0
preshed==3.0.9
proglog==0.1.10
protobuf==4.25.2
psutil==5.9.8
pycparser==2.21
pydantic==2.6.1
pydantic_core==2.16.2
pydub==0.25.1
Pygments==2.17.2
pynndescent==0.5.11
pyparsing==3.1.1
pypinyin==0.50.0
pysbd==0.3.4
python-crfsuite==0.9.10
python-dateutil==2.8.2
python-multipart==0.0.9
pytz==2024.1
PyYAML==6.0.1
referencing==0.33.0
regex==2023.12.25
requests==2.31.0
rich==13.7.0
rpds-py==0.17.1
rpds-py==0.18.0
ruff==0.2.1
safetensors==0.4.2
scikit-learn==1.4.0
scipy==1.11.4
semantic-version==2.10.0
shellingham==1.5.4
six==1.16.0
smart-open==6.4.0
sniffio==1.3.0
soundfile==0.12.1
soxr==0.3.7
spacy==3.7.2
spacy-legacy==3.0.12
spacy-loggers==1.0.5
srsly==2.4.8
starlette==0.36.3
SudachiDict-core==20240109
SudachiPy==0.6.8
sympy==1.12
tensorboard==2.16.1
tensorboard-data-server==0.7.2
tf-keras==2.15.0
thinc==8.2.3
threadpoolctl==3.3.0
tiktoken==0.6.0
tokenizers==0.15.2
tomlkit==0.12.0
toolz==0.12.1
torch==2.2.0+cu118
torchaudio==2.2.0+cu118
torchvision==0.17.0+cu118
tqdm==4.66.2
trainer==0.0.36
transformers==4.37.2
TTS==0.22.0
typer==0.9.0
typing_extensions==4.9.0
tzdata==2024.1
tzlocal==5.2
umap-learn==0.5.5
Unidecode==1.3.8
urllib3==2.2.0
uvicorn==0.27.1
wasabi==1.1.2
weasel==0.3.4
websockets==11.0.3
TTS
Werkzeug==3.0.1
whisper-timestamped==1.14.4
yarl==1.9.4

View File

@@ -0,0 +1,57 @@
import anthropic
import gradio as gr
import orjson

from .BaseLLMEngine import BaseLLMEngine

# Assuming these are the models supported by Anthropic that you wish to include
ANTHROPIC_POSSIBLE_MODELS = [
    "claude-2.1",
    # Add more models as needed
]


class AnthropicsLLMEngine(BaseLLMEngine):
    num_options = 1
    name = "Anthropics"
    description = "Anthropics language model engine."

    def __init__(self, options: list) -> None:
        self.model = options[0]
        self.client = anthropic.Anthropic(api_key="YourAnthropicAPIKeyHere")  # Ensure API key is securely managed
        super().__init__()

    def generate(self, system_prompt: str, chat_prompt: str, max_tokens: int = 1024, temperature: float = 1.0, json_mode: bool = False, top_p: float = 1, frequency_penalty: float = 0, presence_penalty: float = 0) -> str | dict:
        # Note: adjust the parameters to match the Anthropic API's capabilities
        prompt = f"""{anthropic.HUMAN_PROMPT} {system_prompt} {anthropic.HUMAN_PROMPT} {chat_prompt} {anthropic.AI_PROMPT}"""
        if json_mode:
            # Anthropic does not officially support JSON mode, but we can bias the output towards a JSON-like format
            prompt += " {"
        # The legacy Completions API does not accept frequency/presence penalties,
        # so those arguments are kept for interface compatibility but not forwarded.
        response: anthropic.types.Completion = self.client.completions.create(
            max_tokens_to_sample=max_tokens,
            prompt=prompt,
            model=self.model,
            top_p=top_p,
            temperature=temperature,
        )
        content = response.completion
        if json_mode:
            # we add back the opening curly brace, which is not included in the response since it is part of the prompt
            content = "{" + content
            # we remove everything after the last closing curly brace
            content = content[:content.rfind("}") + 1]
            return orjson.loads(content)
        else:
            return content

    @classmethod
    def get_options(cls) -> list:
        return [
            gr.Dropdown(
                label="Model",
                choices=ANTHROPIC_POSSIBLE_MODELS,
                max_choices=1,
                value=ANTHROPIC_POSSIBLE_MODELS[0]
            )
        ]
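
For reference, a minimal usage sketch of the engine above. The import path and API-key handling are assumptions, and the single-element options list mirrors the Model dropdown returned by get_options:

# Hypothetical usage sketch: the package path and key handling are assumptions.
from LLMEngine.AnthropicsLLMEngine import AnthropicsLLMEngine

engine = AnthropicsLLMEngine(["claude-2.1"])  # options[0] selects the model
answer = engine.generate(
    system_prompt="You are a concise assistant.",
    chat_prompt="Summarize this commit in one sentence.",
)
data = engine.generate(
    system_prompt="Reply with a JSON object containing a 'title' key.",
    chat_prompt="Name this commit.",
    json_mode=True,  # biased towards JSON, parsed with orjson, returned as a dict
)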

View File

@@ -6,5 +6,5 @@ import openai
class BaseLLMEngine(BaseEngine):
    @abstractmethod
    def generate(self, system_prompt: str, chat_prompt: str, max_tokens: int, temperature: float, top_p: float, frequency_penalty: float, presence_penalty: float) -> str:
    def generate(self, system_prompt: str, chat_prompt: str, max_tokens: int, temperature: float, top_p: float, frequency_penalty: float, presence_penalty: float) -> str | dict:
        pass

View File

@@ -1,49 +1,43 @@
import openai
import anthropic
import gradio as gr
import orjson
from abc import ABC, abstractmethod

from .BaseLLMEngine import BaseLLMEngine

OPENAI_POSSIBLE_MODELS = [
    "gpt-3.5-turbo-0125",
    "gpt-4-turbo-preview",
# Assuming these are the models supported by Anthropic that you wish to include
ANTHROPIC_POSSIBLE_MODELS = [
    "claude-2.1",
    # Add more models as needed
]

class OpenaiLLMEngine(BaseLLMEngine):
class AnthropicsLLMEngine(BaseLLMEngine):
    num_options = 1
    name = "OpenAI"
    description = "OpenAI language model engine."
    name = "Anthropics"
    description = "Anthropics language model engine."

    def __init__(self, options: list) -> None:
        self.model = options[0]
        self.client = anthropic.Anthropic(api_key="YourAnthropicAPIKeyHere")  # Ensure API key is securely managed
        super().__init__()

    def generate(self, system_prompt: str, chat_prompt: str, max_tokens: int = 512, temperature: float = 1.0, json_mode: bool= False, top_p: float = 1, frequency_penalty: float = 0, presence_penalty: float = 0) -> str:
        response = openai.chat.completions.create(
            model=self.model,
    def generate(self, system_prompt: str, chat_prompt: str, max_tokens: int = 1024, temperature: float = 1.0, json_mode: bool = False, top_p: float = 1, frequency_penalty: float = 0, presence_penalty: float = 0) -> str | dict:
        # Note: adjust the parameters to match the Anthropic API's capabilities
        message = self.client.messages.create(
            max_tokens=max_tokens,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": chat_prompt},
            ],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
            response_format={ "type": "json_object" } if json_mode else openai._types.NOT_GIVEN
            model=self.model,
        )
        return response.choices[0].message.content if not json_mode else orjson.loads(response.choices[0].message.content)
        return message.content

    @classmethod
    def get_options(cls) -> list:
        return [
            gr.Dropdown(
                label="Model",
                choices=OPENAI_POSSIBLE_MODELS,
                choices=ANTHROPIC_POSSIBLE_MODELS,
                max_choices=1,
                value=OPENAI_POSSIBLE_MODELS[0]
                value=ANTHROPIC_POSSIBLE_MODELS[0]
            )
        ]
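
A note on the Messages API call above, offered as a hedged sketch rather than part of the commit: in the official anthropic SDK the system prompt is passed as a top-level `system` parameter (a "system" role inside `messages` is rejected), and the reply arrives as a list of content blocks rather than a plain string, so the call and unpacking typically look like this:

# Sketch only, not part of the diff: assumes `client`, `system_prompt` and
# `chat_prompt` as defined in the engine above.
message = client.messages.create(
    model="claude-2.1",
    max_tokens=1024,
    system=system_prompt,
    messages=[{"role": "user", "content": chat_prompt}],
)
text = message.content[0].text  # the first content block holds the generated text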

View File

@@ -1,8 +1,16 @@
import moviepy.editor as mp
import whisper_timestamped as wt
from typing import TypedDict
from torch.cuda import is_available
from abc import ABC, abstractmethod

# Assuming BaseEngine is defined elsewhere in your project
from ..BaseEngine import BaseEngine


class Word(TypedDict):
    start: float
    end: float
    text: str


class BaseTTSEngine(BaseEngine):
@@ -10,7 +18,53 @@ class BaseTTSEngine(BaseEngine):
    def synthesize(self, text: str, path: str) -> str:
        pass

    def time_with_whisper(self, path: str) -> list[Word]:
        """
        Transcribes the audio file at the given path using a pre-trained model and returns a list of words.

        Args:
            path (str): The path to the audio file.

        Returns:
            list[Word]: A list of Word objects representing the transcribed words.

        Example:
            ```json
            [
                {
                    "start": 0.00,
                    "end": 0.50,
                    "text": "Hello"
                },
                {
                    "start": 0.50,
                    "end": 1.00,
                    "text": "world"
                }
            ]
            ```
        """
        device = "cuda" if is_available() else "cpu"
        audio = wt.load_audio(path)
        model = wt.load_model("tiny", device=device)
        result = wt.transcribe(model=model, audio=audio)
        # The word entries live under each segment of the transcription result
        words = [word for segment in result["segments"] for word in segment["words"]]
        for word in words:
            # Confidence scores are not needed for the current use case
            del word["confidence"]
        return words

    def force_duration(self, duration: float, path: str):
        """
        Forces the audio clip at the given path to have the specified duration.

        Args:
            duration (float): The desired duration in seconds.
            path (str): The path to the audio clip file.

        Returns:
            None
        """
        audio_clip = mp.AudioFileClip(path)
        if audio_clip.duration > duration:
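
The hunk is cut off here in the rendered diff, so the body of the branch is not visible. As a hedged sketch only, one plausible completion, matching the "at most the specified value" wording used by CoquiTTSEngine below, would trim the clip with moviepy (the actual implementation may instead speed the audio up or differ entirely):

# Hypothetical continuation of force_duration, not shown in the diff above.
import os
import moviepy.editor as mp

def force_duration(duration: float, path: str) -> None:
    audio_clip = mp.AudioFileClip(path)
    if audio_clip.duration > duration:
        trimmed_path = path + ".trimmed.wav"  # illustrative temporary file name
        audio_clip.subclip(0, duration).write_audiofile(trimmed_path)
        audio_clip.close()
        os.replace(trimmed_path, path)
    else:
        audio_clip.close()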

View File

@@ -1,9 +1,9 @@
import gradio as gr
# import TTS
from TTS.api import TTS  # the TTS class lives in the TTS.api module
import os
# import torch
import torch
from .BaseTTSEngine import BaseTTSEngine
@@ -102,15 +102,25 @@ class CoquiTTSEngine(BaseTTSEngine):
        os.environ["COQUI_TOS_AGREED"] = "1"
        # self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
        # device = "cuda" if torch.cuda.is_available() else "cpu"
        # self.tts.to(device)
        self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.tts.to(device)

    def synthesize(self, text: str, path: str) -> str:
        # self.tts.tts_to_file(text=text, file_path=path, lang=self.language, speaker=self.voice)
    def synthesize(self, text: str, path: str):
        """
        Synthesizes the given text into speech and saves it to the specified file path.

        Args:
            text (str): The text to synthesize into speech.
            path (str): The file path to save the synthesized speech.

        Returns:
            list[Word]: Word-level timings for the synthesized audio, obtained via time_with_whisper.
        """
        self.tts.tts_to_file(text=text, file_path=path, language=self.language, speaker=self.voice)
        if self.to_force_duration:
            self.force_duration(float(self.duration), path)
        return path
        return self.time_with_whisper(path)

    @classmethod
    def get_options(cls) -> list:
@@ -129,12 +139,11 @@ class CoquiTTSEngine(BaseTTSEngine):
            ),
        ]

        duration_checkbox = gr.Checkbox(value=False)
        duration = gr.Number(label="Duration", value=57, step=1, minimum=10, visible=False)
        duration_checkbox = gr.Checkbox(label="Force duration", info="Force the duration of the generated audio to be at most the specified value", value=False)
        duration = gr.Number(label="Duration [s]", value=57, step=1, minimum=10, visible=False)
        duration_switch = lambda x: gr.update(visible=x)
        duration_checkbox.change(duration_switch, inputs=[duration_checkbox], outputs=[duration])

        duration_checkbox_group = gr.CheckboxGroup([duration_checkbox], label="Force duration")
        options.append(duration_checkbox_group)
        options.append(duration_checkbox)
        options.append(duration)
        return options
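
The visibility toggle wired up above follows the standard Gradio event pattern; a minimal self-contained sketch of the same idea (labels and values are illustrative):

import gradio as gr

with gr.Blocks() as demo:
    force = gr.Checkbox(label="Force duration", value=False)
    seconds = gr.Number(label="Duration [s]", value=57, minimum=10, visible=False)
    # Show the duration field only while the checkbox is ticked.
    force.change(lambda checked: gr.update(visible=checked), inputs=[force], outputs=[seconds])

demo.launch()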

View File

@@ -1,5 +1,5 @@
from . import TTSEngine
from .BaseEngine import BaseEngine
from . import TTSEngine
from . import ScriptEngine
from . import LLMEngine