mirror of
https://github.com/Paillat-dev/viralfactory.git
synced 2026-01-02 09:16:19 +00:00
🚀 Maaany things
This commit is contained in:
124
requirements.txt
124
requirements.txt
@@ -1,66 +1,174 @@
|
||||
absl-py==2.1.0
|
||||
aiofiles==23.2.1
|
||||
aiohttp==3.9.3
|
||||
aiosignal==1.3.1
|
||||
altair==5.2.0
|
||||
annotated-types==0.6.0
|
||||
anyascii==0.3.2
|
||||
anyio==4.2.0
|
||||
async-timeout==4.0.3
|
||||
attrs==23.2.0
|
||||
audioread==3.0.1
|
||||
Babel==2.14.0
|
||||
bangla==0.0.2
|
||||
blinker==1.7.0
|
||||
blis==0.7.11
|
||||
bnnumerizer==0.0.2
|
||||
bnunicodenormalizer==0.1.6
|
||||
catalogue==2.0.10
|
||||
certifi==2024.2.2
|
||||
cffi==1.16.0
|
||||
charset-normalizer==3.3.2
|
||||
click==8.1.7
|
||||
cloudpathlib==0.16.0
|
||||
colorama==0.4.6
|
||||
confection==0.1.4
|
||||
contourpy==1.2.0
|
||||
coqpit==0.0.17
|
||||
cycler==0.12.1
|
||||
cymem==2.0.8
|
||||
Cython==3.0.8
|
||||
dateparser==1.1.8
|
||||
decorator==4.4.2
|
||||
distro==1.9.0
|
||||
docopt==0.6.2
|
||||
dtw-python==1.3.1
|
||||
einops==0.7.0
|
||||
encodec==0.1.1
|
||||
exceptiongroup==1.2.0
|
||||
fastapi==0.109.2
|
||||
ffmpy==0.3.2
|
||||
filelock==3.13.1
|
||||
Flask==3.0.2
|
||||
fonttools==4.48.1
|
||||
frozenlist==1.4.1
|
||||
fsspec==2024.2.0
|
||||
gradio==4.18.0
|
||||
g2pkk==0.1.2
|
||||
gradio==4.19.0
|
||||
gradio_client==0.10.0
|
||||
grpcio==1.60.1
|
||||
gruut==2.2.3
|
||||
gruut-ipa==0.13.0
|
||||
gruut-lang-de==2.0.0
|
||||
gruut-lang-en==2.0.0
|
||||
gruut-lang-es==2.0.0
|
||||
gruut-lang-fr==2.0.2
|
||||
h11==0.14.0
|
||||
httpcore==1.0.2
|
||||
hangul-romanize==0.1.0
|
||||
httpcore==1.0.3
|
||||
httpx==0.26.0
|
||||
huggingface-hub==0.20.3
|
||||
idna==3.6
|
||||
imageio==2.34.0
|
||||
imageio-ffmpeg==0.4.9
|
||||
importlib-resources==6.1.1
|
||||
inflect==7.0.0
|
||||
itsdangerous==2.1.2
|
||||
jamo==0.4.1
|
||||
jieba==0.42.1
|
||||
Jinja2==3.1.3
|
||||
joblib==1.3.2
|
||||
jsonlines==1.2.0
|
||||
jsonschema==4.21.1
|
||||
jsonschema-specifications==2023.12.1
|
||||
kiwisolver==1.4.5
|
||||
langcodes==3.3.0
|
||||
lazy_loader==0.3
|
||||
librosa==0.10.0
|
||||
llvmlite==0.42.0
|
||||
Markdown==3.5.2
|
||||
markdown-it-py==3.0.0
|
||||
MarkupSafe==2.1.5
|
||||
matplotlib==3.8.2
|
||||
matplotlib==3.8.3
|
||||
mdurl==0.1.2
|
||||
numpy==1.26.4
|
||||
orjson==3.9.13
|
||||
more-itertools==10.2.0
|
||||
moviepy==1.0.3
|
||||
mpmath==1.3.0
|
||||
msgpack==1.0.7
|
||||
multidict==6.0.5
|
||||
murmurhash==1.0.10
|
||||
networkx==2.8.8
|
||||
nltk==3.8.1
|
||||
num2words==0.5.13
|
||||
numba==0.59.0
|
||||
numpy==1.22.0
|
||||
openai==1.12.0
|
||||
openai-whisper==20231117
|
||||
orjson==3.9.14
|
||||
packaging==23.2
|
||||
pandas==2.2.0
|
||||
pandas==1.5.3
|
||||
pillow==10.2.0
|
||||
platformdirs==4.2.0
|
||||
pooch==1.8.0
|
||||
preshed==3.0.9
|
||||
proglog==0.1.10
|
||||
protobuf==4.25.2
|
||||
psutil==5.9.8
|
||||
pycparser==2.21
|
||||
pydantic==2.6.1
|
||||
pydantic_core==2.16.2
|
||||
pydub==0.25.1
|
||||
Pygments==2.17.2
|
||||
pynndescent==0.5.11
|
||||
pyparsing==3.1.1
|
||||
pypinyin==0.50.0
|
||||
pysbd==0.3.4
|
||||
python-crfsuite==0.9.10
|
||||
python-dateutil==2.8.2
|
||||
python-multipart==0.0.9
|
||||
pytz==2024.1
|
||||
PyYAML==6.0.1
|
||||
referencing==0.33.0
|
||||
regex==2023.12.25
|
||||
requests==2.31.0
|
||||
rich==13.7.0
|
||||
rpds-py==0.17.1
|
||||
rpds-py==0.18.0
|
||||
ruff==0.2.1
|
||||
safetensors==0.4.2
|
||||
scikit-learn==1.4.0
|
||||
scipy==1.11.4
|
||||
semantic-version==2.10.0
|
||||
shellingham==1.5.4
|
||||
six==1.16.0
|
||||
smart-open==6.4.0
|
||||
sniffio==1.3.0
|
||||
soundfile==0.12.1
|
||||
soxr==0.3.7
|
||||
spacy==3.7.2
|
||||
spacy-legacy==3.0.12
|
||||
spacy-loggers==1.0.5
|
||||
srsly==2.4.8
|
||||
starlette==0.36.3
|
||||
SudachiDict-core==20240109
|
||||
SudachiPy==0.6.8
|
||||
sympy==1.12
|
||||
tensorboard==2.16.1
|
||||
tensorboard-data-server==0.7.2
|
||||
tf-keras==2.15.0
|
||||
thinc==8.2.3
|
||||
threadpoolctl==3.3.0
|
||||
tiktoken==0.6.0
|
||||
tokenizers==0.15.2
|
||||
tomlkit==0.12.0
|
||||
toolz==0.12.1
|
||||
torch==2.2.0+cu118
|
||||
torchaudio==2.2.0+cu118
|
||||
torchvision==0.17.0+cu118
|
||||
tqdm==4.66.2
|
||||
trainer==0.0.36
|
||||
transformers==4.37.2
|
||||
TTS==0.22.0
|
||||
typer==0.9.0
|
||||
typing_extensions==4.9.0
|
||||
tzdata==2024.1
|
||||
tzlocal==5.2
|
||||
umap-learn==0.5.5
|
||||
Unidecode==1.3.8
|
||||
urllib3==2.2.0
|
||||
uvicorn==0.27.1
|
||||
wasabi==1.1.2
|
||||
weasel==0.3.4
|
||||
websockets==11.0.3
|
||||
TTS
|
||||
Werkzeug==3.0.1
|
||||
whisper-timestamped==1.14.4
|
||||
yarl==1.9.4
|
||||
|
||||
57
src/engines/LLMEngine/AnthropicLLMEngine.py
Normal file
57
src/engines/LLMEngine/AnthropicLLMEngine.py
Normal file
@@ -0,0 +1,57 @@
|
||||
import anthropic
|
||||
import gradio as gr
|
||||
import orjson
|
||||
|
||||
from .BaseLLMEngine import BaseLLMEngine
|
||||
|
||||
# Anthropic models offered in this engine's model dropdown (assumed to be supported by the API)
|
||||
ANTHROPIC_POSSIBLE_MODELS = [
|
||||
"claude-2.1",
|
||||
# Add more models as needed
|
||||
]
|
||||
|
||||
class AnthropicsLLMEngine(BaseLLMEngine):
    """LLM engine that generates text through Anthropic's text-completions endpoint.

    The engine exposes a single option (the model name) and can optionally
    coerce the model output into a parsed JSON object.
    """

    num_options = 1
    name = "Anthropics"
    description = "Anthropics language model engine."

    def __init__(self, options: list) -> None:
        """Store the selected model and create the API client.

        Args:
            options: Values of the components returned by ``get_options``;
                ``options[0]`` is the model name.
        """
        self.model = options[0]
        # No key is hard-coded here: when ``api_key`` is omitted the SDK reads
        # it from the ANTHROPIC_API_KEY environment variable.
        self.client = anthropic.Anthropic()
        super().__init__()

    def generate(self, system_prompt: str, chat_prompt: str, max_tokens: int = 1024, temperature: float = 1.0, json_mode: bool = False, top_p: float = 1, frequency_penalty: float = 0, presence_penalty: float = 0) -> str | dict:
        """Generate a completion for the given prompts.

        Args:
            system_prompt: Instructions placed first in the prompt.
            chat_prompt: The user request placed after the system prompt.
            max_tokens: Maximum number of tokens to sample.
            temperature: Sampling temperature.
            json_mode: When True, bias the model towards JSON output and
                return the parsed object instead of the raw text.
            top_p: Nucleus sampling parameter.
            frequency_penalty: Accepted for signature compatibility with
                ``BaseLLMEngine.generate``; not forwarded, because the
                completions endpoint has no such parameter.
            presence_penalty: Accepted for signature compatibility; not
                forwarded for the same reason.

        Returns:
            The completion text, or the parsed JSON value when ``json_mode``
            is True.

        Raises:
            orjson.JSONDecodeError: If ``json_mode`` is True and the model
                output cannot be parsed as JSON.
        """
        prompt = f"""{anthropic.HUMAN_PROMPT} {system_prompt} {anthropic.HUMAN_PROMPT} {chat_prompt} {anthropic.AI_PROMPT}"""
        if json_mode:
            # Anthropic does not officially support a JSON mode, but starting
            # the assistant turn with "{" biases the output towards JSON.
            prompt += " {"

        # Only parameters known to the completions endpoint are sent; passing
        # frequency_penalty here makes the SDK raise TypeError.
        response: anthropic.types.Completion = self.client.completions.create(
            max_tokens_to_sample=max_tokens,
            prompt=prompt,
            model=self.model,
            top_p=top_p,
            temperature=temperature,
        )

        content = response.completion
        if not json_mode:
            return content

        # Add back the opening curly brace: it was part of the prompt, so it
        # is not included in the response.
        content = "{" + content
        # Drop everything after the last closing curly brace.
        content = content[:content.rfind("}") + 1]
        return orjson.loads(content)

    @classmethod
    def get_options(cls) -> list:
        """Return the Gradio components used to configure this engine.

        Returns:
            A one-element list containing the model dropdown.
        """
        return [
            gr.Dropdown(
                label="Model",
                choices=ANTHROPIC_POSSIBLE_MODELS,
                max_choices=1,
                value=ANTHROPIC_POSSIBLE_MODELS[0],
            )
        ]
|
||||
@@ -6,5 +6,5 @@ import openai
|
||||
class BaseLLMEngine(BaseEngine):
|
||||
|
||||
@abstractmethod
|
||||
def generate(self, system_prompt: str, chat_prompt: str, max_tokens: int, temperature: float, top_p: float, frequency_penalty: float, presence_penalty: float) -> str:
|
||||
def generate(self, system_prompt: str, chat_prompt: str, max_tokens: int, temperature: float, top_p: float, frequency_penalty: float, presence_penalty: float) -> str | dict:
|
||||
pass
|
||||
@@ -1,49 +1,43 @@
|
||||
import openai
|
||||
import anthropic
|
||||
import gradio as gr
|
||||
import orjson
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from .BaseLLMEngine import BaseLLMEngine
|
||||
|
||||
OPENAI_POSSIBLE_MODELS = [
|
||||
"gpt-3.5-turbo-0125",
|
||||
"gpt-4-turbo-preview",
|
||||
# Assuming these are the models supported by Anthropics that you wish to include
|
||||
ANTHROPIC_POSSIBLE_MODELS = [
|
||||
"claude-2.1",
|
||||
# Add more models as needed
|
||||
]
|
||||
|
||||
class OpenaiLLMEngine(BaseLLMEngine):
|
||||
class AnthropicsLLMEngine(BaseLLMEngine):
|
||||
num_options = 1
|
||||
name = "OpenAI"
|
||||
description = "OpenAI language model engine."
|
||||
name = "Anthropics"
|
||||
description = "Anthropics language model engine."
|
||||
|
||||
def __init__(self, options: list) -> None:
|
||||
self.model = options[0]
|
||||
self.client = anthropic.Anthropic(api_key="YourAnthropicAPIKeyHere") # Ensure API key is securely managed
|
||||
super().__init__()
|
||||
|
||||
def generate(self, system_prompt: str, chat_prompt: str, max_tokens: int = 512, temperature: float = 1.0, json_mode: bool= False, top_p: float = 1, frequency_penalty: float = 0, presence_penalty: float = 0) -> str:
|
||||
response = openai.chat.completions.create(
|
||||
model=self.model,
|
||||
def generate(self, system_prompt: str, chat_prompt: str, max_tokens: int = 1024, temperature: float = 1.0, json_mode: bool = False, top_p: float = 1, frequency_penalty: float = 0, presence_penalty: float = 0) -> str | dict:
|
||||
# Note: Adjust the parameters as per Anthropics API capabilities
|
||||
message = self.client.messages.create(
|
||||
max_tokens=max_tokens,
|
||||
messages=[
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": chat_prompt},
|
||||
],
|
||||
max_tokens=max_tokens,
|
||||
temperature=temperature,
|
||||
top_p=top_p,
|
||||
frequency_penalty=frequency_penalty,
|
||||
presence_penalty=presence_penalty,
|
||||
response_format={ "type": "json_object" } if json_mode else openai._types.NOT_GIVEN
|
||||
model=self.model,
|
||||
)
|
||||
return response.choices[0].message.content if not json_mode else orjson.loads(response.choices[0].message.content)
|
||||
|
||||
return message.content
|
||||
|
||||
@classmethod
|
||||
def get_options(cls) -> list:
|
||||
return [
|
||||
gr.Dropdown(
|
||||
label="Model",
|
||||
choices=OPENAI_POSSIBLE_MODELS,
|
||||
choices=ANTHROPIC_POSSIBLE_MODELS,
|
||||
max_choices=1,
|
||||
value=OPENAI_POSSIBLE_MODELS[0]
|
||||
value=ANTHROPIC_POSSIBLE_MODELS[0]
|
||||
)
|
||||
]
|
||||
@@ -1,8 +1,16 @@
|
||||
import moviepy.editor as mp
|
||||
import whisper_timestamped as wt
|
||||
|
||||
from typing import TypedDict
|
||||
from torch.cuda import is_available
|
||||
from abc import ABC, abstractmethod
|
||||
# Assuming BaseEngine is defined elsewhere in your project
|
||||
|
||||
from ..BaseEngine import BaseEngine
|
||||
|
||||
class Word(TypedDict):
    """Timing information for a single transcribed word.

    The entries are whisper_timestamped word dictionaries, whose timestamps
    are numbers expressed in seconds rather than strings.
    """

    start: float  # time at which the word begins, in seconds
    end: float  # time at which the word ends, in seconds
    text: str  # the transcribed word
|
||||
|
||||
class BaseTTSEngine(BaseEngine):
|
||||
|
||||
@@ -10,7 +18,53 @@ class BaseTTSEngine(BaseEngine):
|
||||
def synthesize(self, text: str, path: str) -> str:
|
||||
pass
|
||||
|
||||
def time_with_whisper(self, path: str) -> list[Word]:
    """
    Transcribe the audio file at the given path and return its words with timings.

    The "tiny" model is loaded on CUDA when available, otherwise on the CPU.

    Args:
        path (str): The path to the audio file.

    Returns:
        list[Word]: The transcribed words in order, each with its start
        time, end time and text.

    Example:
        ```json
        [
            {
                "start": 0.0,
                "end": 0.5,
                "text": "Hello"
            },
            {
                "start": 0.5,
                "end": 1.0,
                "text": "world"
            }
        ]
        ```
    """
    device = "cuda" if is_available() else "cpu"
    audio = wt.load_audio(path)
    model = wt.load_model("tiny", device=device)

    transcription = wt.transcribe(model=model, audio=audio)
    # transcribe() returns a dict; the word entries live under each item of
    # "segments". Iterating the dict itself would only yield its string keys.
    words = [
        word
        for segment in transcription["segments"]
        for word in segment["words"]
    ]
    for word in words:
        # Not needed for the current use case
        word.pop("confidence", None)
    return words
|
||||
|
||||
def force_duration(self, duration: float, path: str):
|
||||
"""
|
||||
Forces the audio clip at the given path to have the specified duration.
|
||||
|
||||
Args:
|
||||
duration (float): The desired duration in seconds.
|
||||
path (str): The path to the audio clip file.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
audio_clip = mp.AudioFileClip(path)
|
||||
|
||||
if audio_clip.duration > duration:
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import gradio as gr
|
||||
|
||||
# import TTS
|
||||
import TTS
|
||||
import os
|
||||
|
||||
# import torch
|
||||
import torch
|
||||
|
||||
from .BaseTTSEngine import BaseTTSEngine
|
||||
|
||||
@@ -102,15 +102,25 @@ class CoquiTTSEngine(BaseTTSEngine):
|
||||
|
||||
os.environ["COQUI_TOS_AGREED"] = "1"
|
||||
|
||||
# self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
|
||||
# device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
# self.tts.to(device)
|
||||
self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
|
||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
self.tts.to(device)
|
||||
|
||||
def synthesize(self, text: str, path: str) -> str:
|
||||
# self.tts.tts_to_file(text=text, file_path=path, lang=self.language, speaker=self.voice)
|
||||
if self.to_force_duration:
|
||||
self.force_duration(float(self.duration), path)
|
||||
return path
|
||||
def synthesize(self, text: str, path: str) -> list:
    """
    Synthesize the given text into speech and save it to the specified file path.

    When ``self.to_force_duration`` is set, the written audio is passed
    through ``force_duration`` with ``self.duration`` before being timed.

    Args:
        text (str): The text to synthesize into speech.
        path (str): The file path to save the synthesized speech.

    Returns:
        list: The result of ``time_with_whisper(path)`` for the generated
        audio file.
    """
    # tts_to_file takes the language through the ``language`` keyword; a
    # ``lang`` keyword is not recognised as the language selection.
    self.tts.tts_to_file(text=text, file_path=path, language=self.language, speaker=self.voice)
    if self.to_force_duration:
        self.force_duration(float(self.duration), path)
    return self.time_with_whisper(path)
|
||||
|
||||
@classmethod
|
||||
def get_options(cls) -> list:
|
||||
@@ -129,12 +139,11 @@ class CoquiTTSEngine(BaseTTSEngine):
|
||||
),
|
||||
]
|
||||
|
||||
duration_checkbox = gr.Checkbox(value=False)
|
||||
duration = gr.Number(label="Duration", value=57, step=1, minimum=10, visible=False)
|
||||
duration_checkbox = gr.Checkbox(label="Force duration", info="Force the duration of the generated audio to be at most the specified value", value=False)
|
||||
duration = gr.Number(label="Duration [s]", value=57, step=1, minimum=10, visible=False)
|
||||
duration_switch = lambda x: gr.update(visible=x)
|
||||
duration_checkbox.change(duration_switch, inputs=[duration_checkbox], outputs=[duration])
|
||||
duration_checkbox_group = gr.CheckboxGroup([duration_checkbox], label="Force duration")
|
||||
|
||||
options.append(duration_checkbox_group)
|
||||
options.append(duration_checkbox)
|
||||
options.append(duration)
|
||||
return options
|
||||
@@ -1,5 +1,5 @@
|
||||
from . import TTSEngine
|
||||
from .BaseEngine import BaseEngine
|
||||
from . import TTSEngine
|
||||
from . import ScriptEngine
|
||||
from . import LLMEngine
|
||||
|
||||
|
||||
Reference in New Issue
Block a user