This commit is contained in:
2024-02-13 14:29:49 +01:00
parent e8121e3e8d
commit 79d81b96b1
3 changed files with 112 additions and 14 deletions

65
requirements.txt Normal file
View File

@@ -0,0 +1,65 @@
aiofiles==23.2.1
altair==5.2.0
annotated-types==0.6.0
anyio==4.2.0
attrs==23.2.0
certifi==2024.2.2
charset-normalizer==3.3.2
click==8.1.7
colorama==0.4.6
contourpy==1.2.0
cycler==0.12.1
fastapi==0.109.2
ffmpy==0.3.2
filelock==3.13.1
fonttools==4.48.1
fsspec==2024.2.0
gradio==4.18.0
gradio_client==0.10.0
h11==0.14.0
httpcore==1.0.2
httpx==0.26.0
huggingface-hub==0.20.3
idna==3.6
importlib-resources==6.1.1
Jinja2==3.1.3
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
kiwisolver==1.4.5
markdown-it-py==3.0.0
MarkupSafe==2.1.5
matplotlib==3.8.2
mdurl==0.1.2
numpy==1.26.4
orjson==3.9.13
packaging==23.2
pandas==2.2.0
pillow==10.2.0
pydantic==2.6.1
pydantic_core==2.16.2
pydub==0.25.1
Pygments==2.17.2
pyparsing==3.1.1
python-dateutil==2.8.2
python-multipart==0.0.9
pytz==2024.1
PyYAML==6.0.1
referencing==0.33.0
requests==2.31.0
rich==13.7.0
rpds-py==0.17.1
ruff==0.2.1
semantic-version==2.10.0
shellingham==1.5.4
six==1.16.0
sniffio==1.3.0
starlette==0.36.3
tomlkit==0.12.0
toolz==0.12.1
tqdm==4.66.2
typer==0.9.0
typing_extensions==4.9.0
tzdata==2024.1
urllib3==2.2.0
uvicorn==0.27.1
websockets==11.0.3

View File

@@ -89,23 +89,31 @@ class CoquiTTSEngine(BaseTTSEngine):
"hi", # Hindi "hi", # Hindi
] ]
options = [ options = [
gr.Dropdown( {
voices, value=voices[0], label="voice", max_choices=1 "type": "dropdown",
), "label": "Voice",
gr.Dropwdown( "choices": voices,
languages, value=languages[0], label="language", max_choices=1 "max": 1,
), },
{
"type": "dropdown",
"label": "Language",
"choices": languages,
"max": 1,
},
] ]
def __init__(self):
def __init__(self, options: list):
super().__init__() super().__init__()
self.voice = options[0][0]
self.language = options[1][0]
os.environ["COQUI_TOS_AGREED"] = "1" os.environ["COQUI_TOS_AGREED"] = "1"
self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2") self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
device = "cuda" if torch.cuda.is_available() else "cpu" device = "cuda" if torch.cuda.is_available() else "cpu"
self.tts.to(device) self.tts.to(device)
def synthesize(self, text: str, path: str) -> str: def synthesize(self, text: str, path: str) -> str:
voice = self.options[0].value self.tts.tts_to_file(text=text, file_path=path, lang=self.language, speaker=self.voice)
language = self.options[1].value
self.tts.tts_to_file(text=text, file_path=path, lang=language, speaker=voice)
return path return path

View File

@@ -1,12 +1,37 @@
from .BaseTTSEngine import AbstractTTSEngine from .BaseTTSEngine import BaseTTSEngine
import gradio as gr import gradio as gr
class ElevenLabsTTSEngine(AbstractTTSEngine): class ElevenLabsTTSEngine(BaseTTSEngine):
options = [gr.Radio(["Neutral", "Happy", "Sad"], label="emotion")] options = [
{
"type": "dropdown",
"label": "Voice",
"choices": [
"Zofija Kendrick",
"Narelle Moon",
"Barbora MacLean",
"Alexandra Hisakawa",
"Alma María",
"Rosemary Okafor",
"Ige Behringer",
"Filip Traverse",
"Damjan Chapman",
"Wulf Carlevaro",
"Aaron Dreschner",
"Kumar Dahl",
"Eugenio Mataracı",
"Ferran Simen",
"Xavier Hayasaka",
"Luis Moray",
"Marcos Rudaski",
],
}
]
name = "ElevenLabs" name = "ElevenLabs"
description = "ElevenLabs TTS engine." description = "ElevenLabs TTS engine."
def __init__(self): def __init__(self, options: list[list | tuple | str | int | float | bool | None]):
self.voice = options[0][0]
super().__init__() super().__init__()
def synthesize(self, text: str, path: str) -> str: def synthesize(self, text: str, path: str) -> str: