E

2026-03-03 02:14:54 +00:00 · 2024-02-13 14:29:49 +01:00
parent e8121e3e8d
commit 79d81b96b1
3 changed files with 112 additions and 14 deletions
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,65 @@
 aiofiles==23.2.1
 altair==5.2.0
 annotated-types==0.6.0
 anyio==4.2.0
 attrs==23.2.0
 certifi==2024.2.2
 charset-normalizer==3.3.2
 click==8.1.7
 colorama==0.4.6
 contourpy==1.2.0
 cycler==0.12.1
 fastapi==0.109.2
 ffmpy==0.3.2
 filelock==3.13.1
 fonttools==4.48.1
 fsspec==2024.2.0
 gradio==4.18.0
 gradio_client==0.10.0
 h11==0.14.0
 httpcore==1.0.2
 httpx==0.26.0
 huggingface-hub==0.20.3
 idna==3.6
 importlib-resources==6.1.1
 Jinja2==3.1.3
 jsonschema==4.21.1
 jsonschema-specifications==2023.12.1
 kiwisolver==1.4.5
 markdown-it-py==3.0.0
 MarkupSafe==2.1.5
 matplotlib==3.8.2
 mdurl==0.1.2
 numpy==1.26.4
 orjson==3.9.13
 packaging==23.2
 pandas==2.2.0
 pillow==10.2.0
 pydantic==2.6.1
 pydantic_core==2.16.2
 pydub==0.25.1
 Pygments==2.17.2
 pyparsing==3.1.1
 python-dateutil==2.8.2
 python-multipart==0.0.9
 pytz==2024.1
 PyYAML==6.0.1
 referencing==0.33.0
 requests==2.31.0
 rich==13.7.0
 rpds-py==0.17.1
 ruff==0.2.1
 semantic-version==2.10.0
 shellingham==1.5.4
 six==1.16.0
 sniffio==1.3.0
 starlette==0.36.3
 tomlkit==0.12.0
 toolz==0.12.1
 tqdm==4.66.2
 typer==0.9.0
 typing_extensions==4.9.0
 tzdata==2024.1
 urllib3==2.2.0
 uvicorn==0.27.1
 websockets==11.0.3
--- a/src/engines/TTSEngine/CoquiTTSEngine.py
+++ b/src/engines/TTSEngine/CoquiTTSEngine.py
@@ -89,23 +89,31 @@ class CoquiTTSEngine(BaseTTSEngine):
        "hi",  # Hindi
    ]
    options = [
-        gr.Dropdown(
+        {
-            voices, value=voices[0], label="voice", max_choices=1
+            "type": "dropdown",
-        ),
+            "label": "Voice",
-        gr.Dropwdown(
+            "choices": voices,
-            languages, value=languages[0], label="language", max_choices=1
+            "max": 1,
-        ),
+        },
        {
            "type": "dropdown",
            "label": "Language",
            "choices": languages,
            "max": 1,
        },
    ]
-    def __init__(self):
+
    def __init__(self, options: list):
        """Set up the Coqui engine from the selected option values.

        Args:
            options: Selected values, indexed in the same order as the
                class-level ``options`` list (voice first, language second).
                Only the first element of each of the first two entries is
                read.
        """
        super().__init__()
        # assumes each entry is an indexable sequence whose first element is
        # the chosen value — TODO confirm against the caller.
        self.voice = options[0][0]
        self.language = options[1][0]
        # Set before TTS(...) is constructed below; presumably this pre-accepts
        # the Coqui terms-of-service prompt — verify against the Coqui TTS docs.
        os.environ["COQUI_TOS_AGREED"] = "1"
        self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
        # Use the GPU when torch reports CUDA as available, otherwise the CPU.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.tts.to(device)
    def synthesize(self, text: str, path: str) -> str:
        # Writes speech for `text` to `path` via self.tts.tts_to_file and
        # returns `path`.
        # NOTE(review): this hunk interleaves removed ("-") and added ("+")
        # lines. The two unmarked statements before `return path` use `voice`,
        # which is assigned only on the removed line, and read
        # `self.options[1].value`, although the options entries are now dicts.
        # Presumably they are removed lines whose "-" marker was lost —
        # confirm against the commit before relying on this text.
-        voice = self.options[0].value
+        self.tts.tts_to_file(text=text, file_path=path, lang=self.language, speaker=self.voice)
        language = self.options[1].value
        self.tts.tts_to_file(text=text, file_path=path, lang=language, speaker=voice)
        return path
--- a/src/engines/TTSEngine/ElevenLabsTTSEngine.py
+++ b/src/engines/TTSEngine/ElevenLabsTTSEngine.py
@@ -1,12 +1,37 @@
-from .BaseTTSEngine import AbstractTTSEngine
+from .BaseTTSEngine import BaseTTSEngine
 import gradio as gr
-class ElevenLabsTTSEngine(AbstractTTSEngine):
+class ElevenLabsTTSEngine(BaseTTSEngine):
-    options = [gr.Radio(["Neutral", "Happy", "Sad"], label="emotion")]
+    options = [
        {
            "type": "dropdown",
            "label": "Voice",
            "choices": [
                "Zofija Kendrick",
                "Narelle Moon",
                "Barbora MacLean",
                "Alexandra Hisakawa",
                "Alma María",
                "Rosemary Okafor",
                "Ige Behringer",
                "Filip Traverse",
                "Damjan Chapman",
                "Wulf Carlevaro",
                "Aaron Dreschner",
                "Kumar Dahl",
                "Eugenio Mataracı",
                "Ferran Simen",
                "Xavier Hayasaka",
                "Luis Moray",
                "Marcos Rudaski",
            ],
        }
    ]
    name = "ElevenLabs"
    description = "ElevenLabs TTS engine."
-    def __init__(self):
+    def __init__(self, options: list[list | tuple | str | int | float | bool | None]):
        # Selected voice: first element of the first option entry, matching the
        # single "Voice" dropdown in the class-level `options` list.
        # NOTE(review): the annotation also admits int/float/bool/None entries,
        # for which `options[0][0]` would raise TypeError — confirm what the
        # caller actually passes.
        self.voice = options[0][0]
        # NOTE(review): CoquiTTSEngine.__init__ calls super().__init__() before
        # assigning its attributes; here it runs afterwards — confirm the base
        # initializer does not reset `self.voice`.
        super().__init__()
    def synthesize(self, text: str, path: str) -> str: