2023-05-15 10:11:04 +02:00
2023-06-25 21:59:52 +02:00
import os
2023-05-15 10:11:04 +02:00
fakenames = {
" Alexander " : " p230 " ,
" Benjamin " : " p240 " ,
" Amelia " : " p270 " ,
2023-05-25 21:47:11 +02:00
" Katherine " : " p273 " ,
" Johanne " : " p347 " ,
2023-05-15 10:11:04 +02:00
}
2023-05-25 21:47:11 +02:00
voices = [ " Alexander " , " Benjamin " , " Amelia " , " Katherine " , " Johanne " ]
2023-05-15 10:11:04 +02:00
2023-06-25 21:59:52 +02:00
class VoiceGenerator :
def __init__ ( self , mode = " Bark " , speaker = " " ) :
self . mode = mode
self . speaker = speaker
if mode == " Bark " :
os . environ [ " XDG_CACHE_HOME " ] = os . path . join ( os . getcwd ( ) , " bark_cache " )
from bark import preload_models , generation
2023-05-25 21:47:11 +02:00
2023-06-25 21:59:52 +02:00
preload_models ( )
self . speaker = " v2/en_speaker_6 "
else :
from TTS . api import TTS
model = " tts_models/en/vctk/vits "
self . speaker = fakenames [ speaker ] if speaker in fakenames else speaker
print ( f " Generating voice for { model } with speaker { speaker } " )
try :
self . tts = TTS ( model , gpu = True )
except :
self . tts = TTS ( model , gpu = False )
if self . speaker == " " : self . speaker = " p230 "
else :
self . speaker = fakenames [ self . speaker ] if self . speaker in fakenames else fakenames [ " Alexander " ]
def generate_voice ( self , path , text ) :
if self . mode == " Bark " :
from bark import SAMPLE_RATE , generate_audio , preload_models
from scipy . io . wavfile import read as wavread , write as wavwrite
import noisereduce as nr
import soundfile
import numpy as np
import nltk
sentences = nltk . sent_tokenize ( text )
pieces = [ ]
silence = np . zeros ( int ( 0.25 * SAMPLE_RATE ) ) # quarter second of silence
for sentence in sentences :
audio_array = generate_audio ( sentence , history_prompt = self . speaker )
pieces + = [ audio_array , silence . copy ( ) ]
audio_array = np . concatenate ( pieces )
soundfile . write ( path , audio_array , SAMPLE_RATE , format = " WAV " , subtype = " PCM_16 " )
rate , data = wavread ( path )
reduced_noise = nr . reduce_noise ( y = data , sr = rate )
os . remove ( path )
wavwrite ( path , rate , reduced_noise )
else :
self . tts . tts_to_file ( text = text , file_path = path , speaker = self . speaker , speed = 1 , emotion = " Happy " )
2023-05-25 21:47:11 +02:00
if __name__ == " __main__ " :
2023-06-25 21:59:52 +02:00
generator = VoiceGenerator ( )
generator . generate_voice ( " test/test_r.wav " , " Hello there! " )
generator . generate_voice ( " test/teste_r.wav " , " This is a test. I like the words python, django and flask. Betty bought a bit of butter but the butter was bitter. So she bought some better butter to make the bitter butter better. " )