mod error; mod g2p; mod stream; mod synthesizer; mod tokenizer; mod transcription; mod voice; use { bincode::{config::standard, decode_from_slice}, ort::{execution_providers::CUDAExecutionProvider, session::Session}, std::{collections::HashMap, path::Path, sync::Arc, time::Duration}, tokio::{fs::read, sync::Mutex}, }; pub use {error::*, g2p::*, stream::*, tokenizer::*, transcription::*, voice::*}; pub struct KokoroTts { model: Arc>, voices: Arc>>>>, } impl KokoroTts { pub async fn new>(model_path: P, voices_path: P) -> Result { let voices = read(voices_path).await?; let (voices, _) = decode_from_slice(&voices, standard())?; let model = Session::builder()? .with_execution_providers([CUDAExecutionProvider::default().build()])? .commit_from_file(model_path)?; Ok(Self { model: Arc::new(model.into()), voices, }) } pub async fn new_from_bytes(model: B, voices: B) -> Result where B: AsRef<[u8]>, { let (voices, _) = decode_from_slice(voices.as_ref(), standard())?; let model = Session::builder()? .with_execution_providers([CUDAExecutionProvider::default().build()])? .commit_from_memory(model.as_ref())?; Ok(Self { model: Arc::new(model.into()), voices, }) } pub async fn synth(&self, text: S, voice: Voice) -> Result<(Vec, Duration), KokoroError> where S: AsRef, { let name = voice.get_name(); let pack = self .voices .get(name) .ok_or(KokoroError::VoiceNotFound(name.to_owned()))?; synthesizer::synth(Arc::downgrade(&self.model), text, pack, voice).await } pub fn stream(&self, voice: Voice) -> (SynthSink, SynthStream) where S: AsRef + Send + 'static, { let voices = Arc::downgrade(&self.voices); let model = Arc::downgrade(&self.model); start_synth_session(voice, move |text, voice| { let voices = voices.clone(); let model = model.clone(); async move { let name = voice.get_name(); let voices = voices.upgrade().ok_or(KokoroError::ModelReleased)?; let pack = voices .get(name) .ok_or(KokoroError::VoiceNotFound(name.to_owned()))?; synthesizer::synth(model, text, pack, voice).await } }) } }