From 8e65bd9ed4cd4245c61c9622691fab7f19714dab Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Sat, 18 Jan 2025 18:37:17 +0200 Subject: [PATCH] feat: add new TTS examples for Kokoro, Vits, and Matcha models --- .gitignore | 2 + crates/sherpa-rs/Cargo.toml | 14 ++- crates/sherpa-rs/src/lib.rs | 62 ++++++++-- crates/sherpa-rs/src/tts.rs | 189 ----------------------------- crates/sherpa-rs/src/tts/kokoro.rs | 64 ++++++++++ crates/sherpa-rs/src/tts/matcha.rs | 76 ++++++++++++ crates/sherpa-rs/src/tts/mod.rs | 56 +++++++++ crates/sherpa-rs/src/tts/vits.rs | 72 +++++++++++ examples/tts.rs | 100 --------------- examples/tts_kokoro.rs | 27 +++++ examples/tts_matcha.rs | 24 ++++ examples/tts_vits.rs | 22 ++++ 12 files changed, 405 insertions(+), 303 deletions(-) delete mode 100644 crates/sherpa-rs/src/tts.rs create mode 100644 crates/sherpa-rs/src/tts/kokoro.rs create mode 100644 crates/sherpa-rs/src/tts/matcha.rs create mode 100644 crates/sherpa-rs/src/tts/mod.rs create mode 100644 crates/sherpa-rs/src/tts/vits.rs delete mode 100644 examples/tts.rs create mode 100644 examples/tts_kokoro.rs create mode 100644 examples/tts_matcha.rs create mode 100644 examples/tts_vits.rs diff --git a/.gitignore b/.gitignore index cf19ae2..4f51a68 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,5 @@ sherpa-onnx-kws-* jniLibs/ build/ kokoro-en-*/ +matcha-* +/ \ No newline at end of file diff --git a/crates/sherpa-rs/Cargo.toml b/crates/sherpa-rs/Cargo.toml index 0a613d3..20ef4f0 100644 --- a/crates/sherpa-rs/Cargo.toml +++ b/crates/sherpa-rs/Cargo.toml @@ -37,9 +37,19 @@ cuda = ["sherpa-rs-sys/cuda"] directml = ["sherpa-rs-sys/directml"] [[example]] -name = "tts" +name = "tts_kokoro" required-features = ["tts"] -path = "../../examples/tts.rs" +path = "../../examples/tts_kokoro.rs" + +[[example]] +name = "tts_vits" +required-features = ["tts"] +path = "../../examples/tts_vits.rs" + +[[example]] +name = "tts_matcha" +required-features = ["tts"] +path = "../../examples/tts_matcha.rs" [[example]] name = "audio_tag" diff --git a/crates/sherpa-rs/src/lib.rs b/crates/sherpa-rs/src/lib.rs index 70e7d1f..b10b803 100644 --- a/crates/sherpa-rs/src/lib.rs +++ b/crates/sherpa-rs/src/lib.rs @@ -18,19 +18,21 @@ pub mod tts; #[cfg(feature = "sys")] pub use sherpa_rs_sys; -use eyre::{bail, Result}; +use eyre::{ bail, Result }; pub fn get_default_provider() -> String { - if cfg!(feature = "cuda") { - "cuda" - } else if cfg!(target_os = "macos") { - "coreml" - } else if cfg!(feature = "directml") { - "directml" - } else { - "cpu" - } - .into() + "cpu".into() + // Other providers has many issues with different models!! + // if cfg!(feature = "cuda") { + // "cuda" + // } else if cfg!(target_os = "macos") { + // "coreml" + // } else if cfg!(feature = "directml") { + // "directml" + // } else { + // "cpu" + // } + // .into() } pub fn read_audio_file(path: &str) -> Result<(Vec, u32)> { @@ -45,8 +47,44 @@ pub fn read_audio_file(path: &str) -> Result<(Vec, u32)> { // Collect samples into a Vec let samples: Vec = reader .samples::() - .map(|s| s.unwrap() as f32 / i16::MAX as f32) + .map(|s| (s.unwrap() as f32) / (i16::MAX as f32)) .collect(); Ok((samples, sample_rate)) } + +pub fn write_audio_file(path: &str, samples: &[f32], sample_rate: u32) -> Result<()> { + // Create a WAV file writer + let spec = hound::WavSpec { + channels: 1, + sample_rate, + bits_per_sample: 16, + sample_format: hound::SampleFormat::Int, + }; + + let mut writer = hound::WavWriter::create(path, spec)?; + + // Convert samples from f32 to i16 and write them to the WAV file + for &sample in samples { + let scaled_sample = (sample * (i16::MAX as f32)).clamp( + i16::MIN as f32, + i16::MAX as f32 + ) as i16; + writer.write_sample(scaled_sample)?; + } + + writer.finalize()?; + Ok(()) +} + +pub struct OnnxConfig { + pub provider: String, + pub debug: bool, + pub num_threads: i32, +} + +impl Default for OnnxConfig { + fn default() -> Self { + Self { provider: get_default_provider(), debug: false, num_threads: 1 } + } +} diff --git a/crates/sherpa-rs/src/tts.rs b/crates/sherpa-rs/src/tts.rs deleted file mode 100644 index e9aeebb..0000000 --- a/crates/sherpa-rs/src/tts.rs +++ /dev/null @@ -1,189 +0,0 @@ -use std::ptr::null; - -use crate::{get_default_provider, utils::RawCStr}; -use eyre::{bail, Result}; -use hound::{WavSpec, WavWriter}; - -#[derive(Debug)] -pub struct OfflineTtsConfig { - pub model: String, - pub rule_fars: String, - pub rule_fsts: String, - pub max_num_sentences: i32, - pub num_threads: Option, - pub debug: bool, - pub provider: Option, -} - -#[derive(Debug)] -pub struct VitsConfig { - pub lexicon: String, - pub tokens: String, - pub data_dir: String, - pub dict_dir: String, - - pub noise_scale: f32, - pub noise_scale_w: f32, - pub length_scale: f32, -} - -impl Default for VitsConfig { - fn default() -> Self { - Self { - lexicon: String::new(), - tokens: String::new(), - data_dir: String::new(), - dict_dir: String::new(), - noise_scale: 0.0, - noise_scale_w: 0.0, - length_scale: 1.0, - } - } -} - -impl Default for OfflineTtsConfig { - fn default() -> Self { - Self { - model: String::new(), - rule_fars: String::new(), - rule_fsts: String::new(), - max_num_sentences: 2, - num_threads: None, - debug: false, - provider: None, - } - } -} - -#[derive(Debug)] -pub struct OfflineTts { - pub(crate) tts: *const sherpa_rs_sys::SherpaOnnxOfflineTts, -} - -impl OfflineTts { - pub fn new(config: OfflineTtsConfig, vits_config: VitsConfig) -> Self { - let provider = config.provider.unwrap_or(get_default_provider()); - - let data_dir = RawCStr::new(&vits_config.data_dir); - let dict_dir = RawCStr::new(&vits_config.dict_dir); - let lexicon = RawCStr::new(&vits_config.lexicon); - let model = RawCStr::new(&config.model); - let tokens = RawCStr::new(&vits_config.tokens); - let provider = RawCStr::new(&provider); - let rule_fars = RawCStr::new(&config.rule_fars); - let rule_fsts = RawCStr::new(&config.rule_fsts); - - let tts_config = sherpa_rs_sys::SherpaOnnxOfflineTtsConfig { - max_num_sentences: config.max_num_sentences, - model: sherpa_rs_sys::SherpaOnnxOfflineTtsModelConfig { - kokoro: sherpa_rs_sys::SherpaOnnxOfflineTtsKokoroModelConfig { - model: null(), - voices: null(), - tokens: null(), - data_dir: null(), - length_scale: 0.0, - }, - matcha: sherpa_rs_sys::SherpaOnnxOfflineTtsMatchaModelConfig { - acoustic_model: null(), - vocoder: null(), - lexicon: null(), - tokens: null(), - data_dir: null(), - noise_scale: 0.0, - length_scale: 0.0, - dict_dir: null(), - }, - vits: sherpa_rs_sys::SherpaOnnxOfflineTtsVitsModelConfig { - data_dir: data_dir.as_ptr(), - dict_dir: dict_dir.as_ptr(), - length_scale: vits_config.length_scale, - lexicon: lexicon.as_ptr(), - model: model.as_ptr(), - noise_scale: vits_config.noise_scale, - noise_scale_w: vits_config.noise_scale_w, - tokens: tokens.as_ptr(), - }, - num_threads: config.num_threads.unwrap_or(1), - debug: config.debug.into(), - provider: provider.as_ptr(), - }, - rule_fars: rule_fars.as_ptr(), - rule_fsts: rule_fsts.as_ptr(), - }; - - let tts = unsafe { sherpa_rs_sys::SherpaOnnxCreateOfflineTts(&tts_config) }; - Self { tts } - } - - pub fn generate(&mut self, text: String, sid: i32, speed: f32) -> Result { - unsafe { - let text = RawCStr::new(&text); - let audio_ptr = - sherpa_rs_sys::SherpaOnnxOfflineTtsGenerate(self.tts, text.as_ptr(), sid, speed); - - if audio_ptr.is_null() { - bail!("audio is null"); - } - let audio = audio_ptr.read(); - - if audio.n.is_negative() { - bail!("no samples found"); - } - if audio.samples.is_null() { - bail!("audio samples are null"); - } - let samples: &[f32] = std::slice::from_raw_parts(audio.samples, audio.n as usize); - let samples = samples.to_vec(); - let sample_rate = audio.sample_rate; - let duration = (samples.len() as i32) / sample_rate; - - // Free - sherpa_rs_sys::SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio_ptr); - - Ok(TtsSample { - samples, - sample_rate: sample_rate as u32, - duration, - }) - } - } -} - -#[derive(Debug)] -pub struct TtsSample { - pub samples: Vec, - pub sample_rate: u32, - pub duration: i32, -} - -impl TtsSample { - pub fn write_to_wav(&self, filename: &str) -> Result<()> { - let spec = WavSpec { - channels: 1, - sample_rate: self.sample_rate, - bits_per_sample: 32, - sample_format: hound::SampleFormat::Float, - }; - - let mut writer = WavWriter::create(filename, spec)?; - - for &sample in &self.samples { - writer.write_sample(sample)?; - } - - writer.finalize()?; - - Ok(()) - } -} - -unsafe impl Send for OfflineTts {} -unsafe impl Sync for OfflineTts {} - -impl Drop for OfflineTts { - fn drop(&mut self) { - unsafe { - sherpa_rs_sys::SherpaOnnxDestroyOfflineTts(self.tts); - } - } -} diff --git a/crates/sherpa-rs/src/tts/kokoro.rs b/crates/sherpa-rs/src/tts/kokoro.rs new file mode 100644 index 0000000..7584b0f --- /dev/null +++ b/crates/sherpa-rs/src/tts/kokoro.rs @@ -0,0 +1,64 @@ +use std::{ mem, ptr::null }; + +use eyre::Result; +use sherpa_rs_sys; +use crate::{ utils::RawCStr, OnnxConfig }; + +use super::TtsAudio; + +pub struct KokoroTts { + tts: *const sherpa_rs_sys::SherpaOnnxOfflineTts, +} + +#[derive(Default)] +pub struct KokoroTtsConfig { + pub model: String, + pub voices: String, + pub tokens: String, + pub data_dir: String, + pub length_scale: f32, + pub onnx_config: OnnxConfig, +} + +impl KokoroTts { + pub fn new(config: KokoroTtsConfig) -> Self { + let tts = unsafe { + let model = RawCStr::new(&config.model); + let voices = RawCStr::new(&config.voices); + let tokens = RawCStr::new(&config.tokens); + let data_dir = RawCStr::new(&config.data_dir); + + let provider = RawCStr::new(&config.onnx_config.provider); + + let model_config = sherpa_rs_sys::SherpaOnnxOfflineTtsModelConfig { + vits: mem::zeroed::<_>(), + num_threads: config.onnx_config.num_threads, + debug: config.onnx_config.debug.into(), + provider: provider.as_ptr(), + matcha: mem::zeroed::<_>(), + kokoro: sherpa_rs_sys::SherpaOnnxOfflineTtsKokoroModelConfig { + model: model.as_ptr(), + voices: voices.as_ptr(), + tokens: tokens.as_ptr(), + data_dir: data_dir.as_ptr(), + length_scale: config.length_scale, + }, + }; + let config = sherpa_rs_sys::SherpaOnnxOfflineTtsConfig { + max_num_sentences: 0, + model: model_config, + rule_fars: null(), + rule_fsts: null(), + }; + sherpa_rs_sys::SherpaOnnxCreateOfflineTts(&config) + }; + + Self { + tts, + } + } + + pub fn create(&mut self, text: &str, sid: i32, speed: f32) -> Result { + unsafe { super::create(self.tts, text, sid, speed) } + } +} diff --git a/crates/sherpa-rs/src/tts/matcha.rs b/crates/sherpa-rs/src/tts/matcha.rs new file mode 100644 index 0000000..d0ba5f2 --- /dev/null +++ b/crates/sherpa-rs/src/tts/matcha.rs @@ -0,0 +1,76 @@ +use std::{ mem, ptr::null }; + +use eyre::Result; +use sherpa_rs_sys; +use crate::{ utils::RawCStr, OnnxConfig }; + +use super::TtsAudio; + +pub struct MatchaTts { + tts: *const sherpa_rs_sys::SherpaOnnxOfflineTts, +} + +#[derive(Default)] +pub struct MatchaTtsConfig { + pub model: String, + pub lexicon: String, + pub dict_dir: String, + pub tokens: String, + pub data_dir: String, + pub acoustic_model: String, + pub vocoder: String, + pub length_scale: f32, + pub noise_scale: f32, + pub noise_scale_w: f32, + + pub onnx_config: OnnxConfig, +} + +impl MatchaTts { + pub fn new(config: MatchaTtsConfig) -> Self { + let tts = unsafe { + let tokens = RawCStr::new(&config.tokens); + let data_dir = RawCStr::new(&config.data_dir); + let lexicon = RawCStr::new(&config.lexicon); + let dict_dir = RawCStr::new(&config.dict_dir); + + let vocoder = RawCStr::new(&config.vocoder); + let acoustic_model = RawCStr::new(&config.acoustic_model); + + let provider = RawCStr::new(&config.onnx_config.provider); + + let model_config = sherpa_rs_sys::SherpaOnnxOfflineTtsModelConfig { + num_threads: config.onnx_config.num_threads, + vits: mem::zeroed::<_>(), + debug: config.onnx_config.debug.into(), + provider: provider.as_ptr(), + matcha: sherpa_rs_sys::SherpaOnnxOfflineTtsMatchaModelConfig { + acoustic_model: acoustic_model.as_ptr(), + vocoder: vocoder.as_ptr(), + lexicon: lexicon.as_ptr(), + tokens: tokens.as_ptr(), + data_dir: data_dir.as_ptr(), + noise_scale: config.noise_scale, + length_scale: config.length_scale, + dict_dir: dict_dir.as_ptr(), + }, + kokoro: mem::zeroed::<_>(), + }; + let config = sherpa_rs_sys::SherpaOnnxOfflineTtsConfig { + max_num_sentences: 0, + model: model_config, + rule_fars: null(), + rule_fsts: null(), + }; + sherpa_rs_sys::SherpaOnnxCreateOfflineTts(&config) + }; + + Self { + tts, + } + } + + pub fn create(&mut self, text: &str, sid: i32, speed: f32) -> Result { + unsafe { super::create(self.tts, text, sid, speed) } + } +} diff --git a/crates/sherpa-rs/src/tts/mod.rs b/crates/sherpa-rs/src/tts/mod.rs new file mode 100644 index 0000000..d1f573e --- /dev/null +++ b/crates/sherpa-rs/src/tts/mod.rs @@ -0,0 +1,56 @@ +mod kokoro; +mod vits; +mod matcha; + +use eyre::{ bail, Result }; + +pub use kokoro::{ KokoroTts, KokoroTtsConfig }; +pub use vits::{ VitsTts, VitsTtsConfig }; +pub use matcha::{ MatchaTts, MatchaTtsConfig }; + +use crate::utils::RawCStr; + +#[derive(Debug)] +pub struct TtsAudio { + pub samples: Vec, + pub sample_rate: u32, + pub duration: i32, +} + +/// # Safety +/// +/// This function dereference sherpa_rs_sys::SherpaOnnxOfflineTts +pub unsafe fn create( + tts: *const sherpa_rs_sys::SherpaOnnxOfflineTts, + text: &str, + sid: i32, + speed: f32 +) -> Result { + let text = RawCStr::new(text); + let audio_ptr = sherpa_rs_sys::SherpaOnnxOfflineTtsGenerate(tts, text.as_ptr(), sid, speed); + + if audio_ptr.is_null() { + bail!("audio is null"); + } + let audio = audio_ptr.read(); + + if audio.n.is_negative() { + bail!("no samples found"); + } + if audio.samples.is_null() { + bail!("audio samples are null"); + } + let samples: &[f32] = std::slice::from_raw_parts(audio.samples, audio.n as usize); + let samples = samples.to_vec(); + let sample_rate = audio.sample_rate; + let duration = (samples.len() as i32) / sample_rate; + + // Free + sherpa_rs_sys::SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio_ptr); + + Ok(TtsAudio { + samples, + sample_rate: sample_rate as u32, + duration, + }) +} diff --git a/crates/sherpa-rs/src/tts/vits.rs b/crates/sherpa-rs/src/tts/vits.rs new file mode 100644 index 0000000..42f05de --- /dev/null +++ b/crates/sherpa-rs/src/tts/vits.rs @@ -0,0 +1,72 @@ +use std::{ mem, ptr::null }; + +use eyre::Result; +use sherpa_rs_sys; +use crate::{ utils::RawCStr, OnnxConfig }; + +use super::TtsAudio; + +pub struct VitsTts { + tts: *const sherpa_rs_sys::SherpaOnnxOfflineTts, +} + +#[derive(Default)] +pub struct VitsTtsConfig { + pub model: String, + pub lexicon: String, + pub dict_dir: String, + pub tokens: String, + pub data_dir: String, + pub length_scale: f32, + pub noise_scale: f32, + pub noise_scale_w: f32, + + pub onnx_config: OnnxConfig, +} + +impl VitsTts { + pub fn new(config: VitsTtsConfig) -> Self { + let tts = unsafe { + let model = RawCStr::new(&config.model); + let tokens = RawCStr::new(&config.tokens); + let data_dir = RawCStr::new(&config.data_dir); + let lexicon = RawCStr::new(&config.lexicon); + let dict_dir = RawCStr::new(&config.dict_dir); + + let provider = RawCStr::new(&config.onnx_config.provider); + + let model_config = sherpa_rs_sys::SherpaOnnxOfflineTtsModelConfig { + num_threads: config.onnx_config.num_threads, + vits: sherpa_rs_sys::SherpaOnnxOfflineTtsVitsModelConfig { + model: model.as_ptr(), + lexicon: lexicon.as_ptr(), + tokens: tokens.as_ptr(), + data_dir: data_dir.as_ptr(), + noise_scale: config.noise_scale, + noise_scale_w: config.noise_scale_w, + length_scale: config.length_scale, + dict_dir: dict_dir.as_ptr(), + }, + debug: config.onnx_config.debug.into(), + provider: provider.as_ptr(), + matcha: mem::zeroed::<_>(), + kokoro: mem::zeroed::<_>(), + }; + let config = sherpa_rs_sys::SherpaOnnxOfflineTtsConfig { + max_num_sentences: 0, + model: model_config, + rule_fars: null(), + rule_fsts: null(), + }; + sherpa_rs_sys::SherpaOnnxCreateOfflineTts(&config) + }; + + Self { + tts, + } + } + + pub fn create(&mut self, text: &str, sid: i32, speed: f32) -> Result { + unsafe { super::create(self.tts, text, sid, speed) } + } +} diff --git a/examples/tts.rs b/examples/tts.rs deleted file mode 100644 index aa329ac..0000000 --- a/examples/tts.rs +++ /dev/null @@ -1,100 +0,0 @@ -/* -Convert text to speech - -Piper English model -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 -tar xf vits-piper-en_US-amy-low.tar.bz2 -cargo run --example tts --features="tts" -- --text 'liliana, the most beautiful and lovely assistant of our team!' --output audio.wav --tokens "vits-piper-en_US-amy-low/tokens.txt" --model "vits-piper-en_US-amy-low/en_US-amy-low.onnx" --data-dir "vits-piper-en_US-amy-low/espeak-ng-data" - -High quality vits-ljs with emotions voice -wget https://huggingface.co/csukuangfj/vits-ljs/resolve/main/vits-ljs.onnx -wget https://huggingface.co/csukuangfj/vits-ljs/resolve/main/lexicon.txt -wget https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt -cargo run --example tts --features="tts" -- --text "liliana, the most beautiful and lovely assistant of our team!" --output audio.wav --tokens "tokens.txt" --model "vits-ljs.onnx" --lexicon lexicon.txt - -MMS Hebrew model -wget https://huggingface.co/thewh1teagle/mms-tts-heb/resolve/main/model_sherpa.onnx -wget https://huggingface.co/thewh1teagle/mms-tts-heb/resolve/main/tokens.txt -cargo run --example tts --features="tts" -- --text "שלום וברכה, ניפרד בשמחה" --output audio.wav --tokens "tokens.txt" --model "model_sherpa.onnx" -*/ -use clap::Parser; - -/// TTS -#[derive(Parser, Debug)] -#[command(version, about, long_about = None)] -struct Args { - #[arg(short, long)] - tokens: String, - - #[arg(short, long)] - model: String, - - #[arg(long)] - text: Option, - - #[arg(long)] - text_file_input: Option, - - #[arg(short, long)] - output: String, - - #[arg(long)] - dict_dir: Option, - - #[arg(long)] - data_dir: Option, - - #[arg(long)] - lexicon: Option, - - #[arg(long)] - tts_rule_fsts: Option, - - #[arg(long)] - sid: Option, - - #[arg(long)] - speed: Option, - - #[arg(long)] - max_num_sentences: Option, - - #[arg(long)] - provider: Option, - - #[arg(long)] - debug: bool, -} - -fn main() { - // Parse command-line arguments into `Args` struct - let args = Args::parse(); - let text = if args.text.is_some() { - args.text.unwrap() - } else { - std::fs::read_to_string(args.text_file_input.unwrap()).unwrap() - }; - - let vits_config = sherpa_rs::tts::VitsConfig { - lexicon: args.lexicon.unwrap_or_default(), - tokens: args.tokens, - data_dir: args.data_dir.unwrap_or_default(), - dict_dir: args.dict_dir.unwrap_or_default(), - ..Default::default() - }; - - let max_num_sentences = args.max_num_sentences.unwrap_or(2); - let tts_config = sherpa_rs::tts::OfflineTtsConfig { - model: args.model, - max_num_sentences, - rule_fsts: args.tts_rule_fsts.unwrap_or_default(), - provider: args.provider, - ..Default::default() - }; - let mut tts = sherpa_rs::tts::OfflineTts::new(tts_config, vits_config); - let speed = args.speed.unwrap_or(1.0); - let sid = args.sid.unwrap_or(0); - let audio = tts.generate(text, sid, speed).unwrap(); - audio.write_to_wav(&args.output).unwrap(); // Use the provided output path - println!("Created {}", args.output); -} diff --git a/examples/tts_kokoro.rs b/examples/tts_kokoro.rs new file mode 100644 index 0000000..577335a --- /dev/null +++ b/examples/tts_kokoro.rs @@ -0,0 +1,27 @@ +/* +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 +tar xf kokoro-en-v0_19.tar.bz2 +rm kokoro-en-v0_19.tar.bz2 +cargo run --example tts_kokoro +*/ +pub use sherpa_rs::tts::{ KokoroTts, KokoroTtsConfig }; +use sherpa_rs::OnnxConfig; + +fn main() { + let config = KokoroTtsConfig { + model: "./kokoro-en-v0_19/model.onnx".to_string(), + voices: "./kokoro-en-v0_19/voices.bin".into(), + tokens: "./kokoro-en-v0_19/tokens.txt".into(), + data_dir: "./kokoro-en-v0_19/espeak-ng-data".into(), + length_scale: 1.0, + onnx_config: OnnxConfig::default(), + }; + let mut tts = KokoroTts::new(config); + + // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam + // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis + let sid = 0; + let audio = tts.create("Hello! This audio generated by Kokoro!", sid, 1.0).unwrap(); + sherpa_rs::write_audio_file("audio.wav", &audio.samples, audio.sample_rate).unwrap(); + println!("Created audio.wav") +} diff --git a/examples/tts_matcha.rs b/examples/tts_matcha.rs new file mode 100644 index 0000000..3cdd944 --- /dev/null +++ b/examples/tts_matcha.rs @@ -0,0 +1,24 @@ +/* +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 +rm matcha-icefall-en_US-ljspeech.tar.bz2 + +cargo run --example tts_matcha --features="tts" +*/ +pub use sherpa_rs::tts::{ MatchaTts, MatchaTtsConfig }; + +fn main() { + let config = MatchaTtsConfig { + acoustic_model: "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx".into(), + vocoder: "./hifigan_v2.onnx".into(), + tokens: "./matcha-icefall-en_US-ljspeech/tokens.txt".into(), + data_dir: "./matcha-icefall-en_US-ljspeech/espeak-ng-data".into(), + ..Default::default() + }; + let mut tts = MatchaTts::new(config); + let sid = 0; + let audio = tts.create("Hello! This audio generated by onnx model!", sid, 1.0).unwrap(); + sherpa_rs::write_audio_file("audio.wav", &audio.samples, audio.sample_rate).unwrap(); + println!("Created audio.wav") +} diff --git a/examples/tts_vits.rs b/examples/tts_vits.rs new file mode 100644 index 0000000..5e0932f --- /dev/null +++ b/examples/tts_vits.rs @@ -0,0 +1,22 @@ +/* +wget https://huggingface.co/csukuangfj/vits-ljs/resolve/main/vits-ljs.onnx +wget https://huggingface.co/csukuangfj/vits-ljs/resolve/main/lexicon.txt +wget https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt +cargo run --example tts_vits --features="tts" +*/ +pub use sherpa_rs::tts::{ VitsTts, VitsTtsConfig }; + +fn main() { + let config = VitsTtsConfig { + model: "./vits-ljs.onnx".into(), + lexicon: "./lexicon.txt".into(), + tokens: "./tokens.txt".into(), + length_scale: 1.0, + ..Default::default() + }; + let mut tts = VitsTts::new(config); + let sid = 0; + let audio = tts.create("Hello! This audio generated by onnx model!", sid, 1.0).unwrap(); + sherpa_rs::write_audio_file("audio.wav", &audio.samples, audio.sample_rate).unwrap(); + println!("Created audio.wav") +}