llama-tts : add '-o' option (#12398)

* added -o option to specify an output file name

* llama-tts returns ENOENT in case of file write error

Note: PR #12042 is closed as superseded by this one.
This commit is contained in:
marcoStocchi
2025-03-15 17:23:11 +01:00
committed by GitHub
parent 3d35d87b41
commit f4c3dd5daa
2 changed files with 13 additions and 10 deletions

View File

@ -1889,7 +1889,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params, const std::string & value) { [](common_params & params, const std::string & value) {
params.out_file = value; params.out_file = value;
} }
).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA})); ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_TTS}));
add_opt(common_arg( add_opt(common_arg(
{"-ofreq", "--output-frequency"}, "N", {"-ofreq", "--output-frequency"}, "N",
string_format("output the imatrix every N iterations (default: %d)", params.n_out_freq), string_format("output the imatrix every N iterations (default: %d)", params.n_out_freq),

View File

@ -87,11 +87,11 @@ struct wav_header {
uint32_t data_size; uint32_t data_size;
}; };
static void save_wav16(const std::string & fname, const std::vector<float> & data, int sample_rate) { static bool save_wav16(const std::string & fname, const std::vector<float> & data, int sample_rate) {
std::ofstream file(fname, std::ios::binary); std::ofstream file(fname, std::ios::binary);
if (!file) { if (!file) {
LOG_ERR("%s: Failed to open file '%s' for writing", __func__, fname.c_str()); LOG_ERR("%s: Failed to open file '%s' for writing.\n", __func__, fname.c_str());
return; return false;
} }
wav_header header; wav_header header;
@ -108,7 +108,7 @@ static void save_wav16(const std::string & fname, const std::vector<float> & dat
file.write(reinterpret_cast<const char*>(&pcm_sample), sizeof(pcm_sample)); file.write(reinterpret_cast<const char*>(&pcm_sample), sizeof(pcm_sample));
} }
file.close(); return file.good();
} }
static void fill_hann_window(int length, bool periodic, float * output) { static void fill_hann_window(int length, bool periodic, float * output) {
@ -536,6 +536,7 @@ static std::string audio_data_from_speaker(json speaker, const outetts_version t
int main(int argc, char ** argv) { int main(int argc, char ** argv) {
common_params params; common_params params;
params.out_file = "output.wav";
params.prompt = ""; params.prompt = "";
params.n_predict = 4096; params.n_predict = 4096;
@ -1060,8 +1061,6 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
} }
#endif #endif
const std::string fname = "output.wav";
const int n_sr = 24000; // sampling rate const int n_sr = 24000; // sampling rate
// zero out first 0.25 seconds // zero out first 0.25 seconds
@ -1072,11 +1071,15 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
LOG_INF("%s: time for spectral ops: %.3f ms\n", __func__, (ggml_time_us() - t_spec_start) / 1000.0f); LOG_INF("%s: time for spectral ops: %.3f ms\n", __func__, (ggml_time_us() - t_spec_start) / 1000.0f);
LOG_INF("%s: total time: %.3f ms\n", __func__, (ggml_time_us() - t_main_start) / 1000.0f); LOG_INF("%s: total time: %.3f ms\n", __func__, (ggml_time_us() - t_main_start) / 1000.0f);
save_wav16(fname, audio, n_sr); int retval = 0;
LOG_INF("%s: audio written to file '%s'\n", __func__, fname.c_str()); if (save_wav16(params.out_file, audio, n_sr)) {
LOG_INF("%s: audio written to file '%s'\n", __func__, params.out_file.c_str());
} else {
retval = ENOENT;
}
llama_backend_free(); llama_backend_free();
return 0; return retval;
} }