57 lines
1.8 KiB
C++
57 lines
1.8 KiB
C++
#include "whisper.h"
|
|
#include "common-whisper.h"
|
|
|
|
#include <cstdio>
|
|
#include <cfloat>
|
|
#include <string>
|
|
#include <cstring>
|
|
|
|
#ifdef NDEBUG
|
|
#undef NDEBUG
|
|
#endif
|
|
|
|
#include <cassert>
|
|
|
|
int main() {
|
|
std::string whisper_model_path = WHISPER_MODEL_PATH;
|
|
std::string vad_model_path = VAD_MODEL_PATH;
|
|
std::string sample_path = SAMPLE_PATH;
|
|
|
|
// Load the sample audio file
|
|
std::vector<float> pcmf32;
|
|
std::vector<std::vector<float>> pcmf32s;
|
|
assert(read_audio_data(sample_path.c_str(), pcmf32, pcmf32s, false));
|
|
|
|
struct whisper_context_params cparams = whisper_context_default_params();
|
|
struct whisper_context * wctx = whisper_init_from_file_with_params(
|
|
whisper_model_path.c_str(),
|
|
cparams);
|
|
|
|
struct whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_BEAM_SEARCH);
|
|
wparams.vad = true;
|
|
wparams.vad_model_path = vad_model_path.c_str();
|
|
|
|
wparams.vad_params.threshold = 0.5f;
|
|
wparams.vad_params.min_speech_duration_ms = 250;
|
|
wparams.vad_params.min_silence_duration_ms = 100;
|
|
wparams.vad_params.max_speech_duration_s = FLT_MAX;
|
|
wparams.vad_params.speech_pad_ms = 30;
|
|
|
|
assert(whisper_full_parallel(wctx, wparams, pcmf32.data(), pcmf32.size(), 1) == 0);
|
|
|
|
const int n_segments = whisper_full_n_segments(wctx);
|
|
assert(n_segments == 1);
|
|
|
|
|
|
printf("Segment text:\n%s", whisper_full_get_segment_text(wctx, 0));
|
|
assert(strcmp(" And so my fellow Americans, ask not what your country can do for you,"
|
|
" ask what you can do for your country.",
|
|
whisper_full_get_segment_text(wctx, 0)) == 0);
|
|
assert(whisper_full_get_segment_t0(wctx, 0) == 32);
|
|
assert(whisper_full_get_segment_t1(wctx, 0) == 1051);
|
|
|
|
whisper_free(wctx);
|
|
|
|
return 0;
|
|
}
|