using DeepSpeechClient;
using NAudio.Wave;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
namespace CSharpExamples
{
class Program
{
    /// <summary>
    /// Get the value of an argument.
    /// </summary>
    /// <param name="args">Argument list.</param>
    /// <param name="option">Key of the argument.</param>
    /// <returns>
    /// The element that immediately follows the first occurrence of
    /// <paramref name="option"/> in <paramref name="args"/>, or <c>null</c>
    /// when the option is absent or is the last element.
    /// </returns>
    static string GetArgument(IEnumerable<string> args, string option)
        => args.SkipWhile(i => i != option).Skip(1).Take(1).FirstOrDefault();

    /// <summary>
    /// Loads a DeepSpeech model, optionally enables the language-model decoder,
    /// runs speech-to-text on a WAV file and prints the timing and the transcript.
    /// </summary>
    /// <param name="args">
    /// Command line arguments. Recognized options (each followed by its value):
    /// <c>--model</c>, <c>--alphabet</c>, <c>--lm</c>, <c>--trie</c>, <c>--audio</c>.
    /// Missing options fall back to the default file names used below.
    /// </param>
    static void Main(string[] args)
    {
        // GetArgument returns null for a missing option (and for an empty
        // argument list), so no separate length check is required.
        string model = GetArgument(args, "--model");
        string alphabet = GetArgument(args, "--alphabet");
        string lm = GetArgument(args, "--lm");
        string trie = GetArgument(args, "--trie");
        string audio = GetArgument(args, "--audio");

        const uint N_CEP = 26;
        const uint N_CONTEXT = 9;
        const uint BEAM_WIDTH = 200;
        const float LM_ALPHA = 0.75f;
        const float LM_BETA = 1.85f;

        Stopwatch stopwatch = new Stopwatch();
        using (DeepSpeech sttClient = new DeepSpeech())
        {
            // Start from a non-zero code so that an exception during loading
            // is treated as a failure by the check below.
            var result = 1;
            Console.WriteLine("Loading model...");
            stopwatch.Start();
            try
            {
                result = sttClient.CreateModel(
                    model ?? "output_graph.pbmm",
                    N_CEP, N_CONTEXT,
                    alphabet ?? "alphabet.txt",
                    BEAM_WIDTH);
            }
            catch (IOException ex)
            {
                Console.WriteLine("Error loading model.");
                Console.WriteLine(ex.Message);
            }
            stopwatch.Stop();

            if (result != 0)
            {
                Console.WriteLine("Error loading the model.");
                return;
            }

            // ElapsedMilliseconds is the total elapsed time; Elapsed.Milliseconds
            // would only be the 0-999 ms component of it.
            Console.WriteLine($"Model loaded - {stopwatch.ElapsedMilliseconds} ms");
            stopwatch.Reset();

            // The language model is optional and only enabled when --lm was given.
            if (lm != null)
            {
                Console.WriteLine("Loading LM...");
                try
                {
                    result = sttClient.EnableDecoderWithLM(
                        alphabet ?? "alphabet.txt",
                        lm,
                        trie ?? "trie",
                        LM_ALPHA, LM_BETA);

                    // NOTE(review): assumes the same zero-on-success convention
                    // as CreateModel above - confirm against the client API.
                    if (result != 0)
                    {
                        Console.WriteLine("Error loading lm.");
                    }
                }
                catch (IOException ex)
                {
                    Console.WriteLine("Error loading lm.");
                    Console.WriteLine(ex.Message);
                }
                // A failed LM load is not fatal: inference continues without it.
            }

            string audioFile = audio ?? "arctic_a0024.wav";

            // NOTE(review): the whole file is read as-is, so any WAV header bytes
            // are handed to the recognizer together with the samples - confirm
            // whether the header should be skipped.
            var waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));

            // The reader is opened only to report the duration of the audio.
            using (var waveInfo = new WaveFileReader(audioFile))
            {
                Console.WriteLine("Running inference....");
                stopwatch.Start();

                // MaxSize / 2 converts the buffer size to a count of 16-bit samples.
                // NOTE(review): the sample rate is hard-coded to 16000 - confirm it
                // matches both the model and the input file.
                string speechResult = sttClient.SpeechToText(
                    waveBuffer.ShortBuffer,
                    Convert.ToUInt32(waveBuffer.MaxSize / 2),
                    16000);
                stopwatch.Stop();

                Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                Console.WriteLine($"Recognized text: {speechResult}");
            }
            waveBuffer.Clear();
        }
    }
}
}