/*
* Copyright (c) 2017 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
using CommandLine;
using Google.Apis.Auth.OAuth2;
using Google.Cloud.Speech.V1;
using Grpc.Auth;
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
namespace GoogleCloudSamples
{
/// <summary>
/// Base command line options for verbs that take a single sound file path.
/// </summary>
class Options
{
    /// <summary>First positional argument: the sound file to process.</summary>
    [Value(0, Required = true,
        HelpText = "A path to a sound file. Encoding must be Linear16 "
            + "with a sample rate of 16000.")]
    public string FilePath { get; set; }
}
/// <summary>
/// Base command line options for verbs whose sound file argument is described
/// to the user as either a local path or a Google Cloud Storage path.
/// </summary>
class StorageOptions
{
    /// <summary>First positional argument: the sound file to process.</summary>
    [Value(0, Required = true,
        HelpText = "A path to a sound file. Can be a local file path or a "
            + "Google Cloud Storage path like gs://my-bucket/my-object. "
            + "Encoding must be Linear16 with a sample rate of 16000.")]
    public string FilePath { get; set; }
}
/// <summary>
/// Command line options for the "sync" verb.
/// </summary>
[Verb("sync", HelpText = "Detects speech in an audio file.")]
class SyncOptions : StorageOptions
{
    /// <summary>Bound to the -w switch.</summary>
    [Option('w', HelpText = "Report the time offsets of individual words.")]
    public bool EnableWordTimeOffsets { get; set; }
}
/// <summary>
/// Command line options for the "with-context" verb. Adds nothing beyond the
/// file path inherited from <see cref="StorageOptions"/>.
/// </summary>
[Verb("with-context",
    HelpText = "Detects speech in an audio file. Add additional context on stdin.")]
class OptionsWithContext : StorageOptions
{
}
/// <summary>
/// Command line options for the "async" verb.
/// </summary>
[Verb("async",
    HelpText = "Creates a job to detect speech in an audio file, "
        + "and waits for the job to complete.")]
class AsyncOptions : StorageOptions
{
    /// <summary>Bound to the -w switch.</summary>
    [Option('w', HelpText = "Report the time offsets of individual words.")]
    public bool EnableWordTimeOffsets { get; set; }
}
/// <summary>
/// Command line options for the "sync-creds" verb, which takes an explicit
/// credentials file in addition to the sound file.
/// </summary>
[Verb("sync-creds", HelpText = "Detects speech in an audio file.")]
class SyncOptionsWithCreds
{
    /// <summary>First positional argument: the sound file to process.</summary>
    [Value(0, Required = true,
        HelpText = "A path to a sound file. Encoding must be Linear16 "
            + "with a sample rate of 16000.")]
    public string FilePath { get; set; }

    /// <summary>Second positional argument: the credentials json file.</summary>
    [Value(1, Required = true, HelpText = "Path to Google credentials json file.")]
    public string CredentialsFilePath { get; set; }
}
/// <summary>
/// Command line options for the "stream" verb. Adds nothing beyond the file
/// path inherited from <see cref="Options"/>.
/// </summary>
[Verb("stream",
    HelpText = "Detects speech in an audio file by streaming it to the Speech API.")]
class StreamingOptions : Options
{
}
/// <summary>
/// Command line options for the "listen" verb.
/// </summary>
[Verb("listen", HelpText = "Detects speech in a microphone input stream.")]
class ListenOptions
{
    /// <summary>
    /// Optional first positional argument; stays at 3 when it is not supplied.
    /// </summary>
    [Value(0, Required = false, HelpText = "Number of seconds to listen for.")]
    public int Seconds { get; set; } = 3;
}
/// <summary>
/// Command line options for the "rec" verb, which lets the caller choose the
/// audio encoding and sample rate instead of relying on fixed values.
/// </summary>
[Verb("rec", HelpText = "Detects speech in an audio file. Supports other file formats.")]
class RecOptions : Options
{
    /// <summary>
    /// Sample rate of the audio, in hertz. The property keeps its historical
    /// name for compatibility, but the value is a sample rate (it is what
    /// Recognize.Rec assigns to RecognitionConfig.SampleRateHertz), not a
    /// bit rate, so the help text states the unit as hertz.
    /// </summary>
    [Option('b', Default = 16000, HelpText = "Sample rate in hertz (samples per second).")]
    public int BitRate { get; set; }

    /// <summary>Encoding of the audio file; defaults to Linear16.</summary>
    [Option('e', Default = RecognitionConfig.Types.AudioEncoding.Linear16,
        HelpText = "Audio file encoding format.")]
    public RecognitionConfig.Types.AudioEncoding Encoding { get; set; }
}
public class Recognize
{
/// <summary>
/// Sends a local audio file to the Speech API with a caller-supplied encoding
/// and sample rate, then prints every alternative transcript to the console.
/// </summary>
/// <param name="filePath">Path passed to RecognitionAudio.FromFile.</param>
/// <param name="bitRate">Value assigned to SampleRateHertz in the request.</param>
/// <param name="encoding">Audio encoding assigned to the request.</param>
/// <returns>Always 0.</returns>
static object Rec(string filePath, int bitRate,
    RecognitionConfig.Types.AudioEncoding encoding)
{
    var client = SpeechClient.Create();
    var config = new RecognitionConfig
    {
        Encoding = encoding,
        SampleRateHertz = bitRate,
        LanguageCode = "en",
    };
    var audio = RecognitionAudio.FromFile(filePath);
    var reply = client.Recognize(config, audio);
    foreach (var recognized in reply.Results)
    {
        foreach (var candidate in recognized.Alternatives)
        {
            Console.WriteLine(candidate.Transcript);
        }
    }
    return 0;
}
// [START speech_sync_recognize]
/// <summary>
/// Sends a local Linear16 / 16000 Hz audio file to the Speech API in a single
/// blocking request and prints every alternative transcript to the console.
/// </summary>
/// <param name="filePath">Path passed to RecognitionAudio.FromFile.</param>
/// <returns>Always 0.</returns>
static object SyncRecognize(string filePath)
{
    var client = SpeechClient.Create();
    var config = new RecognitionConfig
    {
        Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
        SampleRateHertz = 16000,
        LanguageCode = "en",
    };
    var audio = RecognitionAudio.FromFile(filePath);
    var reply = client.Recognize(config, audio);
    foreach (var recognized in reply.Results)
    {
        foreach (var candidate in recognized.Alternatives)
        {
            Console.WriteLine(candidate.Transcript);
        }
    }
    return 0;
}
// [END speech_sync_recognize]
// [START speech_sync_recognize_words]
/// <summary>
/// Sends a local Linear16 / 16000 Hz audio file to the Speech API with word
/// time offsets enabled. For each alternative it prints the transcript, the
/// word count, and every word with its start and end time.
/// </summary>
/// <param name="filePath">Path passed to RecognitionAudio.FromFile.</param>
/// <returns>Always 0.</returns>
static object SyncRecognizeWords(string filePath)
{
    var client = SpeechClient.Create();
    var config = new RecognitionConfig
    {
        Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
        SampleRateHertz = 16000,
        LanguageCode = "en",
        EnableWordTimeOffsets = true,
    };
    var audio = RecognitionAudio.FromFile(filePath);
    var reply = client.Recognize(config, audio);
    foreach (var recognized in reply.Results)
    {
        foreach (var candidate in recognized.Alternatives)
        {
            Console.WriteLine($"Transcript: {candidate.Transcript}");
            Console.WriteLine("Word details:");
            Console.WriteLine($" Word count:{candidate.Words.Count}");
            foreach (var wordInfo in candidate.Words)
            {
                Console.WriteLine($" {wordInfo.Word}");
                Console.WriteLine($" WordStartTime: {wordInfo.StartTime}");
                Console.WriteLine($" WordEndTime: {wordInfo.EndTime}");
            }
        }
    }
    return 0;
}
// [END speech_sync_recognize_words]
/// <summary>
/// Reads a list of phrases from stdin.
/// </summary>
/// <returns>
/// The trimmed lines entered before the first blank (or whitespace-only)
/// line, in input order. Empty when the first line is blank or stdin is
/// already at end of input.
/// </returns>
static List<string> ReadPhrases()
{
    Console.Write("Reading phrases from stdin. Finish with blank line.\n> ");
    var phrases = new List<string>();
    string line = Console.ReadLine();
    // Console.ReadLine returns null at end of input; IsNullOrWhiteSpace
    // treats that the same as a blank line, so the loop ends either way.
    while (!string.IsNullOrWhiteSpace(line))
    {
        phrases.Add(line.Trim());
        Console.Write("> ");
        line = Console.ReadLine();
    }
    return phrases;
}
/// <summary>
/// Recognizes speech in a Linear16 / 16000 Hz audio file, passing the given
/// phrases to the API as a speech context, and prints every alternative
/// transcript to the console.
/// </summary>
/// <param name="filePath">
/// Audio location. When IsStorageUri accepts it, it is sent as a storage URI;
/// otherwise it is read as a local file.
/// </param>
/// <param name="phrases">Phrases added to the request's speech context.</param>
/// <returns>Always 0.</returns>
static object RecognizeWithContext(string filePath, IEnumerable<string> phrases)
{
    var speech = SpeechClient.Create();
    var config = new RecognitionConfig()
    {
        // The collection initializer appends the whole phrase sequence to
        // the context's phrase list.
        SpeechContexts = { new SpeechContext() { Phrases = { phrases } } },
        Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
        SampleRateHertz = 16000,
        LanguageCode = "en",
    };
    var audio = IsStorageUri(filePath) ?
        RecognitionAudio.FromStorageUri(filePath) :
        RecognitionAudio.FromFile(filePath);
    var response = speech.Recognize(config, audio);
    foreach (var result in response.Results)
    {
        foreach (var alternative in result.Alternatives)
        {
            Console.WriteLine(alternative.Transcript);
        }
    }
    return 0;
}
/// <summary>
/// Recognizes speech in a local Linear16 / 16000 Hz audio file using
/// credentials loaded from an explicit json file, and prints every
/// alternative transcript to the console.
/// </summary>
/// <param name="filePath">Path passed to RecognitionAudio.FromFile.</param>
/// <param name="credentialsFilePath">Path to the Google credentials json file.</param>
/// <returns>Always 0.</returns>
static object SyncRecognizeWithCredentials(string filePath, string credentialsFilePath)
{
    GoogleCredential googleCredential;
    // File.OpenRead asks for read-only access. FileStream(path, FileMode.Open)
    // asks for read/write access, which fails on a read-only credentials file.
    using (Stream credentialStream = File.OpenRead(credentialsFilePath))
    {
        googleCredential = GoogleCredential.FromStream(credentialStream);
    }
    var channel = new Grpc.Core.Channel(SpeechClient.DefaultEndpoint.Host,
        googleCredential.ToChannelCredentials());
    try
    {
        var speech = SpeechClient.Create(channel);
        var response = speech.Recognize(new RecognitionConfig()
        {
            Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
            SampleRateHertz = 16000,
            LanguageCode = "en",
        }, RecognitionAudio.FromFile(filePath));
        foreach (var result in response.Results)
        {
            foreach (var alternative in result.Alternatives)
            {
                Console.WriteLine(alternative.Transcript);
            }
        }
    }
    finally
    {
        // This method created the channel, so it is responsible for
        // shutting it down once the request has finished or failed.
        channel.ShutdownAsync().Wait();
    }
    return 0;
}
// [START speech_sync_recognize_gcs]
/// <summary>
/// Sends a Linear16 / 16000 Hz audio object, referenced by storage URI, to
/// the Speech API in a single blocking request and prints every alternative
/// transcript to the console.
/// </summary>
/// <param name="storageUri">URI passed to RecognitionAudio.FromStorageUri.</param>
/// <returns>Always 0.</returns>
static object SyncRecognizeGcs(string storageUri)
{
    var client = SpeechClient.Create();
    var config = new RecognitionConfig
    {
        Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
        SampleRateHertz = 16000,
        LanguageCode = "en",
    };
    var audio = RecognitionAudio.FromStorageUri(storageUri);
    var reply = client.Recognize(config, audio);
    foreach (var recognized in reply.Results)
    {
        foreach (var candidate in recognized.Alternatives)
        {
            Console.WriteLine(candidate.Transcript);
        }
    }
    return 0;
}
// [END speech_sync_recognize_gcs]
// [START speech_async_recognize]
/// <summary>
/// Starts a long-running recognition job for a local Linear16 / 16000 Hz
/// audio file, polls until the job completes, and prints every alternative
/// transcript to the console.
/// </summary>
/// <param name="filePath">Path passed to RecognitionAudio.FromFile.</param>
/// <returns>Always 0.</returns>
static object LongRunningRecognize(string filePath)
{
    var client = SpeechClient.Create();
    var config = new RecognitionConfig
    {
        Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
        SampleRateHertz = 16000,
        LanguageCode = "en",
    };
    var audio = RecognitionAudio.FromFile(filePath);
    var pending = client.LongRunningRecognize(config, audio);
    var finished = pending.PollUntilCompleted();
    var reply = finished.Result;
    foreach (var recognized in reply.Results)
    {
        foreach (var candidate in recognized.Alternatives)
        {
            Console.WriteLine(candidate.Transcript);
        }
    }
    return 0;
}
// [END speech_async_recognize]
// [START speech_async_recognize_gcs]
/// <summary>
/// Starts a long-running recognition job for a Linear16 / 16000 Hz audio
/// object referenced by storage URI, polls until the job completes, and
/// prints each alternative transcript prefixed with "Transcript: ".
/// </summary>
/// <param name="storageUri">URI passed to RecognitionAudio.FromStorageUri.</param>
/// <returns>Always 0.</returns>
static object AsyncRecognizeGcs(string storageUri)
{
    var client = SpeechClient.Create();
    var config = new RecognitionConfig
    {
        Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
        SampleRateHertz = 16000,
        LanguageCode = "en",
    };
    var audio = RecognitionAudio.FromStorageUri(storageUri);
    var pending = client.LongRunningRecognize(config, audio);
    var finished = pending.PollUntilCompleted();
    var reply = finished.Result;
    foreach (var recognized in reply.Results)
    {
        foreach (var candidate in recognized.Alternatives)
        {
            Console.WriteLine($"Transcript: {candidate.Transcript}");
        }
    }
    return 0;
}
// [END speech_async_recognize_gcs]
// [START speech_async_recognize_gcs_words]
/// <summary>
/// Starts a long-running recognition job, with word time offsets enabled, for
/// a Linear16 / 16000 Hz audio object referenced by storage URI. After polling
/// to completion it prints, for each alternative, the transcript, the word
/// count, and every word with its start and end time.
/// </summary>
/// <param name="storageUri">URI passed to RecognitionAudio.FromStorageUri.</param>
/// <returns>Always 0.</returns>
static object AsyncRecognizeGcsWords(string storageUri)
{
    var client = SpeechClient.Create();
    var config = new RecognitionConfig
    {
        Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
        SampleRateHertz = 16000,
        LanguageCode = "en",
        EnableWordTimeOffsets = true,
    };
    var audio = RecognitionAudio.FromStorageUri(storageUri);
    var pending = client.LongRunningRecognize(config, audio);
    var finished = pending.PollUntilCompleted();
    var reply = finished.Result;
    foreach (var recognized in reply.Results)
    {
        foreach (var candidate in recognized.Alternatives)
        {
            Console.WriteLine($"Transcript: {candidate.Transcript}");
            Console.WriteLine("Word details:");
            Console.WriteLine($" Word count:{candidate.Words.Count}");
            foreach (var wordInfo in candidate.Words)
            {
                Console.WriteLine($" {wordInfo.Word}");
                Console.WriteLine($" WordStartTime: {wordInfo.StartTime}");
                Console.WriteLine($" WordEndTime: {wordInfo.EndTime}");
            }
        }
    }
    return 0;
}
// [END speech_async_recognize_gcs_words]
/// <summary>
/// Stream the content of the file to the API in 32kb chunks.
/// </summary>
// [START speech_streaming_recognize]
static async Task