Google Cloud Speech API 非同期の使い方 -InfiniTalk

WAVファイルを Google Cloud Speech API の非同期認識でテキストに変換する方法

①InfiniTalkサーバ(Asterisk側)

  1. GoogleCloudSDKのインストール
  2. AGIでmonitorを使いステレオ録音を行う
  3. gsutil -m rsync で GCPのストレージに転送する。

②GoogleCloudPlatform

次の3つをインストール

  1. Node.js (8.11.3)
  2. Google Cloud Speech API
  3. Google Cloud Pub/Sub API

Google Cloud Pub/Sub API で、新規にアップロードされたファイルの通知メッセージを受け取る

Google Cloud Speech-to-Text API でテキストに変換する(Node.js)

 

/**
 * Assemble the request object that is handed to the speech client's
 * long-running recognize call.
 *
 * Only `encoding`, `sampleRateHertz` and `languageCode` are copied from
 * `config`; any other keys it carries are ignored.
 *
 * @param {string} gcsAudioUri - URI of the audio object; becomes `audio.uri`.
 * @param {Object} config - Source of the three recognition settings above.
 * @param {Object} speechContext - Placed as the single entry of
 *   `config.speechContexts`.
 * @returns {{config: Object, audio: {uri: string}}} The assembled request.
 */
function makeSpeechRequest(gcsAudioUri, config, speechContext) {
  const { encoding, sampleRateHertz, languageCode } = config;
  return {
    config: {
      encoding,
      sampleRateHertz,
      languageCode,
      speechContexts: [speechContext],
    },
    audio: { uri: gcsAudioUri },
  };
}
/**
 * Map an audio file path to the local path of its transcript text file.
 *
 * AUDIO_ROOT_PATH is stripped from the audio path with a single replace(),
 * the trailing ".wav" extension is swapped for ".txt", and the result is
 * appended to `<__dirname>/<LOCAL_TEXT_ROOT_PATH>`.
 *
 * @param {string} audio_file_path - Path of the audio file.
 * @returns {string} Local path at which the transcript should be written.
 */
function makeLocalTextFilePath(audio_file_path) {
  const relativeAudioPath = audio_file_path.replace(AUDIO_ROOT_PATH, '');
  // Anchor the match to the end of the path: a plain replace('.wav', ...)
  // rewrites the first ".wav" it finds, which corrupts paths whose directory
  // names contain ".wav". The match is case-insensitive so "CALL.WAV" does
  // not end up as a text file that still carries an audio extension.
  const relativeTextPath = relativeAudioPath.replace(/\.wav$/i, '.txt');
  return __dirname + '/' + LOCAL_TEXT_ROOT_PATH + relativeTextPath;
}
/**
 * Render a value as the text that is written to the transcript file.
 *
 * @param {*} data - Value to render.
 * @returns {string} `util.inspect` output with hidden properties omitted and
 *   no limit on nesting depth.
 */
function makeFileContent(data) {
  const inspectOptions = { showHidden: false, depth: null };
  return util.inspect(data, inspectOptions);
}
/*
 * Load the speech context for an audio file from its group's JSON file.
 *
 * File path (relative):
 *   dictionary/[group]/speechcontext.json
 * [group] is the third '/'-separated segment of audio_file_path
 * (index 2 after splitting).
 *
 * Returns the parsed JSON when that file exists, otherwise an empty object.
 */
function getSpeechContext(audio_file_path) {
  const group = audio_file_path.split('/')[2];
  const contextFile = `dictionary/${group}/speechcontext.json`;
  if (!fs.existsSync(contextFile)) {
    return {};
  }
  const rawJson = fs.readFileSync(contextFile, 'utf8');
  return JSON.parse(rawJson);
}
/**
 * Write a transcript to local disk, then hand off to _syncToStorage.
 *
 * Steps, each started only if the previous one succeeded:
 *   1. create the parent directory of `local_file_path` (`mkdir -p`),
 *   2. write `file_content` to `local_file_path`,
 *   3. call `_syncToStorage(gcsSyncpath, 0)`.
 * A failure in step 1 or 2 is logged and the remaining steps are skipped.
 *
 * @param {string} gcsSyncpath - Destination passed through to _syncToStorage.
 * @param {string} file_content - Text to write.
 * @param {string} local_file_path - Local file to create or overwrite.
 * @returns {void} Nothing; all work happens asynchronously in callbacks.
 */
function saveAndSync(gcsSyncpath, file_content, local_file_path) {
  // Required here so this function does not depend on how the module's
  // top-level child_process import is destructured.
  const { execFile } = require('child_process');
  const dir = getDirName(local_file_path);
  // The directory is passed as an argument vector rather than spliced into a
  // shell command line, so spaces or shell metacharacters in a file name
  // cannot break or alter the command.
  execFile('mkdir', ['-p', dir], (error) => {
    if (error !== null) {
      return console.log(`exec error: ${error}`);
    }
    fs.writeFile(local_file_path, file_content, (err) => {
      if (err) {
        return console.log(err);
      }
      _syncToStorage(gcsSyncpath, 0);
    });
  });
}
/**
 * Run `gsutil -m rsync -r` from LOCAL_TEXT_ROOT_PATH to `gcsSyncpath`,
 * allowing only one rsync at a time.
 *
 * State lives in the outer-scope flags `is_syncing` and `one_more_time`
 * (declared outside this function). A call that arrives while a sync is
 * running does not start a second process; it sets `one_more_time` so that
 * one follow-up pass runs as soon as the current one finishes.
 *
 * @param {string} gcsSyncpath - rsync destination.
 * @param {number|boolean} one_more_time_flag - Truthy only for the internal
 *   follow-up call, which must run even though `is_syncing` is still set.
 * @returns {void}
 */
function _syncToStorage(gcsSyncpath, one_more_time_flag) {
  if (is_syncing && !one_more_time_flag) {
    one_more_time = 1;
    console.log("skip sync!!");
    return;
  }

  is_syncing = 1;
  one_more_time = 0;
  console.log("syncing.....");
  const command = 'gsutil -m rsync -r ' + LOCAL_TEXT_ROOT_PATH + ' ' + gcsSyncpath;
  exec(command, (error) => {
    if (error !== null) {
      console.log(`exec error: ${error}`);
    } else {
      console.log('successfully synced file to ' + gcsSyncpath);
    }
    // This runs after failures too. Returning early on error would leave
    // `is_syncing` set forever, and every later request would be skipped.
    if (one_more_time) {
      _syncToStorage(gcsSyncpath, 1);
    } else {
      is_syncing = 0;
    }
  });
}
/*
 * THE EXPORTED FUNCTION
 * Send the speech request for one audio file to the speech client, wait for
 * the long-running operation to finish, then save the result locally and
 * trigger the sync via saveAndSync.
 *
 * Parameters:
 *   gcsBucket         - prefix joined directly (no separator added) with
 *                       audio_file_path to form the audio URI, and with
 *                       LOCAL_TEXT_ROOT_PATH to form the sync destination
 *   audio_file_path   - path of the audio file
 *   storage_text_path - not used by this function; kept so existing callers
 *                       do not need to change
 *
 * Returns a Promise that settles after saveAndSync has been started (the file
 * write and sync themselves are callback-based and are not awaited). It never
 * rejects: any failure is logged with console.error.
 */
function speechToText(gcsBucket, audio_file_path, storage_text_path) {
  // The request is built inside the chain so that a failure while preparing
  // it (for example a malformed speechcontext.json) reaches the .catch below
  // instead of being thrown synchronously at the caller.
  return Promise.resolve()
    .then(() => {
      const gcsAudioUri = gcsBucket + audio_file_path;
      const speechContext = getSpeechContext(audio_file_path);
      const request = makeSpeechRequest(gcsAudioUri, CONFIG.WAV, speechContext);
      return client.longRunningRecognize(request);
    })
    .then(data => {
      const operation = data[0];
      // Get a Promise representation of the final result of the job
      return operation.promise();
    })
    .then(data => {
      const file_content = makeFileContent(data[0]);
      const file_path = makeLocalTextFilePath(audio_file_path);
      const gcsSyncpath = gcsBucket + LOCAL_TEXT_ROOT_PATH;
      saveAndSync(gcsSyncpath, file_content, file_path);
    })
    .catch(err => {
      console.error('ERROR:', err);
    });
}
// Public API of this module: only speechToText is exported.
module.exports = { speechToText };

他にご質問がございましたら、リクエストを送信してください

コメント

Powered by Zendesk