TTS Engine Custom - Google Cloud Text To Speech - WaveNet and Standard

jersonjunior · May 29, 2018, 7:09pm

WaveNet voices
The Cloud Text-to-Speech API also offers a group of premium voices generated using a WaveNet model, the same technology used to produce speech for Google Assistant, Google Search, and Google Translate. WaveNet technology provides more than just a series of synthetic voices: it represents a new way of creating synthetic speech.

Important remark

The following procedures were performed in a test environment. The propolys-tts.agi file will be modified, so FreePBX will alert you with the message below:

Module: “Text To Speech”, File: “/var/www/html/admin/modules/tts/agi-bin/propolys-tts.agi altered”

Create Custom Engine:

cd /opt/
git clone https://github.com/googleapis/nodejs-text-to-speech.git   # note: this repository is deprecated; all of its content and history has been moved to googleapis/google-cloud-node
npm install --save @google-cloud/text-to-speech
npm install
npm install optimist
npm install child_process

cd /opt/nodejs-text-to-speech/samples

vim quickstart.js

/**
 * Copyright 2018, Google, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

'use strict';

// [START vision_quickstart]
const argv = require('optimist').argv;
const child_process = require('child_process');
const fs = require('fs');

// Imports the Google Cloud client library
const textToSpeech = require('@google-cloud/text-to-speech');

// Command-line options (all three are required):
//   --text  text to synthesize
//   --mp3   path of the MP3 file to write
//   --wav   path of the decoded WAV file, WITHOUT the ".wav" extension
//
// optimist turns a bare flag into `true` and a numeric-looking value into a
// number, so only accept strings/numbers and normalise them to strings.
const isUsable = (value) =>
  (typeof value === 'string' && value !== '') || typeof value === 'number';

if (![argv.text, argv.mp3, argv.wav].every(isUsable)) {
  console.error(
    'ERROR: usage: quickstart.js --text=<text> --mp3=<file.mp3> --wav=<file without .wav>'
  );
  process.exit(1);
}

// The text to synthesize
const text = String(argv.text);
const mp3Path = String(argv.mp3);
const wavPath = `${String(argv.wav)}.wav`;

// Creates a client
const client = new textToSpeech.TextToSpeechClient();

// Construct the request
const request = {
  input: {text: text},
  // Select the language, the voice name and the SSML voice gender.
  // Change languageCode/name together to use another voice (e.g. 'pt-BR').
  voice: {languageCode: 'en-US', name: 'en-US-Wavenet-F', ssmlGender: 'FEMALE'},
  // Select the type of audio encoding
  audioConfig: {audioEncoding: 'MP3'},
};

// Performs the Text-to-Speech request
client.synthesizeSpeech(request, (err, response) => {
  if (err) {
    console.error('ERROR:', err);
    process.exitCode = 1;
    return;
  }

  // Write the binary audio content to a local file
  fs.writeFile(mp3Path, response.audioContent, 'binary', (writeErr) => {
    if (writeErr) {
      console.error('ERROR:', writeErr);
      process.exitCode = 1;
      return;
    }
    console.log(`Audio content written to file: ${mp3Path}`);

    // Decode the MP3 to WAV with lame. execFileSync passes the arguments as
    // an array, so no shell is involved: file names containing spaces or
    // shell metacharacters can neither break nor hijack the command.
    try {
      child_process.execFileSync('lame', ['--decode', mp3Path, '-b', '8000', wavPath]);
    } catch (decodeErr) {
      console.error('ERROR:', decodeErr);
      process.exitCode = 1;
    }
  });
});
// [END vision_quickstart]

/var/www/html/admin/modules/tts/agi-bin/propolys-tts.agi

#!/usr/bin/php -q
<?php
//	License for all code of this FreePBX module can be found in the license file inside the module directory
//	Copyright 2013 Schmooze Com Inc.
//  Xavier Ourciere xourciere[at]propolys[dot]com
//

// Load the settings from amportal.conf into an associative array.
$config = parse_amportal_conf( "/etc/amportal.conf" );

// PHPAGI library includes (provide the AGI class instantiated below).
require_once "phpagi.php";
require_once "phpagi-asmanager.php";

// $AGI is the handle used by debug() and by the playback code further down.
$AGI = new AGI();
debug("TTS AGI Started", 1);
// Path of your Google credentials JSON file. It is exported to the environment
// so that processes started later with exec() can pick it up.
putenv("GOOGLE_APPLICATION_CREDENTIALS=/opt/nodejs-text-to-speech/samples/test.json");
// Script arguments: 1 = text to speak, 2 = engine name, 3 = engine binary.
$text = $argv[1];
// MD5 of the text; used below to build the names of the generated files.
$hash = md5($text);
$engine = $argv[2];
$enginebin = $argv[3];

// Read the channel's native audio format; it drives the format selection below.
$f = $AGI->get_full_variable('${CHANNEL(audionativeformat)}');
$nformat = $f['data'];
// Default output format (file extension and sample rate).
$format = array(
	"ext" => "wav",
	"rate" => "8000"
);

// Choose the output file extension and sample rate from the channel's native
// audio format ($nformat). The cases are tested in order and the first
// matching pattern wins; anything unmatched falls back to 8 kHz wav.
// Credit: https://github.com/stevenmirabito/asterisk-picotts/blob/master/picotts.agi#L251
switch(true) {
	case preg_match('/(silk|sln)12/',$nformat):
		$format = array(
			"ext" => "sln12",
			"rate" => "12000"
		);
	break;
	case preg_match('/(speex|slin|silk)16|g722|siren7/',$nformat):
		$format = array(
			"ext" => "sln16",
			"rate" => "16000"
		);
	break;
	case preg_match('/(speex|slin|celt)32|siren14/',$nformat):
		$format = array(
			"ext" => "sln32",
			"rate" => "32000"
		);
	break;
	case preg_match('/(celt|slin)44/',$nformat):
		// The "44" formats are 44.1 kHz, so the rate handed to the engines
		// and to sox must be 44100 (not 44000) or playback is off-speed.
		$format = array(
			"ext" => "sln44",
			"rate" => "44100"
		);
	break;
	case preg_match('/(celt|slin)48/',$nformat):
		$format = array(
			"ext" => "sln48",
			"rate" => "48000"
		);
	break;
	default:
		$format = array(
			"ext" => "wav",
			"rate" => "8000"
		);
	break;
}

// Nothing to synthesize when no text argument was supplied.
if (!isset($text))
{
	return 0;
}

// Make sure the engine binary can be resolved before doing any work.
// "command -v" accepts both an absolute path and a bare command name found
// on PATH, and exits non-zero when neither resolves to something runnable.
// (Previously $retval was never assigned, so this check could never fire.)
$retval = 0;
$lookup = array();
exec("command -v ".escapeshellarg((string) $enginebin)." >/dev/null 2>&1", $lookup, $retval);
if ( $retval != 0 ) {
	debug("ERROR: TTS engine binary not found.", 1);
	return $retval;
}

// Directory that holds every generated TTS file.
$soundsdir = $config["ASTVARLIBDIR"]."/sounds/tts";
// Create the directory (and any missing parents). The trailing is_dir() covers
// the case where another process created it between the check and the mkdir.
if ( !is_dir($soundsdir) && !mkdir($soundsdir, 0775, true) && !is_dir($soundsdir) ) {
	debug("ERROR: Cannot create the directory: $soundsdir", 1);
	return 1;
}

// Final sound file, named after the engine and the MD5 of the text, with the
// extension chosen for the channel format.
$wavefile = $soundsdir."/$engine-tts-$hash.".$format['ext'];
// Intermediate WAV written by the engine before it is converted.
$tmpwavefile = $soundsdir."/$engine-tts-temp-$hash.wav";
debug("Generated WAV file: $wavefile", 3);
// Plain-text copy of the text, used as input by the file-based engines.
$textfile = $soundsdir."/$engine-tts-$hash.txt";
debug("TXT file: $textfile", 3);

// Synthesize the audio only when no file exists yet for this engine/text pair.
if ( !file_exists($wavefile) ) {
	debug("Text to speech wave file doesnt exist, lets create it.", 1);
	// Save the text to a file; most engines read their input from it.
	if ( false === ($fh = fopen($textfile, "w")) ) {
		debug("ERROR: Cannot open the file: $textfile", 1);
		return 1;
	}
	if ( false === fwrite($fh, $text) ) {
		// Release the handle before bailing out.
		fclose($fh);
		debug("ERROR: Cannot write to file: $textfile", 1);
		return 1;
	}
	fclose($fh);
	debug("Executing $engine", 1);

	// Every value interpolated into a shell command is quoted with
	// escapeshellarg(): the file names contain the caller-supplied engine
	// name, and the text itself is arbitrary.
	// NOTE(review): $enginebin is deliberately left unquoted in case it is
	// configured with extra arguments - confirm against the module settings.
	$tmparg = escapeshellarg($tmpwavefile);
	$textarg = escapeshellarg($textfile);
	switch ($engine) {
		case 'text2wave':
			exec($enginebin." -f ".$format['rate']." -o $tmparg $textarg");
			break;
		case 'flite':
			exec($enginebin." -f $textarg -o $tmparg");
			break;
		case 'swift':
			exec($enginebin." -p audio/channels=1,audio/sampling-rate=".$format['rate']." -o $tmparg -f $textarg");
			break;
		case 'pico':
			exec($enginebin." -o $tmparg ".escapeshellarg(file_get_contents($textfile)));
			break;
		case 'node':
			// quickstart.js writes <base>.mp3 and decodes it to <base>.wav.
			// The base lives in $soundsdir (not a hard-coded path) so the
			// result is found by the file_exists($wavefile) checks.
			// NOTE(review): the output is always "<base>.wav", so it only
			// matches $wavefile when the selected extension is "wav".
			$nodebase = $soundsdir."/$engine-tts-$hash";
			exec($enginebin." /opt/nodejs-text-to-speech/samples/quickstart.js"
				." --mp3=".escapeshellarg($nodebase.".mp3")
				." --text=".escapeshellarg($text)
				." --wav=".escapeshellarg($nodebase));
			break;
		default:
			debug("$engine is not a valid engine!", 1);
		break;
	}
}
// Convert the engine's temporary WAV to the final file at the selected sample
// rate, then remove the temporary file.
if(file_exists($tmpwavefile)) {
	// The sln* extensions are headerless signed-linear data, so sox must be
	// told to write raw. A ".wav" target needs a real WAV header; forcing
	// "-t raw" there would produce a headerless file with a .wav name.
	$soxtype = ($format['ext'] === "wav") ? "wav" : "raw";
	// Paths are shell-quoted because they contain the caller-supplied engine name.
	exec("sox ".escapeshellarg($tmpwavefile)." -q -r ".$format['rate']." -t $soxtype ".escapeshellarg($wavefile));
	unlink($tmpwavefile);
}

// Play the generated sound if it exists, otherwise report the failure.
if (!file_exists($wavefile)) {
	debug("File was not created!", 1);
} else {
	// Pause first: without this wait a freshly generated file was not
	// played the first time (timeout value 1000, presumably milliseconds).
	$AGI->wait_for_digit(1000);
	debug("Streaming the generated wave.", 1);
	// stream_file() takes the sound name relative to the sounds directory
	// and without the extension; '#' is passed as its second argument
	// (presumably the digit that interrupts playback).
	$extsuffix = ".".$format['ext'];
	$soundname = basename($wavefile, $extsuffix);
	$AGI->stream_file("tts/".$soundname, '#');
}
debug("TTS AGI end", 1);

// Parse an amportal.conf-style file of "KEY = value" lines.
//
// $filename  path of the file to read.
// Returns an associative array KEY => value. The array is empty when the
// file cannot be read or contains no matching line.
//
// Only keys made of letters and digits are recognised; lines that do not
// match (comments, blank lines) are skipped. Trailing whitespace and a
// trailing "\r" from CRLF files are stripped from each value.
// NOTE(review): the greedy "(.*)" means a trailing "; comment" or "# comment"
// on the same line stays part of the value - unchanged here because values
// may legitimately contain those characters.
function parse_amportal_conf($filename) {
	// Always return an array, even when nothing could be parsed.
	$conf = array();
	$file = is_readable($filename) ? file($filename) : false;
	if ($file === false) {
		return $conf;
	}
	$matches = array();
	$matchpattern = '/^\s*([a-zA-Z0-9]+)\s*=\s*(.*)\s*([;#].*)?/';
	foreach ($file as $line) {
		if (preg_match($matchpattern, $line, $matches)) {
			$conf[ $matches[1] ] = rtrim($matches[2]);
		}
	}
	return $conf;
}

// Send a message to the script-wide $AGI object through its verbose() method.
//
// $string  message text.
// $level   verbosity level handed to verbose(); defaults to 3.
function debug($string, $level=3) {
	$GLOBALS['AGI']->verbose($string, $level);
}

More information:

system · May 29, 2019, 7:09pm

This topic was automatically closed 365 days after the last reply. New replies are no longer allowed.