resources/network_speech_synthesis/tts_extension.js

// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

/**
 * @fileoverview
 * This is a component extension that implements a text-to-speech (TTS)
 * engine powered by Google's speech synthesis API.
 *
 * This is an "event page", so it's not loaded when the API isn't being used,
 * and doesn't waste resources. When a web page or web app makes a speech
 * request and the parameters match one of the voices in this extension's
 * manifest, it makes a request to Google's API using Chrome's private key
 * and plays the resulting speech using HTML5 audio.
 */

/**
 * The main class for this extension. Adds listeners to
 * chrome.ttsEngine.onSpeak and chrome.ttsEngine.onStop and implements
 * them using Google's speech synthesis API.
 * @constructor
 */
function TtsExtension() {}

TtsExtension.prototype = {
  /**
   * The url prefix of the speech server, including static query
   * parameters that don't change.
   * @type {string}
   * @const
   * @private
   */
  SPEECH_SERVER_URL_:
      'https://www.google.com/speech-api/v2/synthesize?' +
      'enc=mpeg&client=chromium',

  /**
   * The maximum utterance length, in UTF-16 code units. Longer utterances
   * are truncated before being sent to the speech server.
   * @type {number}
   * @const
   * @private
   */
  MAX_UTTERANCE_LENGTH_: 32768,

  /**
   * A mapping from language and gender to voice name, hardcoded for now
   * until the speech synthesis server capabilities response provides this.
   * The key of this map is of the form '<lang>-<gender>'.
   * @type {Object.<string, string>}
   * @private
   */
  LANG_AND_GENDER_TO_VOICE_NAME_: {
    'en-gb-male': 'rjs',
    'en-gb-female': 'fis',
  },

  /**
   * The arguments passed to the onSpeak event handler for the utterance
   * that's currently being spoken. Should be null when no utterance is
   * pending.
   *
   * @type {?{utterance: string, options: Object, callback: Function}}
   * @private
   */
  currentUtterance_: null,

  /**
   * The HTML5 audio element we use for playing the sound served by the
   * speech server.
   * @type {HTMLAudioElement}
   * @private
   */
  audioElement_: null,

  /**
   * A mapping from voice name to language and gender, derived from the
   * manifest file.  This is used in case the speech synthesis request
   * specifies a voice name but doesn't specify a language code or gender.
   * Replaced by a per-instance object in init().
   * @type {Object.<string, {lang: string, gender: string}>}
   * @private
   */
  voiceNameToLangAndGender_: {},

  /**
   * This is the main function called to initialize this extension.
   * Initializes data structures and adds event listeners.
   */
  init: function() {
    // Use a fresh per-instance map so the object literal that lives on the
    // prototype is never mutated.
    this.voiceNameToLangAndGender_ = {};

    // Get voices from manifest.
    var voices = chrome.app.getDetails().tts_engine.voices;
    for (var i = 0; i < voices.length; i++) {
      this.voiceNameToLangAndGender_[voices[i].voice_name] = {
        lang: voices[i].lang,
        gender: voices[i].gender
      };
    }

    // Initialize the audio element and event listeners on it.
    this.audioElement_ = document.createElement('audio');
    document.body.appendChild(this.audioElement_);
    this.audioElement_.addEventListener(
        'ended', this.onStop_.bind(this), false);
    this.audioElement_.addEventListener(
        'canplaythrough', this.onStart_.bind(this), false);
    // Without this listener a failed load would leave the utterance
    // pending forever, with no event ever sent to the ttsEngine callback.
    this.audioElement_.addEventListener(
        'error', this.onError_.bind(this), false);

    // Install event listeners for the ttsEngine API.
    chrome.ttsEngine.onSpeak.addListener(this.onSpeak_.bind(this));
    chrome.ttsEngine.onStop.addListener(this.onStop_.bind(this));
    chrome.ttsEngine.onPause.addListener(this.onPause_.bind(this));
    chrome.ttsEngine.onResume.addListener(this.onResume_.bind(this));
  },

  /**
   * Handler for the chrome.ttsEngine.onSpeak interface.
   * Gets Chrome's Google API key and then uses it to generate a request
   * url for the requested speech utterance. Sets that url as the source
   * of the HTML5 audio element.
   *
   * Any failure while preparing the request (unknown voice name, text that
   * cannot be URL-encoded, ...) is reported through |callback| as an
   * 'error' event and the utterance is dropped.
   *
   * @param {string} utterance The text to be spoken.
   * @param {Object} options Options to control the speech, as defined
   *     in the Chrome ttsEngine extension API.
   * @param {function(Object)} callback The function that receives the
   *     'start', 'end' and 'error' events for this utterance.
   * @private
   */
  onSpeak_: function(utterance, options, callback) {
    // Truncate the utterance if it's too long. Both Chrome's tts
    // extension api and the web speech api specify 32k as the
    // maximum limit for an utterance.
    if (utterance.length > this.MAX_UTTERANCE_LENGTH_) {
      var end = this.MAX_UTTERANCE_LENGTH_;
      // Don't cut a surrogate pair in half: a dangling high surrogate
      // would make encodeURIComponent throw a URIError below.
      var lastCodeUnit = utterance.charCodeAt(end - 1);
      if (lastCodeUnit >= 0xD800 && lastCodeUnit <= 0xDBFF)
        end--;
      utterance = utterance.substring(0, end);
    }

    try {
      // First, stop any pending audio.
      this.onStop_();

      var current = {
        utterance: utterance,
        options: options,
        callback: callback
      };
      this.currentUtterance_ = current;

      var lang = options.lang;
      var gender = options.gender;
      if (options.voiceName) {
        if (!Object.prototype.hasOwnProperty.call(
                this.voiceNameToLangAndGender_, options.voiceName)) {
          throw new Error('Unknown voice name: ' + options.voiceName);
        }
        var voice = this.voiceNameToLangAndGender_[options.voiceName];
        lang = voice.lang;
        gender = voice.gender;
      }

      if (!lang)
        lang = navigator.language;
      lang = lang.toLowerCase();

      // Look up the specific voice name for this language and gender.
      // If it's not in the map, it doesn't matter - the language will
      // be used directly. This is only used for languages where more
      // than one gender is actually available.
      var voiceName = this.LANG_AND_GENDER_TO_VOICE_NAME_[lang + '-' + gender];

      // Build every query parameter except the API key synchronously, so
      // that an encoding failure is caught by the try block and reported
      // to the caller rather than thrown from the asynchronous callback.
      var params = '&text=' + encodeURIComponent(utterance);
      params += '&lang=' + encodeURIComponent(lang);

      if (voiceName)
        params += '&name=' + encodeURIComponent(voiceName);

      if (options.rate) {
        // Input rate is between 0.1 and 10.0 with a default of 1.0.
        // Output speed is between 0.0 and 1.0 with a default of 0.5.
        params += '&speed=' + (options.rate / 2.0);
      }

      // A pitch of 0 is a valid request, so test the type rather than
      // truthiness.
      if (typeof options.pitch === 'number') {
        // Input pitch is between 0.0 and 2.0 with a default of 1.0.
        // Output pitch is between 0.0 and 1.0 with a default of 0.5.
        params += '&pitch=' + (options.pitch / 2.0);
      }

      var urlPrefix = this.SPEECH_SERVER_URL_;
      chrome.systemPrivate.getApiKey((function(apiKey) {
        // The utterance may have been stopped or replaced while the key
        // was being fetched; don't load audio nobody is waiting for.
        if (this.currentUtterance_ !== current)
          return;

        // This begins loading the audio but does not play it.
        // When enough of the audio has loaded to begin playback,
        // the 'canplaythrough' handler will call this.onStart_,
        // which sends a start event to the ttsEngine callback and
        // then begins playing audio.
        this.audioElement_.src =
            urlPrefix + '&key=' + encodeURIComponent(apiKey) + params;
      }).bind(this));
    } catch (err) {
      console.error(String(err));
      callback({
        'type': 'error',
        'errorMessage': String(err)
      });
      this.currentUtterance_ = null;
    }
  },

  /**
   * Handler for the chrome.ttsEngine.onStop interface.
   * Called either when the ttsEngine API requests us to stop, or when
   * we reach the end of the audio stream. Pause the audio element to
   * silence it, and send a callback to the ttsEngine API to let it know
   * that we've completed. Note that the ttsEngine API manages callback
   * messages and will automatically replace the 'end' event with a
   * more specific callback like 'interrupted' when sending it to the
   * TTS client.
   * @private
   */
  onStop_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.pause();
      this.currentUtterance_.callback({
        'type': 'end',
        'charIndex': this.currentUtterance_.utterance.length
      });
    }
    this.currentUtterance_ = null;
  },

  /**
   * Handler for the canplaythrough event on the audio element.
   * Called when the audio element has buffered enough audio to begin
   * playback. Send the 'start' event to the ttsEngine callback and
   * then begin playing the audio element.
   * @private
   */
  onStart_: function() {
    if (this.currentUtterance_) {
      if (this.currentUtterance_.options.volume !== undefined) {
        // Both APIs use the same range for volume, between 0.0 and 1.0.
        this.audioElement_.volume = this.currentUtterance_.options.volume;
      }
      this.audioElement_.play();
      this.currentUtterance_.callback({
          'type': 'start',
          'charIndex': 0
      });
    }
  },

  /**
   * Handler for the error event on the audio element.
   * Called when the audio served by the speech server could not be loaded
   * or played. Sends an 'error' event to the ttsEngine callback of the
   * pending utterance, if any, and clears it.
   * @private
   */
  onError_: function() {
    if (!this.currentUtterance_)
      return;

    var message = 'Unable to load or play synthesized speech';
    var mediaError = this.audioElement_.error;
    if (mediaError && mediaError.message)
      message += ': ' + mediaError.message;

    console.error(message);
    var pending = this.currentUtterance_;
    // Clear first so nothing re-entered from the callback sees a stale
    // utterance.
    this.currentUtterance_ = null;
    pending.callback({
      'type': 'error',
      'errorMessage': message
    });
  },

  /**
   * Handler for the chrome.ttsEngine.onPause interface.
   * Pauses audio if we're in the middle of an utterance.
   * @private
   */
  onPause_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.pause();
    }
  },

  /**
   * Handler for the chrome.ttsEngine.onResume interface.
   * Resumes audio if we're in the middle of an utterance.
   * @private
   */
  onResume_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.play();
    }
  }

};

// Create the extension's single instance and register its listeners as soon
// as this script is loaded.
new TtsExtension().init();