Home | History | Annotate | Download | only in interface
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 /**
      6  * @fileoverview Base class for Text-to-Speech engines that actually transform
      7  * text to speech.
      8  *
      9  */
     10 
     11 goog.provide('cvox.AbstractTts');
     12 
     13 goog.require('cvox.TtsInterface');
     14 goog.require('goog.i18n.MessageFormat');
     15 
     16 /**
     17  * Creates a new instance.
     18  * @constructor
     19  * @implements {cvox.TtsInterface}
     20  */
     21 cvox.AbstractTts = function() {
     22   this.ttsProperties = new Object();
     23 
     24   /**
     25    * Default value for TTS properties.
     26    * Note that these as well as the subsequent properties might be different
     27    * on different host platforms (like Chrome, Android, etc.).
     28    * @type {{pitch : number,
     29    *         rate: number,
     30    *         volume: number}}
     31    * @protected
     32    */
     33   this.propertyDefault = {
     34     'rate': 0.5,
     35     'pitch': 0.5,
     36     'volume': 0.5
     37   };
     38 
     39   /**
     40    * Min value for TTS properties.
     41    * @type {{pitch : number,
     42    *         rate: number,
     43    *         volume: number}}
     44    * @protected
     45    */
     46   this.propertyMin = {
     47     'rate': 0.0,
     48     'pitch': 0.0,
     49     'volume': 0.0
     50   };
     51 
     52   /**
     53    * Max value for TTS properties.
     54    * @type {{pitch : number,
     55    *         rate: number,
     56    *         volume: number}}
     57    * @protected
     58    */
     59   this.propertyMax = {
     60     'rate': 1.0,
     61     'pitch': 1.0,
     62     'volume': 1.0
     63   };
     64 
     65   /**
     66    * Step value for TTS properties.
     67    * @type {{pitch : number,
     68    *         rate: number,
     69    *         volume: number}}
     70    * @protected
     71    */
     72   this.propertyStep = {
     73     'rate': 0.1,
     74     'pitch': 0.1,
     75     'volume': 0.1
     76   };
     77 
     78 
     79   /** @private */
     80 
     81   if (cvox.AbstractTts.pronunciationDictionaryRegexp_ == undefined) {
     82     // Create an expression that matches all words in the pronunciation
     83     // dictionary on word boundaries, ignoring case.
     84     var words = [];
     85     for (var word in cvox.AbstractTts.PRONUNCIATION_DICTIONARY) {
     86       words.push(word);
     87     }
     88     var expr = '\\b(' + words.join('|') + ')\\b';
     89     cvox.AbstractTts.pronunciationDictionaryRegexp_ = new RegExp(expr, 'ig');
     90   }
     91 
     92   if (cvox.AbstractTts.substitutionDictionaryRegexp_ == undefined) {
     93     // Create an expression that matches all words in the substitution
     94     // dictionary.
     95     var symbols = [];
     96     for (var symbol in cvox.AbstractTts.SUBSTITUTION_DICTIONARY) {
     97       symbols.push(symbol);
     98     }
     99     var expr = '(' + symbols.join('|') + ')';
    100     cvox.AbstractTts.substitutionDictionaryRegexp_ = new RegExp(expr, 'ig');
    101   }
    102 };
    103 
    104 
/**
 * Current TTS property values for this engine, keyed by property name.
 * The constructor initializes it to an empty object;
 * increaseOrDecreaseProperty updates entries in place and mergeProperties
 * uses it as the base that per-utterance properties are merged onto.
 * @type {Object}
 * @protected
 */
cvox.AbstractTts.prototype.ttsProperties;
    111 
    112 
    113 /** @override */
    114 cvox.AbstractTts.prototype.speak = function(textString, queueMode, properties) {
    115   return this;
    116 };
    117 
    118 
    119 /** @override */
    120 cvox.AbstractTts.prototype.isSpeaking = function() {
    121   return false;
    122 };
    123 
    124 
    125 /** @override */
    126 cvox.AbstractTts.prototype.stop = function() {
    127 };
    128 
    129 
    130 /** @override */
    131 cvox.AbstractTts.prototype.addCapturingEventListener = function(listener) { };
    132 
    133 
    134 /** @override */
    135 cvox.AbstractTts.prototype.increaseOrDecreaseProperty =
    136     function(propertyName, increase) {
    137       var min = this.propertyMin[propertyName];
    138       var max = this.propertyMax[propertyName];
    139       var step = this.propertyStep[propertyName];
    140       var current = this.ttsProperties[propertyName];
    141       current = increase ? current + step : current - step;
    142       this.ttsProperties[propertyName] = Math.max(Math.min(current, max), min);
    143     };
    144 
    145 
    146 /**
    147  * Merges the given properties with the default ones. Always returns a
    148  * new object, so that you can safely modify the result of mergeProperties
    149  * without worrying that you're modifying an object used elsewhere.
    150  * @param {Object=} properties The properties to merge with the current ones.
    151  * @return {Object} The merged properties.
    152  * @protected
    153  */
    154 cvox.AbstractTts.prototype.mergeProperties = function(properties) {
    155   var mergedProperties = new Object();
    156   var p;
    157   if (this.ttsProperties) {
    158     for (p in this.ttsProperties) {
    159       mergedProperties[p] = this.ttsProperties[p];
    160     }
    161   }
    162   if (properties) {
    163     var tts = cvox.AbstractTts;
    164     if (typeof(properties[tts.VOLUME]) == 'number') {
    165       mergedProperties[tts.VOLUME] = properties[tts.VOLUME];
    166     }
    167     if (typeof(properties[tts.PITCH]) == 'number') {
    168       mergedProperties[tts.PITCH] = properties[tts.PITCH];
    169     }
    170     if (typeof(properties[tts.RATE]) == 'number') {
    171       mergedProperties[tts.RATE] = properties[tts.RATE];
    172     }
    173     if (typeof(properties[tts.LANG]) == 'string') {
    174       mergedProperties[tts.LANG] = properties[tts.LANG];
    175     }
    176 
    177     var context = this;
    178     var mergeRelativeProperty = function(abs, rel) {
    179       if (typeof(properties[rel]) == 'number' &&
    180           typeof(mergedProperties[abs]) == 'number') {
    181         mergedProperties[abs] += properties[rel];
    182         var min = context.propertyMin[abs];
    183         var max = context.propertyMax[abs];
    184         if (mergedProperties[abs] > max) {
    185           mergedProperties[abs] = max;
    186         } else if (mergedProperties[abs] < min) {
    187           mergedProperties[abs] = min;
    188         }
    189       }
    190     };
    191 
    192     mergeRelativeProperty(tts.VOLUME, tts.RELATIVE_VOLUME);
    193     mergeRelativeProperty(tts.PITCH, tts.RELATIVE_PITCH);
    194     mergeRelativeProperty(tts.RATE, tts.RELATIVE_RATE);
    195   }
    196 
    197   for (p in properties) {
    198     if (!mergedProperties.hasOwnProperty(p)) {
    199       mergedProperties[p] = properties[p];
    200     }
    201   }
    202 
    203   return mergedProperties;
    204 };
    205 
    206 
    207 /**
    208  * Method to preprocess text to be spoken properly by a speech
    209  * engine.
    210  *
    211  * 1. Replace any single character with a description of that character.
    212  *
    213  * 2. Convert all-caps words to lowercase if they don't look like an
    214  *    acronym / abbreviation.
    215  *
    216  * @param {string} text A text string to be spoken.
    217  * @param {Object= } properties Out parameter populated with how to speak the
    218  *     string.
    219  * @return {string} The text formatted in a way that will sound better by
    220  *     most speech engines.
    221  * @protected
    222  */
    223 cvox.AbstractTts.prototype.preprocess = function(text, properties) {
    224   if (text.length == 1 && text >= 'A' && text <= 'Z') {
    225     for (var prop in cvox.AbstractTts.PERSONALITY_CAPITAL)
    226     properties[prop] = cvox.AbstractTts.PERSONALITY_CAPITAL[prop];
    227   }
    228 
    229   // Substitute all symbols in the substitution dictionary. This is pretty
    230   // efficient because we use a single regexp that matches all symbols
    231   // simultaneously.
    232   text = text.replace(
    233       cvox.AbstractTts.substitutionDictionaryRegexp_,
    234       function(symbol) {
    235         return ' ' + cvox.AbstractTts.SUBSTITUTION_DICTIONARY[symbol] + ' ';
    236       });
    237 
    238   // Handle single characters that we want to make sure we pronounce.
    239   if (text.length == 1) {
    240     return cvox.AbstractTts.CHARACTER_DICTIONARY[text] ?
    241         (new goog.i18n.MessageFormat(cvox.ChromeVox.msgs.getMsg(
    242                 cvox.AbstractTts.CHARACTER_DICTIONARY[text])))
    243             .format({'COUNT': 1}) :
    244         text.toUpperCase();
    245   }
    246 
    247   // Substitute all words in the pronunciation dictionary. This is pretty
    248   // efficient because we use a single regexp that matches all words
    249   // simultaneously, and it calls a function with each match, which we can
    250   // use to look up the replacement in our dictionary.
    251   text = text.replace(
    252       cvox.AbstractTts.pronunciationDictionaryRegexp_,
    253       function(word) {
    254         return cvox.AbstractTts.PRONUNCIATION_DICTIONARY[word.toLowerCase()];
    255       });
    256 
    257   // Special case for google+, where the punctuation must be pronounced.
    258   text = text.replace(/google\+/ig, 'google plus');
    259 
    260   // Expand all repeated characters.
    261   text = text.replace(
    262       cvox.AbstractTts.repetitionRegexp_, cvox.AbstractTts.repetitionReplace_);
    263 
    264   // If there's no lower case letters, and at least two spaces, skip spacing
    265   // text.
    266   var skipSpacing = false;
    267   if (!text.match(/[a-z]+/) && text.indexOf(' ') != text.lastIndexOf(' ')) {
    268     skipSpacing = true;
    269   }
    270 
    271   // Convert all-caps words to lowercase if they don't look like acronyms,
    272   // otherwise add a space before all-caps words so that all-caps words in
    273   // the middle of camelCase will be separated.
    274   text = text.replace(/[A-Z]+/g, function(word) {
    275     // If a word contains vowels and is more than 3 letters long, it is
    276     // probably a real word and not just an abbreviation. Convert it to lower
    277     // case and speak it normally.
    278     if ((word.length > 3) && word.match(/([AEIOUY])/g)) {
    279       return word.toLowerCase();
    280     } else if (!skipSpacing) {
    281       // Builds spaced-out camelCased/all CAPS words so they sound better when
    282       // spoken by TTS engines.
    283       return ' ' + word.split('').join(' ');
    284     } else {
    285       return word;
    286     }
    287   });
    288 
    289   return text;
    290 };
    291 
    292 
    293 /** TTS rate property. @type {string} */
    294 cvox.AbstractTts.RATE = 'rate';
    295 /** TTS pitch property. @type {string} */
    296 cvox.AbstractTts.PITCH = 'pitch';
    297 /** TTS volume property. @type {string} */
    298 cvox.AbstractTts.VOLUME = 'volume';
    299 /** TTS language property. @type {string} */
    300 cvox.AbstractTts.LANG = 'lang';
    301 
    302 /** TTS relative rate property. @type {string} */
    303 cvox.AbstractTts.RELATIVE_RATE = 'relativeRate';
    304 /** TTS relative pitch property. @type {string} */
    305 cvox.AbstractTts.RELATIVE_PITCH = 'relativePitch';
    306 /** TTS relative volume property. @type {string} */
    307 cvox.AbstractTts.RELATIVE_VOLUME = 'relativeVolume';
    308 
    309 /** TTS color property (for the lens display). @type {string} */
    310 cvox.AbstractTts.COLOR = 'color';
    311 /** TTS CSS font-weight property (for the lens display). @type {string} */
    312 cvox.AbstractTts.FONT_WEIGHT = 'fontWeight';
    313 
    314 /** TTS punctuation-echo property. @type {string} */
    315 cvox.AbstractTts.PUNCTUATION_ECHO = 'punctuationEcho';
    316 
    317 /** TTS pause property. @type {string} */
    318 cvox.AbstractTts.PAUSE = 'pause';
    319 
    320 /**
    321  * TTS personality for annotations - text spoken by ChromeVox that
    322  * elaborates on a user interface element but isn't displayed on-screen.
    323  * @type {Object}
    324  */
    325 cvox.AbstractTts.PERSONALITY_ANNOTATION = {
    326   'relativePitch': -0.25,
    327   // TODO:(rshearer) Added this color change for I/O presentation.
    328   'color': 'yellow',
    329   'punctuationEcho': 'none'
    330 };
    331 
    332 
    333 /**
    334  * TTS personality for announcements - text spoken by ChromeVox that
    335  * isn't tied to any user interface elements.
    336  * @type {Object}
    337  */
    338 cvox.AbstractTts.PERSONALITY_ANNOUNCEMENT = {
    339   'punctuationEcho': 'none'
    340 };
    341 
    342 /**
    343  * TTS personality for alerts from the system, such as battery level
    344  * warnings.
    345  * @type {Object}
    346  */
    347 cvox.AbstractTts.PERSONALITY_SYSTEM_ALERT = {
    348   'punctuationEcho': 'none',
    349   'doNotInterrupt': true
    350 };
    351 
    352 /**
    353  * TTS personality for an aside - text in parentheses.
    354  * @type {Object}
    355  */
    356 cvox.AbstractTts.PERSONALITY_ASIDE = {
    357   'relativePitch': -0.1,
    358   'color': '#669'
    359 };
    360 
    361 
    362 /**
    363  * TTS personality for capital letters.
    364  * @type {Object}
    365  */
    366 cvox.AbstractTts.PERSONALITY_CAPITAL = {
    367   'relativePitch': 0.6
    368 };
    369 
    370 
    371 /**
    372  * TTS personality for deleted text.
    373  * @type {Object}
    374  */
    375 cvox.AbstractTts.PERSONALITY_DELETED = {
    376   'punctuationEcho': 'none',
    377   'relativePitch': -0.6
    378 };
    379 
    380 
    381 /**
    382  * TTS personality for quoted text.
    383  * @type {Object}
    384  */
    385 cvox.AbstractTts.PERSONALITY_QUOTE = {
    386   'relativePitch': 0.1,
    387   'color': '#b6b',
    388   'fontWeight': 'bold'
    389 };
    390 
    391 
    392 /**
    393  * TTS personality for strong or bold text.
    394  * @type {Object}
    395  */
    396 cvox.AbstractTts.PERSONALITY_STRONG = {
    397   'relativePitch': 0.1,
    398   'color': '#b66',
    399   'fontWeight': 'bold'
    400 };
    401 
    402 
    403 /**
    404  * TTS personality for emphasis or italicized text.
    405  * @type {Object}
    406  */
    407 cvox.AbstractTts.PERSONALITY_EMPHASIS = {
    408   'relativeVolume': 0.1,
    409   'relativeRate': -0.1,
    410   'color': '#6bb',
    411   'fontWeight': 'bold'
    412 };
    413 
    414 
    415 /**
    416  * Flag indicating if the TTS is being debugged.
    417  * @type {boolean}
    418  */
    419 cvox.AbstractTts.DEBUG = true;
    420 
    421 
    422 /**
    423  * Speech queue mode that interrupts the current utterance.
    424  * @type {number}
    425  */
    426 cvox.AbstractTts.QUEUE_MODE_FLUSH = 0;
    427 
    428 
    429 /**
    430  * Speech queue mode that does not interrupt the current utterance.
    431  * @type {number}
    432  */
    433 cvox.AbstractTts.QUEUE_MODE_QUEUE = 1;
    434 
    435 
    436 /**
    437  * Speech queue mode that flushes all utterances of the same category
    438  * (as set by properties['category']).
    439  * @type {number}
    440  */
    441 cvox.AbstractTts.QUEUE_MODE_CATEGORY_FLUSH = 2;
    442 
    443 
    444 /**
    445  * Character dictionary. These symbols are replaced with their human readable
    446  * equivalents. This replacement only occurs for single character utterances.
    447  * @type {Object.<string, string>}
    448  */
    449 cvox.AbstractTts.CHARACTER_DICTIONARY = {
    450   ' ': 'space',
    451   '`': 'backtick',
    452   '~': 'tilde',
    453   '!': 'exclamation',
    454   '@': 'at',
    455   '#': 'pound',
    456   '$': 'dollar',
    457   '%': 'percent',
    458   '^': 'caret',
    459   '&': 'ampersand',
    460   '*': 'asterisk',
    461   '(': 'open_paren',
    462   ')': 'close_paren',
    463   '-': 'dash',
    464   '_': 'underscore',
    465   '=': 'equals',
    466   '+': 'plus',
    467   '[': 'left_bracket',
    468   ']': 'right_bracket',
    469   '{': 'left_brace',
    470   '}': 'right_brace',
    471   '|': 'pipe',
    472   ';': 'semicolon',
    473   ':': 'colon',
    474   ',': 'comma',
    475   '.': 'dot',
    476   '<': 'less_than',
    477   '>': 'greater_than',
    478   '/': 'slash',
    479   '?': 'question_mark',
    480   '"': 'quote',
    481   '\'': 'apostrophe',
    482   '\t': 'tab',
    483   '\r': 'return',
    484   '\n': 'new_line',
    485   '\\': 'backslash'
    486 };
    487 
    488 
    489 /**
    490  * Pronunciation dictionary. Each key must be lowercase, its replacement
    491  * should be spelled out the way most TTS engines will pronounce it
    492  * correctly. This particular dictionary only handles letters and numbers,
    493  * no symbols.
    494  * @type {Object.<string, string>}
    495  */
    496 cvox.AbstractTts.PRONUNCIATION_DICTIONARY = {
    497   'admob': 'ad-mob',
    498   'adsense': 'ad-sense',
    499   'adwords': 'ad-words',
    500   'angularjs': 'angular j s',
    501   'bcc': 'B C C',
    502   'cc': 'C C',
    503   'chromevox': 'chrome vox',
    504   'cr48': 'C R 48',
    505   'ctrl': 'control',
    506   'doubleclick': 'double-click',
    507   'gmail': 'gee mail',
    508   'gtalk': 'gee talk',
    509   'http': 'H T T P',
    510   'https' : 'H T T P S',
    511   'igoogle': 'eye google',
    512   'pagerank': 'page-rank',
    513   'username': 'user-name',
    514   'www': 'W W W',
    515   'youtube': 'you tube'
    516 };
    517 
    518 
/**
 * Pronunciation dictionary regexp. Shared by all instances; it is created by
 * the first constructor call from the keys of PRONUNCIATION_DICTIONARY and
 * matches them on word boundaries, ignoring case.
 * @type {RegExp}
 * @private
 */
cvox.AbstractTts.pronunciationDictionaryRegexp_;
    525 
    526 
    527 /**
    528  * Substitution dictionary. These symbols or patterns are ALWAYS substituted
    529  * whenever they occur, so this should be reserved only for unicode characters
    530  * and characters that never have any different meaning in context.
    531  *
    532  * For example, do not include '$' here because $2 should be read as
    533  * "two dollars".
    534  * @type {Object.<string, string>}
    535  */
    536 cvox.AbstractTts.SUBSTITUTION_DICTIONARY = {
    537   '://': 'colon slash slash',
    538   '\u00bc': 'one fourth',
    539   '\u00bd': 'one half',
    540   '\u2190': 'left arrow',
    541   '\u2191': 'up arrow',
    542   '\u2192': 'right arrow',
    543   '\u2193': 'down arrow',
    544   '\u21d0': 'left double arrow',
    545   '\u21d1': 'up double arrow',
    546   '\u21d2': 'right double  arrow',
    547   '\u21d3': 'down double arrow',
    548   '\u21e6': 'left arrow',
    549   '\u21e7': 'up arrow',
    550   '\u21e8': 'right arrow',
    551   '\u21e9': 'down arrow',
    552   '\u2303': 'control',
    553   '\u2318': 'command',
    554   '\u2325': 'option',
    555   '\u25b2': 'up triangle',
    556   '\u25b3': 'up triangle',
    557   '\u25b4': 'up triangle',
    558   '\u25b5': 'up triangle',
    559   '\u25b6': 'right triangle',
    560   '\u25b7': 'right triangle',
    561   '\u25b8': 'right triangle',
    562   '\u25b9': 'right triangle',
    563   '\u25ba': 'right pointer',
    564   '\u25bb': 'right pointer',
    565   '\u25bc': 'down triangle',
    566   '\u25bd': 'down triangle',
    567   '\u25be': 'down triangle',
    568   '\u25bf': 'down triangle',
    569   '\u25c0': 'left triangle',
    570   '\u25c1': 'left triangle',
    571   '\u25c2': 'left triangle',
    572   '\u25c3': 'left triangle',
    573   '\u25c4': 'left pointer',
    574   '\u25c5': 'left pointer',
    575   '\uf8ff': 'apple'
    576 };
    577 
    578 
/**
 * Substitution dictionary regexp. Shared by all instances; it is created by
 * the first constructor call from the keys of SUBSTITUTION_DICTIONARY and
 * matches them anywhere in the text.
 * @type {RegExp}
 * @private
 */
cvox.AbstractTts.substitutionDictionaryRegexp_;
    585 
    586 
/**
 * Repetition filter regexp. Matches one of the listed punctuation characters
 * followed by at least two more copies of the same character, i.e. a run of
 * three or more identical characters.
 * @type {RegExp}
 * @private
 */
cvox.AbstractTts.repetitionRegexp_ =
    /([-\/\\|!@#$%^&*\(\)=_+\[\]\{\}.?;'":<>])\1{2,}/g;
    594 
    595 
    596 /**
    597  * Constructs a description of a repeated character. Use as a param to
    598  * string.replace.
    599  * @param {string} match The matching string.
    600  * @return {string} The description.
    601  * @private
    602  */
    603 cvox.AbstractTts.repetitionReplace_ = function(match) {
    604   var count = match.length;
    605   return ' ' + (new goog.i18n.MessageFormat(cvox.ChromeVox.msgs.getMsg(
    606       cvox.AbstractTts.CHARACTER_DICTIONARY[match[0]])))
    607           .format({'COUNT': count}) + ' ';
    608 };
    609 
    610 
    611 /**
    612  * @override
    613  */
    614 cvox.AbstractTts.prototype.getDefaultProperty = function(property) {
    615   return this.propertyDefault[property];
    616 };
    617