/* com_svox_picottsengine.cpp

 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * This is the Manager layer.  It sits on top of the native Pico engine
 * and provides the interface to the defined Google TTS engine API.
 * The Google engine API is the boundary to allow a TTS engine to be swapped.
 * The Manager layer also provides the SSML tag interpretation.
 * The supported SSML tags are mapped to corresponding tags natively supported by Pico.
 * Native Pico functions always begin with picoXXX.
 *
 * In the Pico engine, the language cannot be changed independently of the voice.
 * If either the voice or locale/language are changed, a new resource is loaded.
 *
 * Only a subset of SSML 1.0 tags are supported.
 * Some SSML tags involve significant complexity.
 * If the language is changed through an SSML tag, there is a latency for the load.
30 * 31 */ 32 //#define LOG_NDEBUG 0 33 34 #include <stdio.h> 35 #include <unistd.h> 36 #include <stdlib.h> 37 38 #define LOG_TAG "SVOX Pico Engine" 39 40 #include <utils/Log.h> 41 #include <utils/String16.h> /* for strlen16 */ 42 #include <android_runtime/AndroidRuntime.h> 43 #include <TtsEngine.h> 44 45 #include <cutils/jstring.h> 46 #include <picoapi.h> 47 #include <picodefs.h> 48 49 #include "svox_ssml_parser.h" 50 51 using namespace android; 52 53 /* adaptation layer defines */ 54 #define PICO_MEM_SIZE 2500000 55 /* speaking rate */ 56 #define PICO_MIN_RATE 20 57 #define PICO_MAX_RATE 500 58 #define PICO_DEF_RATE 100 59 /* speaking pitch */ 60 #define PICO_MIN_PITCH 50 61 #define PICO_MAX_PITCH 200 62 #define PICO_DEF_PITCH 100 63 /* speaking volume */ 64 #define PICO_MIN_VOLUME 0 65 #define PICO_MAX_VOLUME 500 66 #define PICO_DEF_VOLUME 100 67 68 /* string constants */ 69 #define MAX_OUTBUF_SIZE 128 70 const char * PICO_SYSTEM_LINGWARE_PATH = "/system/tts/lang_pico/"; 71 const char * PICO_LINGWARE_PATH = "/sdcard/svox/"; 72 const char * PICO_VOICE_NAME = "PicoVoice"; 73 const char * PICO_SPEED_OPEN_TAG = "<speed level='%d'>"; 74 const char * PICO_SPEED_CLOSE_TAG = "</speed>"; 75 const char * PICO_PITCH_OPEN_TAG = "<pitch level='%d'>"; 76 const char * PICO_PITCH_CLOSE_TAG = "</pitch>"; 77 const char * PICO_VOLUME_OPEN_TAG = "<volume level='%d'>"; 78 const char * PICO_VOLUME_CLOSE_TAG = "</volume>"; 79 const char * PICO_PHONEME_OPEN_TAG = "<phoneme ph='"; 80 const char * PICO_PHONEME_CLOSE_TAG = "'/>"; 81 82 /* supported voices 83 Pico does not seperately specify the voice and locale. 
*/ 84 const char * picoSupportedLangIso3[] = { "eng", "eng", "deu", "spa", "fra", "ita" }; 85 const char * picoSupportedCountryIso3[] = { "USA", "GBR", "DEU", "ESP", "FRA", "ITA" }; 86 const char * picoSupportedLang[] = { "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT" }; 87 const char * picoInternalLang[] = { "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT" }; 88 const char * picoInternalTaLingware[] = { "en-US_ta.bin", "en-GB_ta.bin", "de-DE_ta.bin", "es-ES_ta.bin", "fr-FR_ta.bin", "it-IT_ta.bin" }; 89 const char * picoInternalSgLingware[] = { "en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin", "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin" }; 90 const char * picoInternalUtppLingware[] = { "en-US_utpp.bin", "en-GB_utpp.bin", "de-DE_utpp.bin", "es-ES_utpp.bin", "fr-FR_utpp.bin", "it-IT_utpp.bin" }; 91 const int picoNumSupportedVocs = 6; 92 93 /* supported properties */ 94 const char * picoSupportedProperties[] = { "language", "rate", "pitch", "volume" }; 95 const int picoNumSupportedProperties = 4; 96 97 98 /* adapation layer global variables */ 99 synthDoneCB_t * picoSynthDoneCBPtr; 100 void * picoMemArea = NULL; 101 pico_System picoSystem = NULL; 102 pico_Resource picoTaResource = NULL; 103 pico_Resource picoSgResource = NULL; 104 pico_Resource picoUtppResource = NULL; 105 pico_Engine picoEngine = NULL; 106 pico_Char * picoTaFileName = NULL; 107 pico_Char * picoSgFileName = NULL; 108 pico_Char * picoUtppFileName = NULL; 109 pico_Char * picoTaResourceName = NULL; 110 pico_Char * picoSgResourceName = NULL; 111 pico_Char * picoUtppResourceName = NULL; 112 int picoSynthAbort = 0; 113 char * picoProp_currLang = NULL; /* current language */ 114 int picoProp_currRate = PICO_DEF_RATE; /* current rate */ 115 int picoProp_currPitch = PICO_DEF_PITCH; /* current pitch */ 116 int picoProp_currVolume = PICO_DEF_VOLUME; /* current volume */ 117 118 int picoCurrentLangIndex = -1; 119 120 char * pico_alt_lingware_path = NULL; 121 122 123 /* internal 
helper functions */

/** checkForLocale
 *  Map a locale string onto an index into the supported-voice tables.
 *  An exact match against picoSupportedLang wins. Failing that, only the first two
 *  characters (the language part) are compared: the currently loaded voice is preferred
 *  when its language matches, otherwise the first supported voice with that language is used.
 *  Comparing just two characters could overmatch ISO 639-3 language codes.
 *  @locale - the locale to check, either in xx or xx-YY format
 *  return index of the locale, or -1 if not supported.
*/
static int checkForLocale( const char * locale )
{
    int voc;                                    /* index into the supported voices */

    if (locale == NULL) {
        ALOGE("checkForLocale called with NULL language");
        return -1;
    }

    /* Pass 1: the requested locale is spelled exactly like a supported one. */
    for (voc = 0; voc < picoNumSupportedVocs; voc++) {
        if (strcmp(locale, picoSupportedLang[voc]) == 0) {
            return voc;
        }
    }

    /* Pass 2: language-only match; stay on the loaded voice if it already speaks that language. */
    if ((picoCurrentLangIndex > -1) &&
        (strncmp(locale, picoSupportedLang[picoCurrentLangIndex], 2) == 0)) {
        return picoCurrentLangIndex;
    }

    /* Pass 3: language-only match against the whole table, first hit wins. */
    for (voc = 0; voc < picoNumSupportedVocs; voc++) {
        if (strncmp(locale, picoSupportedLang[voc], 2) == 0) {
            return voc;
        }
    }

    ALOGE("TtsEngine::set language called with unsupported locale %s", locale);
    return -1;
}


/** cleanResources
 *  Unloads any loaded Pico resources.
177 */ 178 static void cleanResources( void ) 179 { 180 if (picoEngine) { 181 pico_disposeEngine( picoSystem, &picoEngine ); 182 pico_releaseVoiceDefinition( picoSystem, (pico_Char *) PICO_VOICE_NAME ); 183 picoEngine = NULL; 184 } 185 if (picoUtppResource) { 186 pico_unloadResource( picoSystem, &picoUtppResource ); 187 picoUtppResource = NULL; 188 } 189 if (picoTaResource) { 190 pico_unloadResource( picoSystem, &picoTaResource ); 191 picoTaResource = NULL; 192 } 193 if (picoSgResource) { 194 pico_unloadResource( picoSystem, &picoSgResource ); 195 picoSgResource = NULL; 196 } 197 198 if (picoSystem) { 199 pico_terminate(&picoSystem); 200 picoSystem = NULL; 201 } 202 picoCurrentLangIndex = -1; 203 } 204 205 206 /** cleanFiles 207 * Frees any memory allocated for file and resource strings. 208 */ 209 static void cleanFiles( void ) 210 { 211 if (picoProp_currLang) { 212 free( picoProp_currLang ); 213 picoProp_currLang = NULL; 214 } 215 216 if (picoTaFileName) { 217 free( picoTaFileName ); 218 picoTaFileName = NULL; 219 } 220 221 if (picoSgFileName) { 222 free( picoSgFileName ); 223 picoSgFileName = NULL; 224 } 225 226 if (picoUtppFileName) { 227 free( picoUtppFileName ); 228 picoUtppFileName = NULL; 229 } 230 231 if (picoTaResourceName) { 232 free( picoTaResourceName ); 233 picoTaResourceName = NULL; 234 } 235 236 if (picoSgResourceName) { 237 free( picoSgResourceName ); 238 picoSgResourceName = NULL; 239 } 240 241 if (picoUtppResourceName) { 242 free( picoUtppResourceName ); 243 picoUtppResourceName = NULL; 244 } 245 } 246 247 /** hasResourcesForLanguage 248 * Check to see if the resources required to load the language at the specified index 249 * are properly installed 250 * @langIndex - the index of the language to check the resources for. The index is valid. 
251 * return true if the required resources are installed, false otherwise 252 */ 253 static bool hasResourcesForLanguage(int langIndex) { 254 FILE * pFile; 255 char* fileName = (char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE); 256 257 /* check resources on system (under PICO_SYSTEM_LINGWARE_PATH). */ 258 strcpy((char*)fileName, PICO_SYSTEM_LINGWARE_PATH); 259 strcat((char*)fileName, (const char*)picoInternalTaLingware[langIndex]); 260 pFile = fopen(fileName, "r"); 261 if (pFile != NULL) { 262 /* "ta" file found. */ 263 fclose (pFile); 264 /* now look for "sg" file. */ 265 strcpy((char*)fileName, PICO_SYSTEM_LINGWARE_PATH); 266 strcat((char*)fileName, (const char*)picoInternalSgLingware[langIndex]); 267 pFile = fopen(fileName, "r"); 268 if (pFile != NULL) { 269 /* "sg" file found, no need to continue checking, return success. */ 270 fclose(pFile); 271 free(fileName); 272 return true; 273 } 274 } 275 276 /* resources not found on system, check resources on alternative location */ 277 /* (under pico_alt_lingware_path). */ 278 strcpy((char*)fileName, pico_alt_lingware_path); 279 strcat((char*)fileName, (const char*)picoInternalTaLingware[langIndex]); 280 pFile = fopen(fileName, "r"); 281 if (pFile == NULL) { 282 free(fileName); 283 return false; 284 } else { 285 fclose (pFile); 286 } 287 288 strcpy((char*)fileName, pico_alt_lingware_path); 289 strcat((char*)fileName, (const char*)picoInternalSgLingware[langIndex]); 290 pFile = fopen(fileName, "r"); 291 if (pFile == NULL) { 292 free(fileName); 293 return false; 294 } else { 295 fclose(pFile); 296 free(fileName); 297 return true; 298 } 299 } 300 301 /** doLanguageSwitchFromLangIndex 302 * Switch to the requested locale. 303 * If the locale is already loaded, it returns immediately. 304 * If another locale is already is loaded, it will first be unloaded and the new one then loaded. 305 * If no locale is loaded, the requested locale will be loaded. 
306 * @langIndex - the index of the locale/voice to load, which is guaranteed to be supported. 307 * return TTS_SUCCESS or TTS_FAILURE 308 */ 309 static tts_result doLanguageSwitchFromLangIndex( int langIndex ) 310 { 311 int ret; /* function result code */ 312 313 if (langIndex>=0) { 314 /* If we already have a loaded locale, check whether it is the same one as requested. */ 315 if (picoProp_currLang && (strcmp(picoProp_currLang, picoSupportedLang[langIndex]) == 0)) { 316 //ALOGI("Language already loaded (%s == %s)", picoProp_currLang, 317 // picoSupportedLang[langIndex]); 318 return TTS_SUCCESS; 319 } 320 } 321 322 /* It is not the same locale; unload the current one first. Also invalidates the system object*/ 323 cleanResources(); 324 325 /* Allocate memory for file and resource names. */ 326 cleanFiles(); 327 328 if (picoSystem==NULL) { 329 /*re-init system object*/ 330 ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem ); 331 if (PICO_OK != ret) { 332 ALOGE("Failed to initialize the pico system object\n"); 333 return TTS_FAILURE; 334 } 335 } 336 337 picoProp_currLang = (char *) malloc( 10 ); 338 picoTaFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); 339 picoSgFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); 340 picoUtppFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); 341 picoTaResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); 342 picoSgResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); 343 picoUtppResourceName =(pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); 344 345 if ( 346 (picoProp_currLang==NULL) || (picoTaFileName==NULL) || (picoSgFileName==NULL) || 347 (picoUtppFileName==NULL) || (picoTaResourceName==NULL) || (picoSgResourceName==NULL) || 348 (picoUtppResourceName==NULL) 349 ) { 350 ALOGE("Failed to allocate memory for internal strings\n"); 351 cleanResources(); 352 return 
TTS_FAILURE; 353 } 354 355 /* Find where to load the resource files from: system or alternative location */ 356 /* based on availability of the Ta file. Try the alternative location first, this is where */ 357 /* more recent language file updates would be installed (under pico_alt_lingware_path). */ 358 bool bUseSystemPath = true; 359 FILE * pFile; 360 char* tmpFileName = (char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE); 361 strcpy((char*)tmpFileName, pico_alt_lingware_path); 362 strcat((char*)tmpFileName, (const char*)picoInternalTaLingware[langIndex]); 363 pFile = fopen(tmpFileName, "r"); 364 if (pFile != NULL) { 365 /* "ta" file found under pico_alt_lingware_path, don't use the system path. */ 366 fclose (pFile); 367 bUseSystemPath = false; 368 } 369 free(tmpFileName); 370 371 /* Set the path and file names for resource files. */ 372 if (bUseSystemPath) { 373 strcpy((char *) picoTaFileName, PICO_SYSTEM_LINGWARE_PATH); 374 strcpy((char *) picoSgFileName, PICO_SYSTEM_LINGWARE_PATH); 375 strcpy((char *) picoUtppFileName, PICO_SYSTEM_LINGWARE_PATH); 376 } else { 377 strcpy((char *) picoTaFileName, pico_alt_lingware_path); 378 strcpy((char *) picoSgFileName, pico_alt_lingware_path); 379 strcpy((char *) picoUtppFileName, pico_alt_lingware_path); 380 } 381 strcat((char *) picoTaFileName, (const char *) picoInternalTaLingware[langIndex]); 382 strcat((char *) picoSgFileName, (const char *) picoInternalSgLingware[langIndex]); 383 strcat((char *) picoUtppFileName, (const char *) picoInternalUtppLingware[langIndex]); 384 385 /* Load the text analysis Lingware resource file. */ 386 ret = pico_loadResource( picoSystem, picoTaFileName, &picoTaResource ); 387 if (PICO_OK != ret) { 388 ALOGE("Failed to load textana resource for %s [%d]", picoSupportedLang[langIndex], ret); 389 cleanResources(); 390 cleanFiles(); 391 return TTS_FAILURE; 392 } 393 394 /* Load the signal generation Lingware resource file. 
*/ 395 ret = pico_loadResource( picoSystem, picoSgFileName, &picoSgResource ); 396 if (PICO_OK != ret) { 397 ALOGE("Failed to load siggen resource for %s [%d]", picoSupportedLang[langIndex], ret); 398 cleanResources(); 399 cleanFiles(); 400 return TTS_FAILURE; 401 } 402 403 /* Load the utpp Lingware resource file if exists - NOTE: this file is optional 404 and is currently not used. Loading is only attempted for future compatibility. 405 If this file is not present the loading will still succeed. */ 406 ret = pico_loadResource( picoSystem, picoUtppFileName, &picoUtppResource ); 407 if ((PICO_OK != ret) && (ret != PICO_EXC_CANT_OPEN_FILE)) { 408 ALOGE("Failed to load utpp resource for %s [%d]", picoSupportedLang[langIndex], ret); 409 cleanResources(); 410 cleanFiles(); 411 return TTS_FAILURE; 412 } 413 414 /* Get the text analysis resource name. */ 415 ret = pico_getResourceName( picoSystem, picoTaResource, (char *) picoTaResourceName ); 416 if (PICO_OK != ret) { 417 ALOGE("Failed to get textana resource name for %s [%d]", picoSupportedLang[langIndex], ret); 418 cleanResources(); 419 cleanFiles(); 420 return TTS_FAILURE; 421 } 422 423 /* Get the signal generation resource name. */ 424 ret = pico_getResourceName( picoSystem, picoSgResource, (char *) picoSgResourceName ); 425 if ((PICO_OK == ret) && (picoUtppResource != NULL)) { 426 /* Get utpp resource name - optional: see note above. */ 427 ret = pico_getResourceName( picoSystem, picoUtppResource, (char *) picoUtppResourceName ); 428 if (PICO_OK != ret) { 429 ALOGE("Failed to get utpp resource name for %s [%d]", picoSupportedLang[langIndex], ret); 430 cleanResources(); 431 cleanFiles(); 432 return TTS_FAILURE; 433 } 434 } 435 if (PICO_OK != ret) { 436 ALOGE("Failed to get siggen resource name for %s [%d]", picoSupportedLang[langIndex], ret); 437 cleanResources(); 438 cleanFiles(); 439 return TTS_FAILURE; 440 } 441 442 /* Create a voice definition. 
*/ 443 ret = pico_createVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME ); 444 if (PICO_OK != ret) { 445 ALOGE("Failed to create voice for %s [%d]", picoSupportedLang[langIndex], ret); 446 cleanResources(); 447 cleanFiles(); 448 return TTS_FAILURE; 449 } 450 451 /* Add the text analysis resource to the voice. */ 452 ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoTaResourceName ); 453 if (PICO_OK != ret) { 454 ALOGE("Failed to add textana resource to voice for %s [%d]", picoSupportedLang[langIndex], ret); 455 cleanResources(); 456 cleanFiles(); 457 return TTS_FAILURE; 458 } 459 460 /* Add the signal generation resource to the voice. */ 461 ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoSgResourceName ); 462 if ((PICO_OK == ret) && (picoUtppResource != NULL)) { 463 /* Add utpp resource to voice - optional: see note above. */ 464 ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoUtppResourceName ); 465 if (PICO_OK != ret) { 466 ALOGE("Failed to add utpp resource to voice for %s [%d]", picoSupportedLang[langIndex], ret); 467 cleanResources(); 468 cleanFiles(); 469 return TTS_FAILURE; 470 } 471 } 472 473 if (PICO_OK != ret) { 474 ALOGE("Failed to add siggen resource to voice for %s [%d]", picoSupportedLang[langIndex], ret); 475 cleanResources(); 476 cleanFiles(); 477 return TTS_FAILURE; 478 } 479 480 ret = pico_newEngine( picoSystem, (const pico_Char *) PICO_VOICE_NAME, &picoEngine ); 481 if (PICO_OK != ret) { 482 ALOGE("Failed to create engine for %s [%d]", picoSupportedLang[langIndex], ret); 483 cleanResources(); 484 cleanFiles(); 485 return TTS_FAILURE; 486 } 487 488 /* Set the current locale/voice. 
*/ 489 strcpy( picoProp_currLang, picoSupportedLang[langIndex] ); 490 picoCurrentLangIndex = langIndex; 491 ALOGI("loaded %s successfully", picoProp_currLang); 492 return TTS_SUCCESS; 493 } 494 495 496 /** doLanguageSwitch 497 * Switch to the requested locale. 498 * If this locale is already loaded, it returns immediately. 499 * If another locale is already loaded, this will first be unloaded 500 * and the new one then loaded. 501 * If no locale is loaded, the requested will be loaded. 502 * @locale - the locale to check, either in xx or xx-YY format (i.e "en" or "en-US") 503 * return TTS_SUCCESS or TTS_FAILURE 504 */ 505 static tts_result doLanguageSwitch( const char * locale ) 506 { 507 int loclIndex; /* locale index */ 508 509 /* Load the new locale. */ 510 loclIndex = checkForLocale( locale ); 511 if (loclIndex < 0) { 512 ALOGE("Tried to swith to non-supported locale %s", locale); 513 return TTS_FAILURE; 514 } 515 //ALOGI("Found supported locale %s", picoSupportedLang[loclIndex]); 516 return doLanguageSwitchFromLangIndex( loclIndex ); 517 } 518 519 520 /** doAddProperties 521 * Add <speed>, <pitch> and <volume> tags to the text, 522 * if the properties have been set to non-default values, and return the new string. 523 * The calling function is responsible for freeing the returned string. 
524 * @str - text to apply tags to 525 * return new string with tags applied 526 */ 527 static char * doAddProperties( const char * str ) 528 { 529 char * data = NULL; 530 int haspitch, hasspeed, hasvol; /* parameters */ 531 int textlen; /* property string length */ 532 haspitch = 0; hasspeed = 0; hasvol = 0; 533 textlen = strlen(str) + 1; 534 if (picoProp_currPitch != PICO_DEF_PITCH) { /* non-default pitch */ 535 textlen += strlen(PICO_PITCH_OPEN_TAG) + 5; 536 textlen += strlen(PICO_PITCH_CLOSE_TAG); 537 haspitch = 1; 538 } 539 if (picoProp_currRate != PICO_DEF_RATE) { /* non-default rate */ 540 textlen += strlen(PICO_SPEED_OPEN_TAG) + 5; 541 textlen += strlen(PICO_SPEED_CLOSE_TAG); 542 hasspeed = 1; 543 } 544 545 if (picoProp_currVolume != PICO_DEF_VOLUME) { /* non-default volume */ 546 textlen += strlen(PICO_VOLUME_OPEN_TAG) + 5; 547 textlen += strlen(PICO_VOLUME_CLOSE_TAG); 548 hasvol = 1; 549 } 550 551 /* Compose the property strings. */ 552 data = (char *) malloc( textlen ); /* allocate string */ 553 if (!data) { 554 return NULL; 555 } 556 memset(data, 0, textlen); /* clear it */ 557 if (haspitch) { 558 char* tmp = (char*)malloc(strlen(PICO_PITCH_OPEN_TAG) + strlen(PICO_PITCH_CLOSE_TAG) + 5); 559 sprintf(tmp, PICO_PITCH_OPEN_TAG, picoProp_currPitch); 560 strcat(data, tmp); 561 free(tmp); 562 } 563 564 if (hasspeed) { 565 char* tmp = (char*)malloc(strlen(PICO_SPEED_OPEN_TAG) + strlen(PICO_SPEED_CLOSE_TAG) + 5); 566 sprintf(tmp, PICO_SPEED_OPEN_TAG, picoProp_currRate); 567 strcat(data, tmp); 568 free(tmp); 569 } 570 571 if (hasvol) { 572 char* tmp = (char*)malloc(strlen(PICO_VOLUME_OPEN_TAG) + strlen(PICO_VOLUME_CLOSE_TAG) + 5); 573 sprintf(tmp, PICO_VOLUME_OPEN_TAG, picoProp_currVolume); 574 strcat(data, tmp); 575 free(tmp); 576 } 577 578 strcat(data, str); 579 if (hasvol) { 580 strcat(data, PICO_VOLUME_CLOSE_TAG); 581 } 582 583 if (hasspeed) { 584 strcat(data, PICO_SPEED_CLOSE_TAG); 585 } 586 587 if (haspitch) { 588 strcat(data, PICO_PITCH_CLOSE_TAG); 589 } 
590 return data; 591 } 592 593 594 /** get_tok 595 * Searches for tokens in a string 596 * @str - text to be processed 597 * @pos - position of first character to be searched in str 598 * @textlen - postion of last character to be searched 599 * @tokstart - address of a variable to receive the start of the token found 600 * @tokstart - address of a variable to receive the length of the token found 601 * return : 1=token found, 0=token not found 602 * notes : the token separator set could be enlarged adding characters in "seps" 603 */ 604 static int get_tok(const char * str , int pos, int textlen, int *tokstart, int *toklen) 605 { 606 const char * seps = " "; 607 608 /*look for start*/ 609 while ((pos<textlen) && (strchr(seps,str[pos]) != NULL)) { 610 pos++; 611 } 612 if (pos == textlen) { 613 /*no characters != seps found whithin string*/ 614 return 0; 615 } 616 *tokstart = pos; 617 /*look for end*/ 618 while ((pos<textlen) && (strchr(seps,str[pos]) == NULL)) { 619 pos++; 620 } 621 *toklen = pos - *tokstart; 622 return 1; 623 }/*get_tok*/ 624 625 626 /** get_sub_tok 627 * Searches for subtokens in a token having a compound structure with camel case like "xxxYyyy" 628 * @str - text to be processed 629 * @pos - position of first character to be searched in str 630 * @textlen - postion of last character to be searched in str 631 * @tokstart - address of a variable to receive the start of the sub token found 632 * @tokstart - address of a variable to receive the length of the sub token found 633 * return : 1=sub token found, 0=sub token not found 634 * notes : the sub token separator set could be enlarged adding characters in "seps" 635 */ 636 static int get_sub_tok(const char * str , int pos, int textlen, int *tokstart, int *toklen) { 637 638 const char * seps = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 639 640 if (pos == textlen) { 641 return 0; 642 } 643 644 /*first char != space*/ 645 *tokstart = pos; 646 /*finding first non separator*/ 647 while ((pos < textlen) && 
(strchr(seps, str[pos]) != NULL)) { 648 pos++; 649 } 650 if (pos == textlen) { 651 /*characters all in seps found whithin string : return full token*/ 652 *toklen = pos - *tokstart; 653 return 1; 654 } 655 /*pos should be pointing to first non seps and more chars are there*/ 656 /*finding first separator*/ 657 while ((pos < textlen) && (strchr(seps, str[pos]) == NULL)) { 658 pos++; 659 } 660 if (pos == textlen) { 661 /*transition non seps->seps not found : return full token*/ 662 *toklen = pos - *tokstart; 663 return 1; 664 } 665 *toklen = pos - *tokstart; 666 return 1; 667 }/*get_sub_tok*/ 668 669 670 /** doCamelCase 671 * Searches for tokens having a compound structure with camel case and transforms them as follows : 672 * "XxxxYyyy" -->> "Xxxx Yyyy", 673 * "xxxYyyy" -->> "xxx Yyyy", 674 * "XXXYyyy" -->> "XXXYyyy" 675 * etc.... 676 * The calling function is responsible for freeing the returned string. 677 * @str - text to be processed 678 * return new string with text processed 679 */ 680 static char * doCamelCase( const char * str ) 681 { 682 int textlen; /* input string length */ 683 int totlen; /* output string length */ 684 int tlen_2, nsubtok; /* nuber of subtokens */ 685 int toklen, tokstart; /*legnth and start of generic token*/ 686 int stoklen, stokstart; /*legnth and start of generic sub-token*/ 687 int pos, tokpos, outpos; /*postion of current char in input string and token and output*/ 688 char *data; /*pointer of the returned string*/ 689 690 pos = 0; 691 tokpos = 0; 692 toklen = 0; 693 stoklen = 0; 694 tlen_2 = 0; 695 totlen = 0; 696 697 textlen = strlen(str) + 1; 698 699 /*counting characters after sub token splitting including spaces*/ 700 //while ((pos<textlen) && (str[pos]!=0)) { 701 while (get_tok(str, pos, textlen, &tokstart, &toklen)) { 702 tokpos = tokstart; 703 tlen_2 = 0; 704 nsubtok = 0; 705 while (get_sub_tok(str, tokpos, tokstart+toklen, &stokstart, &stoklen)) { 706 totlen += stoklen; 707 tlen_2 += stoklen; 708 tokpos = stokstart + 
stoklen; 709 nsubtok += 1; 710 } 711 totlen += nsubtok; /*add spaces between subtokens*/ 712 pos = tokstart + tlen_2; 713 } 714 //} 715 /* Allocate the return string */ 716 717 data = (char *) malloc( totlen ); /* allocate string */ 718 if (!data) { 719 return NULL; 720 } 721 memset(data, 0, totlen); /* clear it */ 722 outpos = 0; 723 pos = 0; 724 /*copying characters*/ 725 //while ((pos<textlen) && (str[pos]!=0)) { 726 while (get_tok (str, pos, textlen, &tokstart, &toklen)) { 727 tokpos = tokstart; 728 tlen_2 = 0; 729 nsubtok = 0; 730 while (get_sub_tok(str, tokpos, tokstart+toklen, &stokstart, &stoklen)) { 731 strncpy(&(data[outpos]), &(str[stokstart]), stoklen); 732 outpos += stoklen; 733 strncpy(&(data[outpos]), " ", 1); 734 tlen_2 += stoklen; 735 outpos += 1; 736 tokpos = stokstart + stoklen; 737 } 738 pos=tokstart+tlen_2; 739 } 740 //} 741 if (outpos == 0) { 742 outpos = 1; 743 } 744 data[outpos-1] = 0; 745 return data; 746 }/*doCamelCase*/ 747 748 749 /** createPhonemeString 750 * Wrap all individual words in <phoneme> tags. 751 * The Pico <phoneme> tag only supports one word in each tag, 752 * therefore they must be individually wrapped! 
753 * @xsampa - text to convert to Pico phomene string 754 * @length - length of the input string 755 * return new string with tags applied 756 */ 757 extern char * createPhonemeString( const char * xsampa, int length ) 758 { 759 char * convstring = NULL; 760 int origStrLen = strlen(xsampa); 761 int numWords = 1; 762 int start, totalLength, i, j; 763 764 for (i = 0; i < origStrLen; i ++) { 765 if ((xsampa[i] == ' ') || (xsampa[i] == '#')) { 766 numWords ++; 767 } 768 } 769 770 if (numWords == 1) { 771 convstring = new char[origStrLen + 17]; 772 convstring[0] = '\0'; 773 strcat(convstring, PICO_PHONEME_OPEN_TAG); 774 strcat(convstring, xsampa); 775 strcat(convstring, PICO_PHONEME_CLOSE_TAG); 776 } else { 777 char * words[numWords]; 778 start = 0; totalLength = 0; i = 0; j = 0; 779 for (i=0, j=0; i < origStrLen; i++) { 780 if ((xsampa[i] == ' ') || (xsampa[i] == '#')) { 781 words[j] = new char[i+1-start+17]; 782 words[j][0] = '\0'; 783 strcat( words[j], PICO_PHONEME_OPEN_TAG); 784 strncat(words[j], xsampa+start, i-start); 785 strcat( words[j], PICO_PHONEME_CLOSE_TAG); 786 start = i + 1; 787 j++; 788 totalLength += strlen(words[j-1]); 789 } 790 } 791 words[j] = new char[i+1-start+17]; 792 words[j][0] = '\0'; 793 strcat(words[j], PICO_PHONEME_OPEN_TAG); 794 strcat(words[j], xsampa+start); 795 strcat(words[j], PICO_PHONEME_CLOSE_TAG); 796 totalLength += strlen(words[j]); 797 convstring = new char[totalLength + 1]; 798 convstring[0] = '\0'; 799 for (i=0 ; i < numWords ; i++) { 800 strcat(convstring, words[i]); 801 delete [] words[i]; 802 } 803 } 804 805 return convstring; 806 } 807 808 /* The XSAMPA uses as many as 5 characters to represent a single IPA code. */ 809 typedef struct tagPhnArr 810 { 811 char16_t strIPA; /* IPA Unicode symbol */ 812 char strXSAMPA[6]; /* SAMPA sequence */ 813 } PArr; 814 815 #define phn_cnt (134+7) 816 817 PArr PhnAry[phn_cnt] = { 818 819 /* XSAMPA conversion table 820 This maps a single IPA symbol to a sequence representing XSAMPA. 
821 This relies upon a direct one-to-one correspondance 822 including diphthongs and affricates. */ 823 824 /* Vowels (23) complete */ 825 {0x025B, "E"}, 826 {0x0251, "A"}, 827 {0x0254, "O"}, 828 {0x00F8, "2"}, 829 {0x0153, "9"}, 830 {0x0276, "&"}, 831 {0x0252, "Q"}, 832 {0x028C, "V"}, 833 {0x0264, "7"}, 834 {0x026F, "M"}, 835 {0x0268, "1"}, 836 {0x0289, "}"}, 837 {0x026A, "I"}, 838 {0x028F, "Y"}, 839 {0x028A, "U"}, 840 {0x0259, "@"}, 841 {0x0275, "8"}, 842 {0x0250, "6"}, 843 {0x00E6, "{"}, 844 {0x025C, "3"}, 845 {0x025A, "@`"}, 846 {0x025E, "3\\\\"}, 847 {0x0258, "@\\\\"}, 848 849 /* Consonants (60) complete */ 850 {0x0288, "t`"}, 851 {0x0256, "d`"}, 852 {0x025F, "J\\\\"}, 853 {0x0261, "g"}, 854 {0x0262, "G\\\\"}, 855 {0x0294, "?"}, 856 {0x0271, "F"}, 857 {0x0273, "n`"}, 858 {0x0272, "J"}, 859 {0x014B, "N"}, 860 {0x0274, "N\\\\"}, 861 {0x0299, "B\\\\"}, 862 {0x0280, "R\\\\"}, 863 {0x027E, "4"}, 864 {0x027D, "r`"}, 865 {0x0278, "p\\\\"}, 866 {0x03B2, "B"}, 867 {0x03B8, "T"}, 868 {0x00F0, "D"}, 869 {0x0283, "S"}, 870 {0x0292, "Z"}, 871 {0x0282, "s`"}, 872 {0x0290, "z`"}, 873 {0x00E7, "C"}, 874 {0x029D, "j\\\\"}, 875 {0x0263, "G"}, 876 {0x03C7, "X"}, 877 {0x0281, "R"}, 878 {0x0127, "X\\\\"}, 879 {0x0295, "?\\\\"}, 880 {0x0266, "h\\\\"}, 881 {0x026C, "K"}, 882 {0x026E, "K\\\\"}, 883 {0x028B, "P"}, 884 {0x0279, "r\\\\"}, 885 {0x027B, "r\\\\'"}, 886 {0x0270, "M\\\\"}, 887 {0x026D, "l`"}, 888 {0x028E, "L"}, 889 {0x029F, "L\\\\"}, 890 {0x0253, "b_<"}, 891 {0x0257, "d_<"}, 892 {0x0284, "J\\_<"}, 893 {0x0260, "g_<"}, 894 {0x029B, "G\\_<"}, 895 {0x028D, "W"}, 896 {0x0265, "H"}, 897 {0x029C, "H\\\\"}, 898 {0x02A1, ">\\\\"}, 899 {0x02A2, "<\\\\"}, 900 {0x0267, "x\\\\"}, /* hooktop heng */ 901 {0x0298, "O\\\\"}, 902 {0x01C0, "|\\\\"}, 903 {0x01C3, "!\\\\"}, 904 {0x01C2, "=\\"}, 905 {0x01C1, "|\\|\\"}, 906 {0x027A, "l\\\\"}, 907 {0x0255, "s\\\\"}, 908 {0x0291, "z\\\\"}, 909 {0x026B, "l_G"}, 910 911 912 /* Diacritics (37) complete */ 913 {0x02BC, "_>"}, 914 {0x0325, "_0"}, 915 
{0x030A, "_0"}, 916 {0x032C, "_v"}, 917 {0x02B0, "_h"}, 918 {0x0324, "_t"}, 919 {0x0330, "_k"}, 920 {0x033C, "_N"}, 921 {0x032A, "_d"}, 922 {0x033A, "_a"}, 923 {0x033B, "_m"}, 924 {0x0339, "_O"}, 925 {0x031C, "_c"}, 926 {0x031F, "_+"}, 927 {0x0320, "_-"}, 928 {0x0308, "_\""}, /* centralized */ 929 {0x033D, "_x"}, 930 {0x0318, "_A"}, 931 {0x0319, "_q"}, 932 {0x02DE, "`"}, 933 {0x02B7, "_w"}, 934 {0x02B2, "_j"}, 935 {0x02E0, "_G"}, 936 {0x02E4, "_?\\\\"}, /* pharyngealized */ 937 {0x0303, "~"}, /* nasalized */ 938 {0x207F, "_n"}, 939 {0x02E1, "_l"}, 940 {0x031A, "_}"}, 941 {0x0334, "_e"}, 942 {0x031D, "_r"}, /* raised equivalent to 02D4 */ 943 {0x02D4, "_r"}, /* raised equivalent to 031D */ 944 {0x031E, "_o"}, /* lowered equivalent to 02D5 */ 945 {0x02D5, "_o"}, /* lowered equivalent to 031E */ 946 {0x0329, "="}, /* sylabic */ 947 {0x032F, "_^"}, /* non-sylabic */ 948 {0x0361, "_"}, /* top tie bar */ 949 {0x035C, "_"}, 950 951 /* Suprasegmental (15) incomplete */ 952 {0x02C8, "\""}, /* primary stress */ 953 {0x02CC, "%"}, /* secondary stress */ 954 {0x02D0, ":"}, /* long */ 955 {0x02D1, ":\\\\"}, /* half-long */ 956 {0x0306, "_X"}, /* extra short */ 957 958 {0x2016, "||"}, /* major group */ 959 {0x203F, "-\\\\"}, /* bottom tie bar */ 960 {0x2197, "<R>"}, /* global rise */ 961 {0x2198, "<F>"}, /* global fall */ 962 {0x2193, "<D>"}, /* downstep */ 963 {0x2191, "<U>"}, /* upstep */ 964 {0x02E5, "<T>"}, /* extra high level */ 965 {0x02E7, "<M>"}, /* mid level */ 966 {0x02E9, "<B>"}, /* extra low level */ 967 968 {0x025D, "3`:"}, /* non-IPA %% */ 969 970 /* Affricates (6) complete */ 971 {0x02A3, "d_z"}, 972 {0x02A4, "d_Z"}, 973 {0x02A5, "d_z\\\\"}, 974 {0x02A6, "t_s"}, 975 {0x02A7, "t_S"}, 976 {0x02A8, "t_s\\\\"} 977 }; 978 979 980 void CnvIPAPnt( const char16_t IPnt, char * XPnt ) 981 { 982 char16_t ThisPnt = IPnt; /* local copy of single IPA codepoint */ 983 int idx; /* index into table */ 984 985 /* Convert an individual IPA codepoint. 
986 A single IPA code could map to a string. 987 Search the table. If it is not found, use the same character. 988 Since most codepoints can be contained within 16 bits, 989 they are represented as wide chars. */ 990 XPnt[0] = 0; /* clear the result string */ 991 992 /* Search the table for the conversion. */ 993 for (idx = 0; idx < phn_cnt; idx ++) { /* for each item in table */ 994 if (IPnt == PhnAry[idx].strIPA) { /* matches IPA code */ 995 strcat( XPnt, (const char *)&(PhnAry[idx].strXSAMPA) ); /* copy the XSAMPA string */ 996 return; 997 } 998 } 999 strcat(XPnt, (const char *)&ThisPnt); /* just copy it */ 1000 } 1001 1002 1003 /** cnvIpaToXsampa 1004 * Convert an IPA character string to an XSAMPA character string. 1005 * @ipaString - input IPA string to convert 1006 * @outXsampaString - converted XSAMPA string is passed back in this parameter 1007 * return size of the new string 1008 */ 1009 1010 int cnvIpaToXsampa( const char16_t * ipaString, size_t ipaStringSize, char ** outXsampaString ) 1011 { 1012 size_t xsize; /* size of result */ 1013 size_t ipidx; /* index into IPA string */ 1014 char * XPnt; /* short XSAMPA char sequence */ 1015 1016 /* Convert an IPA string to an XSAMPA string and store the xsampa string in *outXsampaString. 1017 It is the responsibility of the caller to free the allocated string. 1018 Increment through the string. For each base & combination convert it to the XSAMP equivalent. 1019 Because of the XSAMPA limitations, not all IPA characters will be covered. 
*/ 1020 XPnt = (char *) malloc(6); 1021 xsize = (4 * ipaStringSize) + 8; /* assume more than double size */ 1022 *outXsampaString = (char *) malloc( xsize );/* allocate return string */ 1023 *outXsampaString[0] = 0; 1024 xsize = 0; /* clear final */ 1025 1026 for (ipidx = 0; ipidx < ipaStringSize; ipidx ++) { /* for each IPA code */ 1027 CnvIPAPnt( ipaString[ipidx], XPnt ); /* get converted character */ 1028 strcat((char *)*outXsampaString, XPnt ); /* concatenate XSAMPA */ 1029 } 1030 free(XPnt); 1031 xsize = strlen(*outXsampaString); /* get the final length */ 1032 return xsize; 1033 } 1034 1035 1036 /* Google Engine API function implementations */ 1037 1038 /** init 1039 * Allocates Pico memory block and initializes the Pico system. 1040 * synthDoneCBPtr - Pointer to callback function which will receive generated samples 1041 * config - the engine configuration parameters, here only contains the non-system path 1042 * for the lingware location 1043 * return tts_result 1044 */ 1045 tts_result TtsEngine::init( synthDoneCB_t synthDoneCBPtr, const char *config ) 1046 { 1047 if (synthDoneCBPtr == NULL) { 1048 ALOGE("Callback pointer is NULL"); 1049 return TTS_FAILURE; 1050 } 1051 1052 picoMemArea = malloc( PICO_MEM_SIZE ); 1053 if (!picoMemArea) { 1054 ALOGE("Failed to allocate memory for Pico system"); 1055 return TTS_FAILURE; 1056 } 1057 1058 pico_Status ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem ); 1059 if (PICO_OK != ret) { 1060 ALOGE("Failed to initialize Pico system"); 1061 free( picoMemArea ); 1062 picoMemArea = NULL; 1063 return TTS_FAILURE; 1064 } 1065 1066 picoSynthDoneCBPtr = synthDoneCBPtr; 1067 1068 picoCurrentLangIndex = -1; 1069 1070 // was the initialization given an alternative path for the lingware location? 
1071 if ((config != NULL) && (strlen(config) > 0)) { 1072 pico_alt_lingware_path = (char*)malloc(strlen(config)); 1073 strcpy((char*)pico_alt_lingware_path, config); 1074 ALOGV("Alternative lingware path %s", pico_alt_lingware_path); 1075 } else { 1076 pico_alt_lingware_path = (char*)malloc(strlen(PICO_LINGWARE_PATH) + 1); 1077 strcpy((char*)pico_alt_lingware_path, PICO_LINGWARE_PATH); 1078 ALOGV("Using predefined lingware path %s", pico_alt_lingware_path); 1079 } 1080 1081 return TTS_SUCCESS; 1082 } 1083 1084 1085 /** shutdown 1086 * Unloads all Pico resources; terminates Pico system and frees Pico memory block. 1087 * return tts_result 1088 */ 1089 tts_result TtsEngine::shutdown( void ) 1090 { 1091 cleanResources(); 1092 1093 if (picoSystem) { 1094 pico_terminate(&picoSystem); 1095 picoSystem = NULL; 1096 } 1097 if (picoMemArea) { 1098 free(picoMemArea); 1099 picoMemArea = NULL; 1100 } 1101 1102 cleanFiles(); 1103 return TTS_SUCCESS; 1104 } 1105 1106 1107 /** loadLanguage 1108 * Load a new language. 1109 * @lang - string with ISO 3 letter language code. 1110 * @country - string with ISO 3 letter country code . 1111 * @variant - string with language variant for that language and country pair. 1112 * return tts_result 1113 */ 1114 tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant) 1115 { 1116 return TTS_FAILURE; 1117 //return setProperty("language", value, size); 1118 } 1119 1120 1121 /** setLanguage 1122 * Load a new language (locale). Use the ISO 639-3 language codes. 1123 * @lang - string with ISO 639-3 language code. 1124 * @country - string with ISO 3 letter country code. 1125 * @variant - string with language variant for that language and country pair. 
1126 * return tts_result 1127 */ 1128 tts_result TtsEngine::setLanguage( const char * lang, const char * country, const char * variant ) 1129 { 1130 //ALOGI("TtsEngine::setLanguage %s %s %s", lang, country, variant); 1131 int langIndex; 1132 int countryIndex; 1133 int i; 1134 1135 if (lang == NULL) 1136 { 1137 ALOGE("TtsEngine::setLanguage called with NULL language"); 1138 return TTS_FAILURE; 1139 } 1140 1141 /* We look for a match on the language first 1142 then we look for a match on the country. 1143 If no match on the language: 1144 return an error. 1145 If match on the language, but no match on the country: 1146 load the language found for the language match. 1147 If match on the language, and match on the country: 1148 load the language found for the country match. */ 1149 1150 /* Find a match on the language. */ 1151 langIndex = -1; /* no match */ 1152 for (i = 0; i < picoNumSupportedVocs; i ++) 1153 { 1154 if (strcmp(lang, picoSupportedLangIso3[i]) == 0) 1155 { 1156 langIndex = i; 1157 break; 1158 } 1159 } 1160 if (langIndex < 0) 1161 { 1162 /* The language isn't supported. */ 1163 ALOGE("TtsEngine::setLanguage called with unsupported language"); 1164 return TTS_FAILURE; 1165 } 1166 1167 /* Find a match on the country, if there is one. */ 1168 if (country != NULL) 1169 { 1170 countryIndex = -1; 1171 for (i = langIndex; i < picoNumSupportedVocs; i ++) 1172 { 1173 if ( (strcmp(lang, picoSupportedLangIso3[i]) == 0) 1174 && (strcmp(country, picoSupportedCountryIso3[i]) == 0)) 1175 { 1176 countryIndex = i; 1177 break; 1178 } 1179 } 1180 1181 if (countryIndex < 0) 1182 { 1183 /* We didn't find a match on the country, but we had a match on the language. 1184 Use that language. */ 1185 ALOGI("TtsEngine::setLanguage found matching language(%s) but not matching country(%s).", 1186 lang, country); 1187 } 1188 else 1189 { 1190 /* We have a match on both the language and the country. 
*/ 1191 langIndex = countryIndex; 1192 } 1193 } 1194 1195 return doLanguageSwitchFromLangIndex( langIndex ); /* switch the language */ 1196 } 1197 1198 1199 /** isLanguageAvailable 1200 * Returns the level of support for a language. 1201 * @lang - string with ISO 3 letter language code. 1202 * @country - string with ISO 3 letter country code . 1203 * @variant - string with language variant for that language and country pair. 1204 * return tts_support_result 1205 */ 1206 tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char *country, 1207 const char *variant) { 1208 int langIndex = -1; 1209 int countryIndex = -1; 1210 //------------------------- 1211 // language matching 1212 // if no language specified 1213 if (lang == NULL) { 1214 ALOGE("TtsEngine::isLanguageAvailable called with no language"); 1215 return TTS_LANG_NOT_SUPPORTED; 1216 } 1217 1218 // find a match on the language 1219 for (int i = 0; i < picoNumSupportedVocs; i++) 1220 { 1221 if (strcmp(lang, picoSupportedLangIso3[i]) == 0) { 1222 langIndex = i; 1223 break; 1224 } 1225 } 1226 if (langIndex < 0) { 1227 // language isn't supported 1228 ALOGV("TtsEngine::isLanguageAvailable called with unsupported language"); 1229 return TTS_LANG_NOT_SUPPORTED; 1230 } 1231 1232 //------------------------- 1233 // country matching 1234 // if no country specified 1235 if ((country == NULL) || (strlen(country) == 0)) { 1236 // check installation of matched language 1237 return (hasResourcesForLanguage(langIndex) ? 
TTS_LANG_AVAILABLE : TTS_LANG_MISSING_DATA); 1238 } 1239 1240 // find a match on the country 1241 for (int i = langIndex; i < picoNumSupportedVocs; i++) { 1242 if ((strcmp(lang, picoSupportedLangIso3[i]) == 0) 1243 && (strcmp(country, picoSupportedCountryIso3[i]) == 0)) { 1244 countryIndex = i; 1245 break; 1246 } 1247 } 1248 if (countryIndex < 0) { 1249 // we didn't find a match on the country, but we had a match on the language 1250 // check installation of matched language 1251 return (hasResourcesForLanguage(langIndex) ? TTS_LANG_AVAILABLE : TTS_LANG_MISSING_DATA); 1252 } else { 1253 // we have a match on the language and the country 1254 langIndex = countryIndex; 1255 // check installation of matched language + country 1256 return (hasResourcesForLanguage(langIndex) ? TTS_LANG_COUNTRY_AVAILABLE : TTS_LANG_MISSING_DATA); 1257 } 1258 1259 // no variants supported in this library, TTS_LANG_COUNTRY_VAR_AVAILABLE cannot be returned. 1260 } 1261 1262 1263 /** getLanguage 1264 * Get the currently loaded language - if any. 1265 * @lang - string with current ISO 3 letter language code, empty string if no loaded language. 1266 * @country - string with current ISO 3 letter country code, empty string if no loaded language. 1267 * @variant - string with current language variant, empty string if no loaded language. 1268 * return tts_result 1269 */ 1270 tts_result TtsEngine::getLanguage(char *language, char *country, char *variant) 1271 { 1272 if (picoCurrentLangIndex == -1) { 1273 strcpy(language, "\0"); 1274 strcpy(country, "\0"); 1275 strcpy(variant, "\0"); 1276 } else { 1277 strcpy(language, picoSupportedLangIso3[picoCurrentLangIndex]); 1278 strcpy(country, picoSupportedCountryIso3[picoCurrentLangIndex]); 1279 // no variant in this implementation 1280 strcpy(variant, "\0"); 1281 } 1282 return TTS_SUCCESS; 1283 } 1284 1285 1286 /** setAudioFormat 1287 * sets the audio format to use for synthesis, returns what is actually used. 
1288 * @encoding - reference to encoding format 1289 * @rate - reference to sample rate 1290 * @channels - reference to number of channels 1291 * return tts_result 1292 * */ 1293 tts_result TtsEngine::setAudioFormat(tts_audio_format& encoding, uint32_t& rate, 1294 int& channels) 1295 { 1296 // ignore the input parameters, the enforced audio parameters are fixed here 1297 encoding = TTS_AUDIO_FORMAT_PCM_16_BIT; 1298 rate = 16000; 1299 channels = 1; 1300 return TTS_SUCCESS; 1301 } 1302 1303 1304 /** setProperty 1305 * Set property. The supported properties are: language, rate, pitch and volume. 1306 * @property - name of property to set 1307 * @value - value to set 1308 * @size - size of value 1309 * return tts_result 1310 */ 1311 tts_result TtsEngine::setProperty( const char * property, const char * value, const size_t size ) 1312 { 1313 int rate; 1314 int pitch; 1315 int volume; 1316 1317 /* Set a specific property for the engine. 1318 Supported properties include: language (locale), rate, pitch, volume. */ 1319 /* Sanity check */ 1320 if (property == NULL) { 1321 ALOGE("setProperty called with property NULL"); 1322 return TTS_PROPERTY_UNSUPPORTED; 1323 } 1324 1325 if (value == NULL) { 1326 ALOGE("setProperty called with value NULL"); 1327 return TTS_VALUE_INVALID; 1328 } 1329 1330 if (strncmp(property, "language", 8) == 0) { 1331 /* Verify it's in correct format. */ 1332 if (strlen(value) != 2 && strlen(value) != 6) { 1333 ALOGE("change language called with incorrect format"); 1334 return TTS_VALUE_INVALID; 1335 } 1336 1337 /* Try to switch to specified language. 
*/ 1338 if (doLanguageSwitch(value) == TTS_FAILURE) { 1339 ALOGE("failed to load language"); 1340 return TTS_FAILURE; 1341 } else { 1342 return TTS_SUCCESS; 1343 } 1344 } else if (strncmp(property, "rate", 4) == 0) { 1345 rate = atoi(value); 1346 if (rate < PICO_MIN_RATE) { 1347 rate = PICO_MIN_RATE; 1348 } 1349 if (rate > PICO_MAX_RATE) { 1350 rate = PICO_MAX_RATE; 1351 } 1352 picoProp_currRate = rate; 1353 return TTS_SUCCESS; 1354 } else if (strncmp(property, "pitch", 5) == 0) { 1355 pitch = atoi(value); 1356 if (pitch < PICO_MIN_PITCH) { 1357 pitch = PICO_MIN_PITCH; 1358 } 1359 if (pitch > PICO_MAX_PITCH) { 1360 pitch = PICO_MAX_PITCH; 1361 } 1362 picoProp_currPitch = pitch; 1363 return TTS_SUCCESS; 1364 } else if (strncmp(property, "volume", 6) == 0) { 1365 volume = atoi(value); 1366 if (volume < PICO_MIN_VOLUME) { 1367 volume = PICO_MIN_VOLUME; 1368 } 1369 if (volume > PICO_MAX_VOLUME) { 1370 volume = PICO_MAX_VOLUME; 1371 } 1372 picoProp_currVolume = volume; 1373 return TTS_SUCCESS; 1374 } 1375 1376 return TTS_PROPERTY_UNSUPPORTED; 1377 } 1378 1379 1380 /** getProperty 1381 * Get the property. Supported properties are: language, rate, pitch and volume. 1382 * @property - name of property to get 1383 * @value - buffer which will receive value of property 1384 * @iosize - size of value - if size is too small on return this will contain actual size needed 1385 * return tts_result 1386 */ 1387 tts_result TtsEngine::getProperty( const char * property, char * value, size_t * iosize ) 1388 { 1389 /* Get the property for the engine. 1390 This property was previously set by setProperty or by default. 
*/ 1391 /* sanity check */ 1392 if (property == NULL) { 1393 ALOGE("getProperty called with property NULL"); 1394 return TTS_PROPERTY_UNSUPPORTED; 1395 } 1396 1397 if (value == NULL) { 1398 ALOGE("getProperty called with value NULL"); 1399 return TTS_VALUE_INVALID; 1400 } 1401 1402 if (strncmp(property, "language", 8) == 0) { 1403 if (picoProp_currLang == NULL) { 1404 strcpy(value, ""); 1405 } else { 1406 if (*iosize < strlen(picoProp_currLang)+1) { 1407 *iosize = strlen(picoProp_currLang) + 1; 1408 return TTS_PROPERTY_SIZE_TOO_SMALL; 1409 } 1410 strcpy(value, picoProp_currLang); 1411 } 1412 return TTS_SUCCESS; 1413 } else if (strncmp(property, "rate", 4) == 0) { 1414 char tmprate[4]; 1415 sprintf(tmprate, "%d", picoProp_currRate); 1416 if (*iosize < strlen(tmprate)+1) { 1417 *iosize = strlen(tmprate) + 1; 1418 return TTS_PROPERTY_SIZE_TOO_SMALL; 1419 } 1420 strcpy(value, tmprate); 1421 return TTS_SUCCESS; 1422 } else if (strncmp(property, "pitch", 5) == 0) { 1423 char tmppitch[4]; 1424 sprintf(tmppitch, "%d", picoProp_currPitch); 1425 if (*iosize < strlen(tmppitch)+1) { 1426 *iosize = strlen(tmppitch) + 1; 1427 return TTS_PROPERTY_SIZE_TOO_SMALL; 1428 } 1429 strcpy(value, tmppitch); 1430 return TTS_SUCCESS; 1431 } else if (strncmp(property, "volume", 6) == 0) { 1432 char tmpvol[4]; 1433 sprintf(tmpvol, "%d", picoProp_currVolume); 1434 if (*iosize < strlen(tmpvol)+1) { 1435 *iosize = strlen(tmpvol) + 1; 1436 return TTS_PROPERTY_SIZE_TOO_SMALL; 1437 } 1438 strcpy(value, tmpvol); 1439 return TTS_SUCCESS; 1440 } 1441 1442 /* Unknown property */ 1443 ALOGE("Unsupported property"); 1444 return TTS_PROPERTY_UNSUPPORTED; 1445 } 1446 1447 1448 /** synthesizeText 1449 * Synthesizes a text string. 1450 * The text string could be annotated with SSML tags. 
1451 * @text - text to synthesize 1452 * @buffer - buffer which will receive generated samples 1453 * @bufferSize - size of buffer 1454 * @userdata - pointer to user data which will be passed back to callback function 1455 * return tts_result 1456 */ 1457 tts_result TtsEngine::synthesizeText( const char * text, int8_t * buffer, size_t bufferSize, void * userdata ) 1458 { 1459 int err; 1460 int cbret; 1461 pico_Char * inp = NULL; 1462 char * expanded_text = NULL; 1463 pico_Char * local_text = NULL; 1464 short outbuf[MAX_OUTBUF_SIZE/2]; 1465 pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type; 1466 pico_Status ret; 1467 SvoxSsmlParser * parser = NULL; 1468 1469 picoSynthAbort = 0; 1470 if (text == NULL) { 1471 ALOGE("synthesizeText called with NULL string"); 1472 return TTS_FAILURE; 1473 } 1474 1475 if (strlen(text) == 0) { 1476 return TTS_SUCCESS; 1477 } 1478 1479 if (buffer == NULL) { 1480 ALOGE("synthesizeText called with NULL buffer"); 1481 return TTS_FAILURE; 1482 } 1483 1484 if ( (strncmp(text, "<speak", 6) == 0) || (strncmp(text, "<?xml", 5) == 0) ) { 1485 /* SSML input */ 1486 parser = new SvoxSsmlParser(); 1487 if (parser && parser->initSuccessful()) { 1488 err = parser->parseDocument(text, 1); 1489 if (err == XML_STATUS_ERROR) { 1490 /* Note: for some reason expat always thinks the input document has an error 1491 at the end, even when the XML document is perfectly formed */ 1492 ALOGI("Warning: SSML document parsed with errors"); 1493 } 1494 char * parsed_text = parser->getParsedDocument(); 1495 if (parsed_text) { 1496 /* Add property tags to the string - if any. 
*/ 1497 local_text = (pico_Char *) doAddProperties( parsed_text ); 1498 if (!local_text) { 1499 ALOGE("Failed to allocate memory for text string"); 1500 delete parser; 1501 return TTS_FAILURE; 1502 } 1503 char * lang = parser->getParsedDocumentLanguage(); 1504 if (lang != NULL) { 1505 if (doLanguageSwitch(lang) == TTS_FAILURE) { 1506 ALOGE("Failed to switch to language (%s) specified in SSML document.", lang); 1507 delete parser; 1508 return TTS_FAILURE; 1509 } 1510 } else { 1511 // lang is NULL, pick a language so the synthesis can be performed 1512 if (picoCurrentLangIndex == -1) { 1513 // no current language loaded, pick the first one and load it 1514 if (doLanguageSwitchFromLangIndex(0) == TTS_FAILURE) { 1515 ALOGE("Failed to switch to default language."); 1516 delete parser; 1517 return TTS_FAILURE; 1518 } 1519 } 1520 //ALOGI("No language in SSML, using current language (%s).", picoProp_currLang); 1521 } 1522 delete parser; 1523 } else { 1524 ALOGE("Failed to parse SSML document"); 1525 delete parser; 1526 return TTS_FAILURE; 1527 } 1528 } else { 1529 ALOGE("Failed to create SSML parser"); 1530 if (parser) { 1531 delete parser; 1532 } 1533 return TTS_FAILURE; 1534 } 1535 } else { 1536 /* camelCase pre-processing */ 1537 expanded_text = doCamelCase(text); 1538 /* Add property tags to the string - if any. */ 1539 local_text = (pico_Char *) doAddProperties( expanded_text ); 1540 if (expanded_text) { 1541 free( expanded_text ); 1542 } 1543 if (!local_text) { 1544 ALOGE("Failed to allocate memory for text string"); 1545 return TTS_FAILURE; 1546 } 1547 } 1548 1549 text_remaining = strlen((const char *) local_text) + 1; 1550 1551 inp = (pico_Char *) local_text; 1552 1553 size_t bufused = 0; 1554 1555 /* synthesis loop */ 1556 while (text_remaining) { 1557 if (picoSynthAbort) { 1558 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT ); 1559 break; 1560 } 1561 1562 /* Feed the text into the engine. 
*/ 1563 ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent ); 1564 if (ret != PICO_OK) { 1565 ALOGE("Error synthesizing string '%s': [%d]", text, ret); 1566 if (local_text) { 1567 free( local_text ); 1568 } 1569 return TTS_FAILURE; 1570 } 1571 1572 text_remaining -= bytes_sent; 1573 inp += bytes_sent; 1574 do { 1575 if (picoSynthAbort) { 1576 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT ); 1577 break; 1578 } 1579 /* Retrieve the samples and add them to the buffer. */ 1580 ret = pico_getData( picoEngine, (void *) outbuf, MAX_OUTBUF_SIZE, &bytes_recv, 1581 &out_data_type ); 1582 if (bytes_recv) { 1583 if ((bufused + bytes_recv) <= bufferSize) { 1584 memcpy(buffer+bufused, (int8_t *) outbuf, bytes_recv); 1585 bufused += bytes_recv; 1586 } else { 1587 /* The buffer filled; pass this on to the callback function. */ 1588 cbret = picoSynthDoneCBPtr(userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, 1589 bufused, TTS_SYNTH_PENDING); 1590 if (cbret == TTS_CALLBACK_HALT) { 1591 ALOGI("Halt requested by caller. Halting."); 1592 picoSynthAbort = 1; 1593 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT ); 1594 break; 1595 } 1596 bufused = 0; 1597 memcpy(buffer, (int8_t *) outbuf, bytes_recv); 1598 bufused += bytes_recv; 1599 } 1600 } 1601 } while (PICO_STEP_BUSY == ret); 1602 1603 /* This chunk of synthesis is finished; pass the remaining samples. 1604 Use 16 KHz, 16-bit samples. 
*/ 1605 if (!picoSynthAbort) { 1606 picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused, 1607 TTS_SYNTH_PENDING); 1608 } 1609 picoSynthAbort = 0; 1610 1611 if (ret != PICO_STEP_IDLE) { 1612 if (ret != 0){ 1613 ALOGE("Error occurred during synthesis [%d]", ret); 1614 } 1615 if (local_text) { 1616 free(local_text); 1617 } 1618 ALOGV("Synth loop: sending TTS_SYNTH_DONE after error"); 1619 picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused, 1620 TTS_SYNTH_DONE); 1621 pico_resetEngine( picoEngine, PICO_RESET_SOFT ); 1622 return TTS_FAILURE; 1623 } 1624 } 1625 1626 /* Synthesis is done; notify the caller */ 1627 ALOGV("Synth loop: sending TTS_SYNTH_DONE after all done, or was asked to stop"); 1628 picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused, 1629 TTS_SYNTH_DONE); 1630 1631 if (local_text) { 1632 free( local_text ); 1633 } 1634 return TTS_SUCCESS; 1635 } 1636 1637 1638 1639 /** stop 1640 * Aborts the running synthesis. 1641 * return tts_result 1642 */ 1643 tts_result TtsEngine::stop( void ) 1644 { 1645 picoSynthAbort = 1; 1646 return TTS_SUCCESS; 1647 } 1648 1649 1650 #ifdef __cplusplus 1651 extern "C" { 1652 #endif 1653 1654 TtsEngine * getTtsEngine( void ) 1655 { 1656 return new TtsEngine(); 1657 } 1658 1659 #ifdef __cplusplus 1660 } 1661 #endif 1662