Home | History | Annotate | Download | only in tts
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 package android.speech.tts;
     17 
     18 import android.annotation.IntDef;
     19 import android.annotation.IntRange;
     20 import android.media.AudioFormat;
     21 
     22 import java.lang.annotation.Retention;
     23 import java.lang.annotation.RetentionPolicy;
     24 
     25 /**
     26  * A callback to return speech data synthesized by a text to speech engine.
     27  *
     28  * The engine can provide streaming audio by calling
     29  * {@link #start}, then {@link #audioAvailable} until all audio has been provided, then finally
     30  * {@link #done}.
     31  *
     32  * {@link #error} can be called at any stage in the synthesis process to
     33  * indicate that an error has occurred, but if the call is made after a call
     34  * to {@link #done}, it might be discarded.
     35  *
     36  * {@link #done} must be called at the end of synthesis, regardless of errors.
     37  *
     38  * All methods can be only called on the synthesis thread.
     39  */
     40 public interface SynthesisCallback {
     41 
     42     /** @hide */
     43     @Retention(RetentionPolicy.SOURCE)
     44     @IntDef({
     45         AudioFormat.ENCODING_PCM_8BIT,
     46         AudioFormat.ENCODING_PCM_16BIT,
     47         AudioFormat.ENCODING_PCM_FLOAT
     48     })
     49     @interface SupportedAudioFormat {};
     50 
     51     /**
     52      * @return the maximum number of bytes that the TTS engine can pass in a single call of {@link
     53      *     #audioAvailable}. Calls to {@link #audioAvailable} with data lengths larger than this
     54      *     value will not succeed.
     55      */
     56     int getMaxBufferSize();
     57 
     58   /**
     59    * The service should call this when it starts to synthesize audio for this request.
     60    *
     61    * <p>This method should only be called on the synthesis thread, while in {@link
     62    * TextToSpeechService#onSynthesizeText}.
     63    *
     64    * @param sampleRateInHz Sample rate in HZ of the generated audio.
     65    * @param audioFormat Audio format of the generated audio. Must be one of {@link
     66    *     AudioFormat#ENCODING_PCM_8BIT} or {@link AudioFormat#ENCODING_PCM_16BIT}. Can also be
     67    *     {@link AudioFormat#ENCODING_PCM_FLOAT} when targetting Android N and above.
     68    * @param channelCount The number of channels. Must be {@code 1} or {@code 2}.
     69    * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
     70    *     android.speech.tts.TextToSpeech#ERROR} or {@link android.speech.tts.TextToSpeech#STOPPED}.
     71    */
     72   int start(
     73       int sampleRateInHz,
     74       @SupportedAudioFormat int audioFormat,
     75       @IntRange(from = 1, to = 2) int channelCount);
     76 
     77   /**
     78    * The service should call this method when synthesized audio is ready for consumption.
     79    *
     80    * <p>This method should only be called on the synthesis thread, while in {@link
     81    * TextToSpeechService#onSynthesizeText}.
     82    *
     83    * @param buffer The generated audio data. This method will not hold on to {@code buffer}, so the
     84    *     caller is free to modify it after this method returns.
     85    * @param offset The offset into {@code buffer} where the audio data starts.
     86    * @param length The number of bytes of audio data in {@code buffer}. This must be less than or
     87    *     equal to the return value of {@link #getMaxBufferSize}.
     88    * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
     89    *     android.speech.tts.TextToSpeech#ERROR} or {@link android.speech.tts.TextToSpeech#STOPPED}.
     90    */
     91   int audioAvailable(byte[] buffer, int offset, int length);
     92 
     93   /**
     94    * The service should call this method when all the synthesized audio for a request has been
     95    * passed to {@link #audioAvailable}.
     96    *
     97    * <p>This method should only be called on the synthesis thread, while in {@link
     98    * TextToSpeechService#onSynthesizeText}.
     99    *
    100    * <p>This method has to be called if {@link #start} and/or {@link #error} was called.
    101    *
    102    * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
    103    *     android.speech.tts.TextToSpeech#ERROR} or {@link android.speech.tts.TextToSpeech#STOPPED}.
    104    */
    105   int done();
    106 
    107     /**
    108      * The service should call this method if the speech synthesis fails.
    109      *
    110      * <p>This method should only be called on the synthesis thread, while in {@link
    111      * TextToSpeechService#onSynthesizeText}.
    112      */
    113     void error();
    114 
    115   /**
    116    * The service should call this method if the speech synthesis fails.
    117    *
    118    * <p>This method should only be called on the synthesis thread, while in {@link
    119    * TextToSpeechService#onSynthesizeText}.
    120    *
    121    * @param errorCode Error code to pass to the client. One of the ERROR_ values from {@link
    122    *     android.speech.tts.TextToSpeech}
    123    */
    124   void error(@TextToSpeech.Error int errorCode);
    125 
    126     /**
    127      * Check if {@link #start} was called or not.
    128      *
    129      * <p>This method should only be called on the synthesis thread, while in {@link
    130      * TextToSpeechService#onSynthesizeText}.
    131      *
    132      * <p>Useful for checking if a fallback from network request is possible.
    133      */
    134     boolean hasStarted();
    135 
    136     /**
    137      * Check if {@link #done} was called or not.
    138      *
    139      * <p>This method should only be called on the synthesis thread, while in {@link
    140      * TextToSpeechService#onSynthesizeText}.
    141      *
    142      * <p>Useful for checking if a fallback from network request is possible.
    143      */
    144     boolean hasFinished();
    145 
    146     /**
    147      * The service may call this method to provide timing information about the spoken text.
    148      *
    149      * <p>Calling this method means that at the given audio frame, the given range of the input is
    150      * about to be spoken. If this method is called the client will receive a callback on the
    151      * listener ({@link UtteranceProgressListener#onRangeStart}) at the moment that frame has been
    152      * reached by the playback head.
    153      *
    154      * <p>This information can be used by the client, for example, to highlight ranges of the text
    155      * while it is spoken.
    156      *
    157      * <p>The markerInFrames is a frame index into the audio for this synthesis request, i.e. into
    158      * the concatenation of the audio bytes sent to audioAvailable for this synthesis request. The
    159      * definition of a frame depends on the format given by {@link #start}. See {@link AudioFormat}
    160      * for more information.
    161      *
    162      * <p>This method should only be called on the synthesis thread, while in {@link
    163      * TextToSpeechService#onSynthesizeText}.
    164      *
    165      * @param markerInFrames The position in frames in the audio where this range is spoken.
    166      * @param start The start index of the range in the input text.
    167      * @param end The end index (exclusive) of the range in the input text.
    168      */
    169     default void rangeStart(int markerInFrames, int start, int end) {}
    170 }
    171