Home | History | Annotate | Download | only in speech
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "content/browser/speech/audio_encoder.h"
      6 
      7 #include "base/basictypes.h"
      8 #include "base/logging.h"
      9 #include "base/memory/scoped_ptr.h"
     10 #include "base/stl_util.h"
     11 #include "base/strings/string_number_conversions.h"
     12 #include "content/browser/speech/audio_buffer.h"
     13 #include "third_party/flac/include/FLAC/stream_encoder.h"
     14 #include "third_party/speex/include/speex/speex.h"
     15 
     16 namespace content {
     17 namespace {
     18 
     19 //-------------------------------- FLACEncoder ---------------------------------
     20 
     21 const char* const kContentTypeFLAC = "audio/x-flac; rate=";
     22 const int kFLACCompressionLevel = 0;  // 0 for speed
     23 
     24 class FLACEncoder : public AudioEncoder {
     25  public:
     26   FLACEncoder(int sampling_rate, int bits_per_sample);
     27   virtual ~FLACEncoder();
     28   virtual void Encode(const AudioChunk& raw_audio) OVERRIDE;
     29   virtual void Flush() OVERRIDE;
     30 
     31  private:
     32   static FLAC__StreamEncoderWriteStatus WriteCallback(
     33       const FLAC__StreamEncoder* encoder,
     34       const FLAC__byte buffer[],
     35       size_t bytes,
     36       unsigned samples,
     37       unsigned current_frame,
     38       void* client_data);
     39 
     40   FLAC__StreamEncoder* encoder_;
     41   bool is_encoder_initialized_;
     42 
     43   DISALLOW_COPY_AND_ASSIGN(FLACEncoder);
     44 };
     45 
     46 FLAC__StreamEncoderWriteStatus FLACEncoder::WriteCallback(
     47     const FLAC__StreamEncoder* encoder,
     48     const FLAC__byte buffer[],
     49     size_t bytes,
     50     unsigned samples,
     51     unsigned current_frame,
     52     void* client_data) {
     53   FLACEncoder* me = static_cast<FLACEncoder*>(client_data);
     54   DCHECK(me->encoder_ == encoder);
     55   me->encoded_audio_buffer_.Enqueue(buffer, bytes);
     56   return FLAC__STREAM_ENCODER_WRITE_STATUS_OK;
     57 }
     58 
     59 FLACEncoder::FLACEncoder(int sampling_rate, int bits_per_sample)
     60     : AudioEncoder(std::string(kContentTypeFLAC) +
     61                    base::IntToString(sampling_rate),
     62                    bits_per_sample),
     63       encoder_(FLAC__stream_encoder_new()),
     64       is_encoder_initialized_(false) {
     65   FLAC__stream_encoder_set_channels(encoder_, 1);
     66   FLAC__stream_encoder_set_bits_per_sample(encoder_, bits_per_sample);
     67   FLAC__stream_encoder_set_sample_rate(encoder_, sampling_rate);
     68   FLAC__stream_encoder_set_compression_level(encoder_, kFLACCompressionLevel);
     69 
     70   // Initializing the encoder will cause sync bytes to be written to
     71   // its output stream, so we wait until the first call to this method
     72   // before doing so.
     73 }
     74 
     75 FLACEncoder::~FLACEncoder() {
     76   FLAC__stream_encoder_delete(encoder_);
     77 }
     78 
     79 void FLACEncoder::Encode(const AudioChunk& raw_audio) {
     80   DCHECK_EQ(raw_audio.bytes_per_sample(), 2);
     81   if (!is_encoder_initialized_) {
     82     const FLAC__StreamEncoderInitStatus encoder_status =
     83         FLAC__stream_encoder_init_stream(encoder_, WriteCallback, NULL, NULL,
     84                                          NULL, this);
     85     DCHECK_EQ(encoder_status, FLAC__STREAM_ENCODER_INIT_STATUS_OK);
     86     is_encoder_initialized_ = true;
     87   }
     88 
     89   // FLAC encoder wants samples as int32s.
     90   const int num_samples = raw_audio.NumSamples();
     91   scoped_ptr<FLAC__int32[]> flac_samples(new FLAC__int32[num_samples]);
     92   FLAC__int32* flac_samples_ptr = flac_samples.get();
     93   for (int i = 0; i < num_samples; ++i)
     94     flac_samples_ptr[i] = static_cast<FLAC__int32>(raw_audio.GetSample16(i));
     95 
     96   FLAC__stream_encoder_process(encoder_, &flac_samples_ptr, num_samples);
     97 }
     98 
     99 void FLACEncoder::Flush() {
    100   FLAC__stream_encoder_finish(encoder_);
    101 }
    102 
    103 //-------------------------------- SpeexEncoder --------------------------------
    104 
    105 const char* const kContentTypeSpeex = "audio/x-speex-with-header-byte; rate=";
    106 const int kSpeexEncodingQuality = 8;
    107 const int kMaxSpeexFrameLength = 110;  // (44kbps rate sampled at 32kHz).
    108 
    109 // Since the frame length gets written out as a byte in the encoded packet,
    110 // make sure it is within the byte range.
    111 COMPILE_ASSERT(kMaxSpeexFrameLength <= 0xFF, invalidLength);
    112 
    113 class SpeexEncoder : public AudioEncoder {
    114  public:
    115   explicit SpeexEncoder(int sampling_rate, int bits_per_sample);
    116   virtual ~SpeexEncoder();
    117   virtual void Encode(const AudioChunk& raw_audio) OVERRIDE;
    118   virtual void Flush() OVERRIDE {}
    119 
    120  private:
    121   void* encoder_state_;
    122   SpeexBits bits_;
    123   int samples_per_frame_;
    124   char encoded_frame_data_[kMaxSpeexFrameLength + 1];  // +1 for the frame size.
    125   DISALLOW_COPY_AND_ASSIGN(SpeexEncoder);
    126 };
    127 
    128 SpeexEncoder::SpeexEncoder(int sampling_rate, int bits_per_sample)
    129     : AudioEncoder(std::string(kContentTypeSpeex) +
    130                    base::IntToString(sampling_rate),
    131                    bits_per_sample) {
    132    // speex_bits_init() does not initialize all of the |bits_| struct.
    133    memset(&bits_, 0, sizeof(bits_));
    134    speex_bits_init(&bits_);
    135    encoder_state_ = speex_encoder_init(&speex_wb_mode);
    136    DCHECK(encoder_state_);
    137    speex_encoder_ctl(encoder_state_, SPEEX_GET_FRAME_SIZE, &samples_per_frame_);
    138    DCHECK(samples_per_frame_ > 0);
    139    int quality = kSpeexEncodingQuality;
    140    speex_encoder_ctl(encoder_state_, SPEEX_SET_QUALITY, &quality);
    141    int vbr = 1;
    142    speex_encoder_ctl(encoder_state_, SPEEX_SET_VBR, &vbr);
    143    memset(encoded_frame_data_, 0, sizeof(encoded_frame_data_));
    144 }
    145 
    146 SpeexEncoder::~SpeexEncoder() {
    147   speex_bits_destroy(&bits_);
    148   speex_encoder_destroy(encoder_state_);
    149 }
    150 
    151 void SpeexEncoder::Encode(const AudioChunk& raw_audio) {
    152   spx_int16_t* src_buffer =
    153       const_cast<spx_int16_t*>(raw_audio.SamplesData16());
    154   int num_samples = raw_audio.NumSamples();
    155   // Drop incomplete frames, typically those which come in when recording stops.
    156   num_samples -= (num_samples % samples_per_frame_);
    157   for (int i = 0; i < num_samples; i += samples_per_frame_) {
    158     speex_bits_reset(&bits_);
    159     speex_encode_int(encoder_state_, src_buffer + i, &bits_);
    160 
    161     // Encode the frame and place the size of the frame as the first byte. This
    162     // is the packet format for MIME type x-speex-with-header-byte.
    163     int frame_length = speex_bits_write(&bits_, encoded_frame_data_ + 1,
    164                                         kMaxSpeexFrameLength);
    165     encoded_frame_data_[0] = static_cast<char>(frame_length);
    166     encoded_audio_buffer_.Enqueue(
    167         reinterpret_cast<uint8*>(&encoded_frame_data_[0]), frame_length + 1);
    168   }
    169 }
    170 
    171 }  // namespace
    172 
    173 AudioEncoder* AudioEncoder::Create(Codec codec,
    174                                    int sampling_rate,
    175                                    int bits_per_sample) {
    176   if (codec == CODEC_FLAC)
    177     return new FLACEncoder(sampling_rate, bits_per_sample);
    178   return new SpeexEncoder(sampling_rate, bits_per_sample);
    179 }
    180 
    181 AudioEncoder::AudioEncoder(const std::string& mime_type, int bits_per_sample)
    182     : encoded_audio_buffer_(1), /* Byte granularity of encoded samples. */
    183       mime_type_(mime_type),
    184       bits_per_sample_(bits_per_sample) {
    185 }
    186 
    187 AudioEncoder::~AudioEncoder() {
    188 }
    189 
    190 scoped_refptr<AudioChunk> AudioEncoder::GetEncodedDataAndClear() {
    191   return encoded_audio_buffer_.DequeueAll();
    192 }
    193 
    194 }  // namespace content
    195