1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "content/browser/speech/audio_encoder.h" 6 7 #include "base/basictypes.h" 8 #include "base/logging.h" 9 #include "base/memory/scoped_ptr.h" 10 #include "base/stl_util.h" 11 #include "base/strings/string_number_conversions.h" 12 #include "content/browser/speech/audio_buffer.h" 13 #include "third_party/flac/include/FLAC/stream_encoder.h" 14 #include "third_party/speex/include/speex/speex.h" 15 16 namespace content { 17 namespace { 18 19 //-------------------------------- FLACEncoder --------------------------------- 20 21 const char* const kContentTypeFLAC = "audio/x-flac; rate="; 22 const int kFLACCompressionLevel = 0; // 0 for speed 23 24 class FLACEncoder : public AudioEncoder { 25 public: 26 FLACEncoder(int sampling_rate, int bits_per_sample); 27 virtual ~FLACEncoder(); 28 virtual void Encode(const AudioChunk& raw_audio) OVERRIDE; 29 virtual void Flush() OVERRIDE; 30 31 private: 32 static FLAC__StreamEncoderWriteStatus WriteCallback( 33 const FLAC__StreamEncoder* encoder, 34 const FLAC__byte buffer[], 35 size_t bytes, 36 unsigned samples, 37 unsigned current_frame, 38 void* client_data); 39 40 FLAC__StreamEncoder* encoder_; 41 bool is_encoder_initialized_; 42 43 DISALLOW_COPY_AND_ASSIGN(FLACEncoder); 44 }; 45 46 FLAC__StreamEncoderWriteStatus FLACEncoder::WriteCallback( 47 const FLAC__StreamEncoder* encoder, 48 const FLAC__byte buffer[], 49 size_t bytes, 50 unsigned samples, 51 unsigned current_frame, 52 void* client_data) { 53 FLACEncoder* me = static_cast<FLACEncoder*>(client_data); 54 DCHECK(me->encoder_ == encoder); 55 me->encoded_audio_buffer_.Enqueue(buffer, bytes); 56 return FLAC__STREAM_ENCODER_WRITE_STATUS_OK; 57 } 58 59 FLACEncoder::FLACEncoder(int sampling_rate, int bits_per_sample) 60 : AudioEncoder(std::string(kContentTypeFLAC) + 61 base::IntToString(sampling_rate), 62 bits_per_sample), 63 encoder_(FLAC__stream_encoder_new()), 64 is_encoder_initialized_(false) { 65 FLAC__stream_encoder_set_channels(encoder_, 1); 66 FLAC__stream_encoder_set_bits_per_sample(encoder_, bits_per_sample); 67 FLAC__stream_encoder_set_sample_rate(encoder_, sampling_rate); 68 FLAC__stream_encoder_set_compression_level(encoder_, kFLACCompressionLevel); 69 70 // Initializing the encoder will cause sync bytes to be written to 71 // its output stream, so we wait until the first call to this method 72 // before doing so. 73 } 74 75 FLACEncoder::~FLACEncoder() { 76 FLAC__stream_encoder_delete(encoder_); 77 } 78 79 void FLACEncoder::Encode(const AudioChunk& raw_audio) { 80 DCHECK_EQ(raw_audio.bytes_per_sample(), 2); 81 if (!is_encoder_initialized_) { 82 const FLAC__StreamEncoderInitStatus encoder_status = 83 FLAC__stream_encoder_init_stream(encoder_, WriteCallback, NULL, NULL, 84 NULL, this); 85 DCHECK_EQ(encoder_status, FLAC__STREAM_ENCODER_INIT_STATUS_OK); 86 is_encoder_initialized_ = true; 87 } 88 89 // FLAC encoder wants samples as int32s. 90 const int num_samples = raw_audio.NumSamples(); 91 scoped_ptr<FLAC__int32[]> flac_samples(new FLAC__int32[num_samples]); 92 FLAC__int32* flac_samples_ptr = flac_samples.get(); 93 for (int i = 0; i < num_samples; ++i) 94 flac_samples_ptr[i] = static_cast<FLAC__int32>(raw_audio.GetSample16(i)); 95 96 FLAC__stream_encoder_process(encoder_, &flac_samples_ptr, num_samples); 97 } 98 99 void FLACEncoder::Flush() { 100 FLAC__stream_encoder_finish(encoder_); 101 } 102 103 //-------------------------------- SpeexEncoder -------------------------------- 104 105 const char* const kContentTypeSpeex = "audio/x-speex-with-header-byte; rate="; 106 const int kSpeexEncodingQuality = 8; 107 const int kMaxSpeexFrameLength = 110; // (44kbps rate sampled at 32kHz). 108 109 // Since the frame length gets written out as a byte in the encoded packet, 110 // make sure it is within the byte range. 111 COMPILE_ASSERT(kMaxSpeexFrameLength <= 0xFF, invalidLength); 112 113 class SpeexEncoder : public AudioEncoder { 114 public: 115 explicit SpeexEncoder(int sampling_rate, int bits_per_sample); 116 virtual ~SpeexEncoder(); 117 virtual void Encode(const AudioChunk& raw_audio) OVERRIDE; 118 virtual void Flush() OVERRIDE {} 119 120 private: 121 void* encoder_state_; 122 SpeexBits bits_; 123 int samples_per_frame_; 124 char encoded_frame_data_[kMaxSpeexFrameLength + 1]; // +1 for the frame size. 125 DISALLOW_COPY_AND_ASSIGN(SpeexEncoder); 126 }; 127 128 SpeexEncoder::SpeexEncoder(int sampling_rate, int bits_per_sample) 129 : AudioEncoder(std::string(kContentTypeSpeex) + 130 base::IntToString(sampling_rate), 131 bits_per_sample) { 132 // speex_bits_init() does not initialize all of the |bits_| struct. 133 memset(&bits_, 0, sizeof(bits_)); 134 speex_bits_init(&bits_); 135 encoder_state_ = speex_encoder_init(&speex_wb_mode); 136 DCHECK(encoder_state_); 137 speex_encoder_ctl(encoder_state_, SPEEX_GET_FRAME_SIZE, &samples_per_frame_); 138 DCHECK(samples_per_frame_ > 0); 139 int quality = kSpeexEncodingQuality; 140 speex_encoder_ctl(encoder_state_, SPEEX_SET_QUALITY, &quality); 141 int vbr = 1; 142 speex_encoder_ctl(encoder_state_, SPEEX_SET_VBR, &vbr); 143 memset(encoded_frame_data_, 0, sizeof(encoded_frame_data_)); 144 } 145 146 SpeexEncoder::~SpeexEncoder() { 147 speex_bits_destroy(&bits_); 148 speex_encoder_destroy(encoder_state_); 149 } 150 151 void SpeexEncoder::Encode(const AudioChunk& raw_audio) { 152 spx_int16_t* src_buffer = 153 const_cast<spx_int16_t*>(raw_audio.SamplesData16()); 154 int num_samples = raw_audio.NumSamples(); 155 // Drop incomplete frames, typically those which come in when recording stops. 156 num_samples -= (num_samples % samples_per_frame_); 157 for (int i = 0; i < num_samples; i += samples_per_frame_) { 158 speex_bits_reset(&bits_); 159 speex_encode_int(encoder_state_, src_buffer + i, &bits_); 160 161 // Encode the frame and place the size of the frame as the first byte. This 162 // is the packet format for MIME type x-speex-with-header-byte. 163 int frame_length = speex_bits_write(&bits_, encoded_frame_data_ + 1, 164 kMaxSpeexFrameLength); 165 encoded_frame_data_[0] = static_cast<char>(frame_length); 166 encoded_audio_buffer_.Enqueue( 167 reinterpret_cast<uint8*>(&encoded_frame_data_[0]), frame_length + 1); 168 } 169 } 170 171 } // namespace 172 173 AudioEncoder* AudioEncoder::Create(Codec codec, 174 int sampling_rate, 175 int bits_per_sample) { 176 if (codec == CODEC_FLAC) 177 return new FLACEncoder(sampling_rate, bits_per_sample); 178 return new SpeexEncoder(sampling_rate, bits_per_sample); 179 } 180 181 AudioEncoder::AudioEncoder(const std::string& mime_type, int bits_per_sample) 182 : encoded_audio_buffer_(1), /* Byte granularity of encoded samples. */ 183 mime_type_(mime_type), 184 bits_per_sample_(bits_per_sample) { 185 } 186 187 AudioEncoder::~AudioEncoder() { 188 } 189 190 scoped_refptr<AudioChunk> AudioEncoder::GetEncodedDataAndClear() { 191 return encoded_audio_buffer_.DequeueAll(); 192 } 193 194 } // namespace content 195