1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "media/mp2t/es_parser_adts.h" 6 7 #include <list> 8 9 #include "base/basictypes.h" 10 #include "base/logging.h" 11 #include "base/strings/string_number_conversions.h" 12 #include "media/base/audio_timestamp_helper.h" 13 #include "media/base/bit_reader.h" 14 #include "media/base/buffers.h" 15 #include "media/base/channel_layout.h" 16 #include "media/base/stream_parser_buffer.h" 17 #include "media/mp2t/mp2t_common.h" 18 19 // Adts header is at least 7 bytes (can be 9 bytes). 20 static const int kAdtsHeaderMinSize = 7; 21 22 static const int adts_frequency_table[16] = { 23 96000, 24 88200, 25 64000, 26 48000, 27 44100, 28 32000, 29 24000, 30 22050, 31 16000, 32 12000, 33 11025, 34 8000, 35 7350, 36 0, 37 0, 38 0, 39 }; 40 static const int kMaxSupportedFrequencyIndex = 12; 41 42 static media::ChannelLayout adts_channel_layout[8] = { 43 media::CHANNEL_LAYOUT_NONE, 44 media::CHANNEL_LAYOUT_MONO, 45 media::CHANNEL_LAYOUT_STEREO, 46 media::CHANNEL_LAYOUT_SURROUND, 47 media::CHANNEL_LAYOUT_4_0, 48 media::CHANNEL_LAYOUT_5_0_BACK, 49 media::CHANNEL_LAYOUT_5_1_BACK, 50 media::CHANNEL_LAYOUT_7_1, 51 }; 52 53 // Number of samples per frame. 54 static const int kNumberSamplesPerAACFrame = 1024; 55 56 static int ExtractAdtsFrameSize(const uint8* adts_header) { 57 return ((static_cast<int>(adts_header[5]) >> 5) | 58 (static_cast<int>(adts_header[4]) << 3) | 59 ((static_cast<int>(adts_header[3]) & 0x3) << 11)); 60 } 61 62 static int ExtractAdtsFrequencyIndex(const uint8* adts_header) { 63 return ((adts_header[2] >> 2) & 0xf); 64 } 65 66 static int ExtractAdtsChannelConfig(const uint8* adts_header) { 67 return (((adts_header[3] >> 6) & 0x3) | 68 ((adts_header[2] & 0x1) << 2)); 69 } 70 71 // Return true if buf corresponds to an ADTS syncword. 72 // |buf| size must be at least 2. 73 static bool isAdtsSyncWord(const uint8* buf) { 74 return (buf[0] == 0xff) && ((buf[1] & 0xf6) == 0xf0); 75 } 76 77 // Look for an ADTS syncword. 78 // |new_pos| returns 79 // - either the byte position of the ADTS frame (if found) 80 // - or the byte position of 1st byte that was not processed (if not found). 81 // In every case, the returned value in |new_pos| is such that new_pos >= pos 82 // |frame_sz| returns the size of the ADTS frame (if found). 83 // Return whether a syncword was found. 84 static bool LookForSyncWord(const uint8* raw_es, int raw_es_size, 85 int pos, 86 int* new_pos, int* frame_sz) { 87 DCHECK_GE(pos, 0); 88 DCHECK_LE(pos, raw_es_size); 89 90 int max_offset = raw_es_size - kAdtsHeaderMinSize; 91 if (pos >= max_offset) { 92 // Do not change the position if: 93 // - max_offset < 0: not enough bytes to get a full header 94 // Since pos >= 0, this is a subcase of the next condition. 95 // - pos >= max_offset: might be the case after reading one full frame, 96 // |pos| is then incremented by the frame size and might then point 97 // to the end of the buffer. 98 *new_pos = pos; 99 return false; 100 } 101 102 for (int offset = pos; offset < max_offset; offset++) { 103 const uint8* cur_buf = &raw_es[offset]; 104 105 if (!isAdtsSyncWord(cur_buf)) 106 // The first 12 bits must be 1. 107 // The layer field (2 bits) must be set to 0. 108 continue; 109 110 int frame_size = ExtractAdtsFrameSize(cur_buf); 111 if (frame_size < kAdtsHeaderMinSize) { 112 // Too short to be an ADTS frame. 113 continue; 114 } 115 116 // Check whether there is another frame 117 // |size| apart from the current one. 118 int remaining_size = raw_es_size - offset; 119 if (remaining_size >= frame_size + 2 && 120 !isAdtsSyncWord(&cur_buf[frame_size])) { 121 continue; 122 } 123 124 *new_pos = offset; 125 *frame_sz = frame_size; 126 return true; 127 } 128 129 *new_pos = max_offset; 130 return false; 131 } 132 133 namespace media { 134 namespace mp2t { 135 136 EsParserAdts::EsParserAdts( 137 const NewAudioConfigCB& new_audio_config_cb, 138 const EmitBufferCB& emit_buffer_cb, 139 bool sbr_in_mimetype) 140 : new_audio_config_cb_(new_audio_config_cb), 141 emit_buffer_cb_(emit_buffer_cb), 142 sbr_in_mimetype_(sbr_in_mimetype) { 143 } 144 145 EsParserAdts::~EsParserAdts() { 146 } 147 148 bool EsParserAdts::Parse(const uint8* buf, int size, 149 base::TimeDelta pts, 150 base::TimeDelta dts) { 151 int raw_es_size; 152 const uint8* raw_es; 153 154 // The incoming PTS applies to the access unit that comes just after 155 // the beginning of |buf|. 156 if (pts != kNoTimestamp()) { 157 es_byte_queue_.Peek(&raw_es, &raw_es_size); 158 pts_list_.push_back(EsPts(raw_es_size, pts)); 159 } 160 161 // Copy the input data to the ES buffer. 162 es_byte_queue_.Push(buf, size); 163 es_byte_queue_.Peek(&raw_es, &raw_es_size); 164 165 // Look for every ADTS frame in the ES buffer starting at offset = 0 166 int es_position = 0; 167 int frame_size; 168 while (LookForSyncWord(raw_es, raw_es_size, es_position, 169 &es_position, &frame_size)) { 170 DVLOG(LOG_LEVEL_ES) 171 << "ADTS syncword @ pos=" << es_position 172 << " frame_size=" << frame_size; 173 DVLOG(LOG_LEVEL_ES) 174 << "ADTS header: " 175 << base::HexEncode(&raw_es[es_position], kAdtsHeaderMinSize); 176 177 // Do not process the frame if this one is a partial frame. 178 int remaining_size = raw_es_size - es_position; 179 if (frame_size > remaining_size) 180 break; 181 182 // Update the audio configuration if needed. 183 DCHECK_GE(frame_size, kAdtsHeaderMinSize); 184 if (!UpdateAudioConfiguration(&raw_es[es_position])) 185 return false; 186 187 // Get the PTS & the duration of this access unit. 188 while (!pts_list_.empty() && 189 pts_list_.front().first <= es_position) { 190 audio_timestamp_helper_->SetBaseTimestamp(pts_list_.front().second); 191 pts_list_.pop_front(); 192 } 193 194 base::TimeDelta current_pts = audio_timestamp_helper_->GetTimestamp(); 195 base::TimeDelta frame_duration = 196 audio_timestamp_helper_->GetFrameDuration(kNumberSamplesPerAACFrame); 197 198 // Emit an audio frame. 199 bool is_key_frame = true; 200 scoped_refptr<StreamParserBuffer> stream_parser_buffer = 201 StreamParserBuffer::CopyFrom( 202 &raw_es[es_position], 203 frame_size, 204 is_key_frame); 205 stream_parser_buffer->SetDecodeTimestamp(current_pts); 206 stream_parser_buffer->set_timestamp(current_pts); 207 stream_parser_buffer->set_duration(frame_duration); 208 emit_buffer_cb_.Run(stream_parser_buffer); 209 210 // Update the PTS of the next frame. 211 audio_timestamp_helper_->AddFrames(kNumberSamplesPerAACFrame); 212 213 // Skip the current frame. 214 es_position += frame_size; 215 } 216 217 // Discard all the bytes that have been processed. 218 DiscardEs(es_position); 219 220 return true; 221 } 222 223 void EsParserAdts::Flush() { 224 } 225 226 void EsParserAdts::Reset() { 227 es_byte_queue_.Reset(); 228 pts_list_.clear(); 229 last_audio_decoder_config_ = AudioDecoderConfig(); 230 } 231 232 bool EsParserAdts::UpdateAudioConfiguration(const uint8* adts_header) { 233 int frequency_index = ExtractAdtsFrequencyIndex(adts_header); 234 if (frequency_index > kMaxSupportedFrequencyIndex) { 235 // Frequency index 13 & 14 are reserved 236 // while 15 means that the frequency is explicitly written 237 // (not supported). 238 return false; 239 } 240 241 int channel_configuration = ExtractAdtsChannelConfig(adts_header); 242 if (channel_configuration == 0) { 243 // TODO(damienv): Add support for inband channel configuration. 244 return false; 245 } 246 247 // TODO(damienv): support HE-AAC frequency doubling (SBR) 248 // based on the incoming ADTS profile. 249 int samples_per_second = adts_frequency_table[frequency_index]; 250 int adts_profile = (adts_header[2] >> 6) & 0x3; 251 252 // The following code is written according to ISO 14496 Part 3 Table 1.11 and 253 // Table 1.22. (Table 1.11 refers to the capping to 48000, Table 1.22 refers 254 // to SBR doubling the AAC sample rate.) 255 // TODO(damienv) : Extend sample rate cap to 96kHz for Level 5 content. 256 int extended_samples_per_second = sbr_in_mimetype_ 257 ? std::min(2 * samples_per_second, 48000) 258 : samples_per_second; 259 260 AudioDecoderConfig audio_decoder_config( 261 kCodecAAC, 262 kSampleFormatS16, 263 adts_channel_layout[channel_configuration], 264 extended_samples_per_second, 265 NULL, 0, 266 false); 267 268 if (!audio_decoder_config.Matches(last_audio_decoder_config_)) { 269 DVLOG(1) << "Sampling frequency: " << samples_per_second; 270 DVLOG(1) << "Extended sampling frequency: " << extended_samples_per_second; 271 DVLOG(1) << "Channel config: " << channel_configuration; 272 DVLOG(1) << "Adts profile: " << adts_profile; 273 // Reset the timestamp helper to use a new time scale. 274 if (audio_timestamp_helper_) { 275 base::TimeDelta base_timestamp = audio_timestamp_helper_->GetTimestamp(); 276 audio_timestamp_helper_.reset( 277 new AudioTimestampHelper(samples_per_second)); 278 audio_timestamp_helper_->SetBaseTimestamp(base_timestamp); 279 } else { 280 audio_timestamp_helper_.reset( 281 new AudioTimestampHelper(samples_per_second)); 282 } 283 // Audio config notification. 284 last_audio_decoder_config_ = audio_decoder_config; 285 new_audio_config_cb_.Run(audio_decoder_config); 286 } 287 288 return true; 289 } 290 291 void EsParserAdts::DiscardEs(int nbytes) { 292 DCHECK_GE(nbytes, 0); 293 if (nbytes <= 0) 294 return; 295 296 // Adjust the ES position of each PTS. 297 for (EsPtsList::iterator it = pts_list_.begin(); it != pts_list_.end(); ++it) 298 it->first -= nbytes; 299 300 // Discard |nbytes| of ES. 301 es_byte_queue_.Pop(nbytes); 302 } 303 304 } // namespace mp2t 305 } // namespace media 306 307