1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 //#define LOG_NDEBUG 0 18 #define LOG_TAG "avc_utils" 19 #include <utils/Log.h> 20 21 #include "include/avc_utils.h" 22 23 #include <media/stagefright/foundation/ABitReader.h> 24 #include <media/stagefright/foundation/ADebug.h> 25 #include <media/stagefright/MediaDefs.h> 26 #include <media/stagefright/MediaErrors.h> 27 #include <media/stagefright/MetaData.h> 28 29 namespace android { 30 31 unsigned parseUE(ABitReader *br) { 32 unsigned numZeroes = 0; 33 while (br->getBits(1) == 0) { 34 ++numZeroes; 35 } 36 37 unsigned x = br->getBits(numZeroes); 38 39 return x + (1u << numZeroes) - 1; 40 } 41 42 // Determine video dimensions from the sequence parameterset. 43 void FindAVCDimensions( 44 const sp<ABuffer> &seqParamSet, int32_t *width, int32_t *height) { 45 ABitReader br(seqParamSet->data() + 1, seqParamSet->size() - 1); 46 47 unsigned profile_idc = br.getBits(8); 48 br.skipBits(16); 49 parseUE(&br); // seq_parameter_set_id 50 51 unsigned chroma_format_idc = 1; // 4:2:0 chroma format 52 53 if (profile_idc == 100 || profile_idc == 110 54 || profile_idc == 122 || profile_idc == 244 55 || profile_idc == 44 || profile_idc == 83 || profile_idc == 86) { 56 chroma_format_idc = parseUE(&br); 57 if (chroma_format_idc == 3) { 58 br.skipBits(1); // residual_colour_transform_flag 59 } 60 parseUE(&br); // bit_depth_luma_minus8 61 parseUE(&br); // bit_depth_chroma_minus8 62 br.skipBits(1); // qpprime_y_zero_transform_bypass_flag 63 CHECK_EQ(br.getBits(1), 0u); // seq_scaling_matrix_present_flag 64 } 65 66 parseUE(&br); // log2_max_frame_num_minus4 67 unsigned pic_order_cnt_type = parseUE(&br); 68 69 if (pic_order_cnt_type == 0) { 70 parseUE(&br); // log2_max_pic_order_cnt_lsb_minus4 71 } else if (pic_order_cnt_type == 1) { 72 // offset_for_non_ref_pic, offset_for_top_to_bottom_field and 73 // offset_for_ref_frame are technically se(v), but since we are 74 // just skipping over them the midpoint does not matter. 75 76 br.getBits(1); // delta_pic_order_always_zero_flag 77 parseUE(&br); // offset_for_non_ref_pic 78 parseUE(&br); // offset_for_top_to_bottom_field 79 80 unsigned num_ref_frames_in_pic_order_cnt_cycle = parseUE(&br); 81 for (unsigned i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) { 82 parseUE(&br); // offset_for_ref_frame 83 } 84 } 85 86 parseUE(&br); // num_ref_frames 87 br.getBits(1); // gaps_in_frame_num_value_allowed_flag 88 89 unsigned pic_width_in_mbs_minus1 = parseUE(&br); 90 unsigned pic_height_in_map_units_minus1 = parseUE(&br); 91 unsigned frame_mbs_only_flag = br.getBits(1); 92 93 *width = pic_width_in_mbs_minus1 * 16 + 16; 94 95 *height = (2 - frame_mbs_only_flag) 96 * (pic_height_in_map_units_minus1 * 16 + 16); 97 98 if (!frame_mbs_only_flag) { 99 br.getBits(1); // mb_adaptive_frame_field_flag 100 } 101 102 br.getBits(1); // direct_8x8_inference_flag 103 104 if (br.getBits(1)) { // frame_cropping_flag 105 unsigned frame_crop_left_offset = parseUE(&br); 106 unsigned frame_crop_right_offset = parseUE(&br); 107 unsigned frame_crop_top_offset = parseUE(&br); 108 unsigned frame_crop_bottom_offset = parseUE(&br); 109 110 unsigned cropUnitX, cropUnitY; 111 if (chroma_format_idc == 0 /* monochrome */) { 112 cropUnitX = 1; 113 cropUnitY = 2 - frame_mbs_only_flag; 114 } else { 115 unsigned subWidthC = (chroma_format_idc == 3) ? 1 : 2; 116 unsigned subHeightC = (chroma_format_idc == 1) ? 2 : 1; 117 118 cropUnitX = subWidthC; 119 cropUnitY = subHeightC * (2 - frame_mbs_only_flag); 120 } 121 122 LOGV("frame_crop = (%u, %u, %u, %u), cropUnitX = %u, cropUnitY = %u", 123 frame_crop_left_offset, frame_crop_right_offset, 124 frame_crop_top_offset, frame_crop_bottom_offset, 125 cropUnitX, cropUnitY); 126 127 *width -= 128 (frame_crop_left_offset + frame_crop_right_offset) * cropUnitX; 129 *height -= 130 (frame_crop_top_offset + frame_crop_bottom_offset) * cropUnitY; 131 } 132 } 133 134 status_t getNextNALUnit( 135 const uint8_t **_data, size_t *_size, 136 const uint8_t **nalStart, size_t *nalSize, 137 bool startCodeFollows) { 138 const uint8_t *data = *_data; 139 size_t size = *_size; 140 141 *nalStart = NULL; 142 *nalSize = 0; 143 144 if (size == 0) { 145 return -EAGAIN; 146 } 147 148 // Skip any number of leading 0x00. 149 150 size_t offset = 0; 151 while (offset < size && data[offset] == 0x00) { 152 ++offset; 153 } 154 155 if (offset == size) { 156 return -EAGAIN; 157 } 158 159 // A valid startcode consists of at least two 0x00 bytes followed by 0x01. 160 161 if (offset < 2 || data[offset] != 0x01) { 162 return ERROR_MALFORMED; 163 } 164 165 ++offset; 166 167 size_t startOffset = offset; 168 169 for (;;) { 170 while (offset < size && data[offset] != 0x01) { 171 ++offset; 172 } 173 174 if (offset == size) { 175 if (startCodeFollows) { 176 offset = size + 2; 177 break; 178 } 179 180 return -EAGAIN; 181 } 182 183 if (data[offset - 1] == 0x00 && data[offset - 2] == 0x00) { 184 break; 185 } 186 187 ++offset; 188 } 189 190 size_t endOffset = offset - 2; 191 while (endOffset > startOffset + 1 && data[endOffset - 1] == 0x00) { 192 --endOffset; 193 } 194 195 *nalStart = &data[startOffset]; 196 *nalSize = endOffset - startOffset; 197 198 if (offset + 2 < size) { 199 *_data = &data[offset - 2]; 200 *_size = size - offset + 2; 201 } else { 202 *_data = NULL; 203 *_size = 0; 204 } 205 206 return OK; 207 } 208 209 static sp<ABuffer> FindNAL( 210 const uint8_t *data, size_t size, unsigned nalType, 211 size_t *stopOffset) { 212 const uint8_t *nalStart; 213 size_t nalSize; 214 while (getNextNALUnit(&data, &size, &nalStart, &nalSize, true) == OK) { 215 if ((nalStart[0] & 0x1f) == nalType) { 216 sp<ABuffer> buffer = new ABuffer(nalSize); 217 memcpy(buffer->data(), nalStart, nalSize); 218 return buffer; 219 } 220 } 221 222 return NULL; 223 } 224 225 const char *AVCProfileToString(uint8_t profile) { 226 switch (profile) { 227 case kAVCProfileBaseline: 228 return "Baseline"; 229 case kAVCProfileMain: 230 return "Main"; 231 case kAVCProfileExtended: 232 return "Extended"; 233 case kAVCProfileHigh: 234 return "High"; 235 case kAVCProfileHigh10: 236 return "High 10"; 237 case kAVCProfileHigh422: 238 return "High 422"; 239 case kAVCProfileHigh444: 240 return "High 444"; 241 case kAVCProfileCAVLC444Intra: 242 return "CAVLC 444 Intra"; 243 default: return "Unknown"; 244 } 245 } 246 247 sp<MetaData> MakeAVCCodecSpecificData(const sp<ABuffer> &accessUnit) { 248 const uint8_t *data = accessUnit->data(); 249 size_t size = accessUnit->size(); 250 251 sp<ABuffer> seqParamSet = FindNAL(data, size, 7, NULL); 252 if (seqParamSet == NULL) { 253 return NULL; 254 } 255 256 int32_t width, height; 257 FindAVCDimensions(seqParamSet, &width, &height); 258 259 size_t stopOffset; 260 sp<ABuffer> picParamSet = FindNAL(data, size, 8, &stopOffset); 261 CHECK(picParamSet != NULL); 262 263 size_t csdSize = 264 1 + 3 + 1 + 1 265 + 2 * 1 + seqParamSet->size() 266 + 1 + 2 * 1 + picParamSet->size(); 267 268 sp<ABuffer> csd = new ABuffer(csdSize); 269 uint8_t *out = csd->data(); 270 271 *out++ = 0x01; // configurationVersion 272 memcpy(out, seqParamSet->data() + 1, 3); // profile/level... 273 274 uint8_t profile = out[0]; 275 uint8_t level = out[2]; 276 277 out += 3; 278 *out++ = (0x3f << 2) | 1; // lengthSize == 2 bytes 279 *out++ = 0xe0 | 1; 280 281 *out++ = seqParamSet->size() >> 8; 282 *out++ = seqParamSet->size() & 0xff; 283 memcpy(out, seqParamSet->data(), seqParamSet->size()); 284 out += seqParamSet->size(); 285 286 *out++ = 1; 287 288 *out++ = picParamSet->size() >> 8; 289 *out++ = picParamSet->size() & 0xff; 290 memcpy(out, picParamSet->data(), picParamSet->size()); 291 292 #if 0 293 LOGI("AVC seq param set"); 294 hexdump(seqParamSet->data(), seqParamSet->size()); 295 #endif 296 297 sp<MetaData> meta = new MetaData; 298 meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_AVC); 299 300 meta->setData(kKeyAVCC, kTypeAVCC, csd->data(), csd->size()); 301 meta->setInt32(kKeyWidth, width); 302 meta->setInt32(kKeyHeight, height); 303 304 LOGI("found AVC codec config (%d x %d, %s-profile level %d.%d)", 305 width, height, AVCProfileToString(profile), level / 10, level % 10); 306 307 return meta; 308 } 309 310 bool IsIDR(const sp<ABuffer> &buffer) { 311 const uint8_t *data = buffer->data(); 312 size_t size = buffer->size(); 313 314 bool foundIDR = false; 315 316 const uint8_t *nalStart; 317 size_t nalSize; 318 while (getNextNALUnit(&data, &size, &nalStart, &nalSize, true) == OK) { 319 CHECK_GT(nalSize, 0u); 320 321 unsigned nalType = nalStart[0] & 0x1f; 322 323 if (nalType == 5) { 324 foundIDR = true; 325 break; 326 } 327 } 328 329 return foundIDR; 330 } 331 332 bool IsAVCReferenceFrame(const sp<ABuffer> &accessUnit) { 333 const uint8_t *data = accessUnit->data(); 334 size_t size = accessUnit->size(); 335 336 const uint8_t *nalStart; 337 size_t nalSize; 338 while (getNextNALUnit(&data, &size, &nalStart, &nalSize, true) == OK) { 339 CHECK_GT(nalSize, 0u); 340 341 unsigned nalType = nalStart[0] & 0x1f; 342 343 if (nalType == 5) { 344 return true; 345 } else if (nalType == 1) { 346 unsigned nal_ref_idc = (nalStart[0] >> 5) & 3; 347 return nal_ref_idc != 0; 348 } 349 } 350 351 return true; 352 } 353 354 sp<MetaData> MakeAACCodecSpecificData( 355 unsigned profile, unsigned sampling_freq_index, 356 unsigned channel_configuration) { 357 sp<MetaData> meta = new MetaData; 358 meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AAC); 359 360 CHECK_LE(sampling_freq_index, 11u); 361 static const int32_t kSamplingFreq[] = { 362 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 363 16000, 12000, 11025, 8000 364 }; 365 meta->setInt32(kKeySampleRate, kSamplingFreq[sampling_freq_index]); 366 meta->setInt32(kKeyChannelCount, channel_configuration); 367 368 static const uint8_t kStaticESDS[] = { 369 0x03, 22, 370 0x00, 0x00, // ES_ID 371 0x00, // streamDependenceFlag, URL_Flag, OCRstreamFlag 372 373 0x04, 17, 374 0x40, // Audio ISO/IEC 14496-3 375 0x00, 0x00, 0x00, 0x00, 376 0x00, 0x00, 0x00, 0x00, 377 0x00, 0x00, 0x00, 0x00, 378 379 0x05, 2, 380 // AudioSpecificInfo follows 381 382 // oooo offf fccc c000 383 // o - audioObjectType 384 // f - samplingFreqIndex 385 // c - channelConfig 386 }; 387 sp<ABuffer> csd = new ABuffer(sizeof(kStaticESDS) + 2); 388 memcpy(csd->data(), kStaticESDS, sizeof(kStaticESDS)); 389 390 csd->data()[sizeof(kStaticESDS)] = 391 ((profile + 1) << 3) | (sampling_freq_index >> 1); 392 393 csd->data()[sizeof(kStaticESDS) + 1] = 394 ((sampling_freq_index << 7) & 0x80) | (channel_configuration << 3); 395 396 meta->setData(kKeyESDS, 0, csd->data(), csd->size()); 397 398 return meta; 399 } 400 401 bool ExtractDimensionsFromVOLHeader( 402 const uint8_t *data, size_t size, int32_t *width, int32_t *height) { 403 ABitReader br(&data[4], size - 4); 404 br.skipBits(1); // random_accessible_vol 405 unsigned video_object_type_indication = br.getBits(8); 406 407 CHECK_NE(video_object_type_indication, 408 0x21u /* Fine Granularity Scalable */); 409 410 unsigned video_object_layer_verid; 411 unsigned video_object_layer_priority; 412 if (br.getBits(1)) { 413 video_object_layer_verid = br.getBits(4); 414 video_object_layer_priority = br.getBits(3); 415 } 416 unsigned aspect_ratio_info = br.getBits(4); 417 if (aspect_ratio_info == 0x0f /* extended PAR */) { 418 br.skipBits(8); // par_width 419 br.skipBits(8); // par_height 420 } 421 if (br.getBits(1)) { // vol_control_parameters 422 br.skipBits(2); // chroma_format 423 br.skipBits(1); // low_delay 424 if (br.getBits(1)) { // vbv_parameters 425 br.skipBits(15); // first_half_bit_rate 426 CHECK(br.getBits(1)); // marker_bit 427 br.skipBits(15); // latter_half_bit_rate 428 CHECK(br.getBits(1)); // marker_bit 429 br.skipBits(15); // first_half_vbv_buffer_size 430 CHECK(br.getBits(1)); // marker_bit 431 br.skipBits(3); // latter_half_vbv_buffer_size 432 br.skipBits(11); // first_half_vbv_occupancy 433 CHECK(br.getBits(1)); // marker_bit 434 br.skipBits(15); // latter_half_vbv_occupancy 435 CHECK(br.getBits(1)); // marker_bit 436 } 437 } 438 unsigned video_object_layer_shape = br.getBits(2); 439 CHECK_EQ(video_object_layer_shape, 0x00u /* rectangular */); 440 441 CHECK(br.getBits(1)); // marker_bit 442 unsigned vop_time_increment_resolution = br.getBits(16); 443 CHECK(br.getBits(1)); // marker_bit 444 445 if (br.getBits(1)) { // fixed_vop_rate 446 // range [0..vop_time_increment_resolution) 447 448 // vop_time_increment_resolution 449 // 2 => 0..1, 1 bit 450 // 3 => 0..2, 2 bits 451 // 4 => 0..3, 2 bits 452 // 5 => 0..4, 3 bits 453 // ... 454 455 CHECK_GT(vop_time_increment_resolution, 0u); 456 --vop_time_increment_resolution; 457 458 unsigned numBits = 0; 459 while (vop_time_increment_resolution > 0) { 460 ++numBits; 461 vop_time_increment_resolution >>= 1; 462 } 463 464 br.skipBits(numBits); // fixed_vop_time_increment 465 } 466 467 CHECK(br.getBits(1)); // marker_bit 468 unsigned video_object_layer_width = br.getBits(13); 469 CHECK(br.getBits(1)); // marker_bit 470 unsigned video_object_layer_height = br.getBits(13); 471 CHECK(br.getBits(1)); // marker_bit 472 473 unsigned interlaced = br.getBits(1); 474 475 *width = video_object_layer_width; 476 *height = video_object_layer_height; 477 478 return true; 479 } 480 481 bool GetMPEGAudioFrameSize( 482 uint32_t header, size_t *frame_size, 483 int *out_sampling_rate, int *out_channels, 484 int *out_bitrate, int *out_num_samples) { 485 *frame_size = 0; 486 487 if (out_sampling_rate) { 488 *out_sampling_rate = 0; 489 } 490 491 if (out_channels) { 492 *out_channels = 0; 493 } 494 495 if (out_bitrate) { 496 *out_bitrate = 0; 497 } 498 499 if (out_num_samples) { 500 *out_num_samples = 1152; 501 } 502 503 if ((header & 0xffe00000) != 0xffe00000) { 504 return false; 505 } 506 507 unsigned version = (header >> 19) & 3; 508 509 if (version == 0x01) { 510 return false; 511 } 512 513 unsigned layer = (header >> 17) & 3; 514 515 if (layer == 0x00) { 516 return false; 517 } 518 519 unsigned protection = (header >> 16) & 1; 520 521 unsigned bitrate_index = (header >> 12) & 0x0f; 522 523 if (bitrate_index == 0 || bitrate_index == 0x0f) { 524 // Disallow "free" bitrate. 525 return false; 526 } 527 528 unsigned sampling_rate_index = (header >> 10) & 3; 529 530 if (sampling_rate_index == 3) { 531 return false; 532 } 533 534 static const int kSamplingRateV1[] = { 44100, 48000, 32000 }; 535 int sampling_rate = kSamplingRateV1[sampling_rate_index]; 536 if (version == 2 /* V2 */) { 537 sampling_rate /= 2; 538 } else if (version == 0 /* V2.5 */) { 539 sampling_rate /= 4; 540 } 541 542 unsigned padding = (header >> 9) & 1; 543 544 if (layer == 3) { 545 // layer I 546 547 static const int kBitrateV1[] = { 548 32, 64, 96, 128, 160, 192, 224, 256, 549 288, 320, 352, 384, 416, 448 550 }; 551 552 static const int kBitrateV2[] = { 553 32, 48, 56, 64, 80, 96, 112, 128, 554 144, 160, 176, 192, 224, 256 555 }; 556 557 int bitrate = 558 (version == 3 /* V1 */) 559 ? kBitrateV1[bitrate_index - 1] 560 : kBitrateV2[bitrate_index - 1]; 561 562 if (out_bitrate) { 563 *out_bitrate = bitrate; 564 } 565 566 *frame_size = (12000 * bitrate / sampling_rate + padding) * 4; 567 568 if (out_num_samples) { 569 *out_num_samples = 384; 570 } 571 } else { 572 // layer II or III 573 574 static const int kBitrateV1L2[] = { 575 32, 48, 56, 64, 80, 96, 112, 128, 576 160, 192, 224, 256, 320, 384 577 }; 578 579 static const int kBitrateV1L3[] = { 580 32, 40, 48, 56, 64, 80, 96, 112, 581 128, 160, 192, 224, 256, 320 582 }; 583 584 static const int kBitrateV2[] = { 585 8, 16, 24, 32, 40, 48, 56, 64, 586 80, 96, 112, 128, 144, 160 587 }; 588 589 int bitrate; 590 if (version == 3 /* V1 */) { 591 bitrate = (layer == 2 /* L2 */) 592 ? kBitrateV1L2[bitrate_index - 1] 593 : kBitrateV1L3[bitrate_index - 1]; 594 595 if (out_num_samples) { 596 *out_num_samples = 1152; 597 } 598 } else { 599 // V2 (or 2.5) 600 601 bitrate = kBitrateV2[bitrate_index - 1]; 602 if (out_num_samples) { 603 *out_num_samples = 576; 604 } 605 } 606 607 if (out_bitrate) { 608 *out_bitrate = bitrate; 609 } 610 611 if (version == 3 /* V1 */) { 612 *frame_size = 144000 * bitrate / sampling_rate + padding; 613 } else { 614 // V2 or V2.5 615 *frame_size = 72000 * bitrate / sampling_rate + padding; 616 } 617 } 618 619 if (out_sampling_rate) { 620 *out_sampling_rate = sampling_rate; 621 } 622 623 if (out_channels) { 624 int channel_mode = (header >> 6) & 3; 625 626 *out_channels = (channel_mode == 3) ? 1 : 2; 627 } 628 629 return true; 630 } 631 632 } // namespace android 633 634