1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 //#define LOG_NDEBUG 0 18 #define LOG_TAG "avc_utils" 19 #include <utils/Log.h> 20 21 #include "include/avc_utils.h" 22 23 #include <media/stagefright/foundation/ABitReader.h> 24 #include <media/stagefright/foundation/ADebug.h> 25 #include <media/stagefright/foundation/hexdump.h> 26 #include <media/stagefright/MediaDefs.h> 27 #include <media/stagefright/MediaErrors.h> 28 #include <media/stagefright/MetaData.h> 29 30 namespace android { 31 32 unsigned parseUE(ABitReader *br) { 33 unsigned numZeroes = 0; 34 while (br->getBits(1) == 0) { 35 ++numZeroes; 36 } 37 38 unsigned x = br->getBits(numZeroes); 39 40 return x + (1u << numZeroes) - 1; 41 } 42 43 // Determine video dimensions from the sequence parameterset. 44 void FindAVCDimensions( 45 const sp<ABuffer> &seqParamSet, 46 int32_t *width, int32_t *height, 47 int32_t *sarWidth, int32_t *sarHeight) { 48 ABitReader br(seqParamSet->data() + 1, seqParamSet->size() - 1); 49 50 unsigned profile_idc = br.getBits(8); 51 br.skipBits(16); 52 parseUE(&br); // seq_parameter_set_id 53 54 unsigned chroma_format_idc = 1; // 4:2:0 chroma format 55 56 if (profile_idc == 100 || profile_idc == 110 57 || profile_idc == 122 || profile_idc == 244 58 || profile_idc == 44 || profile_idc == 83 || profile_idc == 86) { 59 chroma_format_idc = parseUE(&br); 60 if (chroma_format_idc == 3) { 61 br.skipBits(1); // residual_colour_transform_flag 62 } 63 parseUE(&br); // bit_depth_luma_minus8 64 parseUE(&br); // bit_depth_chroma_minus8 65 br.skipBits(1); // qpprime_y_zero_transform_bypass_flag 66 CHECK_EQ(br.getBits(1), 0u); // seq_scaling_matrix_present_flag 67 } 68 69 parseUE(&br); // log2_max_frame_num_minus4 70 unsigned pic_order_cnt_type = parseUE(&br); 71 72 if (pic_order_cnt_type == 0) { 73 parseUE(&br); // log2_max_pic_order_cnt_lsb_minus4 74 } else if (pic_order_cnt_type == 1) { 75 // offset_for_non_ref_pic, offset_for_top_to_bottom_field and 76 // offset_for_ref_frame are technically se(v), but since we are 77 // just skipping over them the midpoint does not matter. 78 79 br.getBits(1); // delta_pic_order_always_zero_flag 80 parseUE(&br); // offset_for_non_ref_pic 81 parseUE(&br); // offset_for_top_to_bottom_field 82 83 unsigned num_ref_frames_in_pic_order_cnt_cycle = parseUE(&br); 84 for (unsigned i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) { 85 parseUE(&br); // offset_for_ref_frame 86 } 87 } 88 89 parseUE(&br); // num_ref_frames 90 br.getBits(1); // gaps_in_frame_num_value_allowed_flag 91 92 unsigned pic_width_in_mbs_minus1 = parseUE(&br); 93 unsigned pic_height_in_map_units_minus1 = parseUE(&br); 94 unsigned frame_mbs_only_flag = br.getBits(1); 95 96 *width = pic_width_in_mbs_minus1 * 16 + 16; 97 98 *height = (2 - frame_mbs_only_flag) 99 * (pic_height_in_map_units_minus1 * 16 + 16); 100 101 if (!frame_mbs_only_flag) { 102 br.getBits(1); // mb_adaptive_frame_field_flag 103 } 104 105 br.getBits(1); // direct_8x8_inference_flag 106 107 if (br.getBits(1)) { // frame_cropping_flag 108 unsigned frame_crop_left_offset = parseUE(&br); 109 unsigned frame_crop_right_offset = parseUE(&br); 110 unsigned frame_crop_top_offset = parseUE(&br); 111 unsigned frame_crop_bottom_offset = parseUE(&br); 112 113 unsigned cropUnitX, cropUnitY; 114 if (chroma_format_idc == 0 /* monochrome */) { 115 cropUnitX = 1; 116 cropUnitY = 2 - frame_mbs_only_flag; 117 } else { 118 unsigned subWidthC = (chroma_format_idc == 3) ? 1 : 2; 119 unsigned subHeightC = (chroma_format_idc == 1) ? 2 : 1; 120 121 cropUnitX = subWidthC; 122 cropUnitY = subHeightC * (2 - frame_mbs_only_flag); 123 } 124 125 ALOGV("frame_crop = (%u, %u, %u, %u), cropUnitX = %u, cropUnitY = %u", 126 frame_crop_left_offset, frame_crop_right_offset, 127 frame_crop_top_offset, frame_crop_bottom_offset, 128 cropUnitX, cropUnitY); 129 130 *width -= 131 (frame_crop_left_offset + frame_crop_right_offset) * cropUnitX; 132 *height -= 133 (frame_crop_top_offset + frame_crop_bottom_offset) * cropUnitY; 134 } 135 136 if (sarWidth != NULL) { 137 *sarWidth = 0; 138 } 139 140 if (sarHeight != NULL) { 141 *sarHeight = 0; 142 } 143 144 if (br.getBits(1)) { // vui_parameters_present_flag 145 unsigned sar_width = 0, sar_height = 0; 146 147 if (br.getBits(1)) { // aspect_ratio_info_present_flag 148 unsigned aspect_ratio_idc = br.getBits(8); 149 150 if (aspect_ratio_idc == 255 /* extendedSAR */) { 151 sar_width = br.getBits(16); 152 sar_height = br.getBits(16); 153 } else if (aspect_ratio_idc > 0 && aspect_ratio_idc < 14) { 154 static const int32_t kFixedSARWidth[] = { 155 1, 12, 10, 16, 40, 24, 20, 32, 80, 18, 15, 64, 160 156 }; 157 158 static const int32_t kFixedSARHeight[] = { 159 1, 11, 11, 11, 33, 11, 11, 11, 33, 11, 11, 33, 99 160 }; 161 162 sar_width = kFixedSARWidth[aspect_ratio_idc - 1]; 163 sar_height = kFixedSARHeight[aspect_ratio_idc - 1]; 164 } 165 } 166 167 ALOGV("sample aspect ratio = %u : %u", sar_width, sar_height); 168 169 if (sarWidth != NULL) { 170 *sarWidth = sar_width; 171 } 172 173 if (sarHeight != NULL) { 174 *sarHeight = sar_height; 175 } 176 } 177 } 178 179 status_t getNextNALUnit( 180 const uint8_t **_data, size_t *_size, 181 const uint8_t **nalStart, size_t *nalSize, 182 bool startCodeFollows) { 183 const uint8_t *data = *_data; 184 size_t size = *_size; 185 186 *nalStart = NULL; 187 *nalSize = 0; 188 189 if (size == 0) { 190 return -EAGAIN; 191 } 192 193 // Skip any number of leading 0x00. 194 195 size_t offset = 0; 196 while (offset < size && data[offset] == 0x00) { 197 ++offset; 198 } 199 200 if (offset == size) { 201 return -EAGAIN; 202 } 203 204 // A valid startcode consists of at least two 0x00 bytes followed by 0x01. 205 206 if (offset < 2 || data[offset] != 0x01) { 207 return ERROR_MALFORMED; 208 } 209 210 ++offset; 211 212 size_t startOffset = offset; 213 214 for (;;) { 215 while (offset < size && data[offset] != 0x01) { 216 ++offset; 217 } 218 219 if (offset == size) { 220 if (startCodeFollows) { 221 offset = size + 2; 222 break; 223 } 224 225 return -EAGAIN; 226 } 227 228 if (data[offset - 1] == 0x00 && data[offset - 2] == 0x00) { 229 break; 230 } 231 232 ++offset; 233 } 234 235 size_t endOffset = offset - 2; 236 while (endOffset > startOffset + 1 && data[endOffset - 1] == 0x00) { 237 --endOffset; 238 } 239 240 *nalStart = &data[startOffset]; 241 *nalSize = endOffset - startOffset; 242 243 if (offset + 2 < size) { 244 *_data = &data[offset - 2]; 245 *_size = size - offset + 2; 246 } else { 247 *_data = NULL; 248 *_size = 0; 249 } 250 251 return OK; 252 } 253 254 static sp<ABuffer> FindNAL( 255 const uint8_t *data, size_t size, unsigned nalType, 256 size_t *stopOffset) { 257 const uint8_t *nalStart; 258 size_t nalSize; 259 while (getNextNALUnit(&data, &size, &nalStart, &nalSize, true) == OK) { 260 if ((nalStart[0] & 0x1f) == nalType) { 261 sp<ABuffer> buffer = new ABuffer(nalSize); 262 memcpy(buffer->data(), nalStart, nalSize); 263 return buffer; 264 } 265 } 266 267 return NULL; 268 } 269 270 const char *AVCProfileToString(uint8_t profile) { 271 switch (profile) { 272 case kAVCProfileBaseline: 273 return "Baseline"; 274 case kAVCProfileMain: 275 return "Main"; 276 case kAVCProfileExtended: 277 return "Extended"; 278 case kAVCProfileHigh: 279 return "High"; 280 case kAVCProfileHigh10: 281 return "High 10"; 282 case kAVCProfileHigh422: 283 return "High 422"; 284 case kAVCProfileHigh444: 285 return "High 444"; 286 case kAVCProfileCAVLC444Intra: 287 return "CAVLC 444 Intra"; 288 default: return "Unknown"; 289 } 290 } 291 292 sp<MetaData> MakeAVCCodecSpecificData(const sp<ABuffer> &accessUnit) { 293 const uint8_t *data = accessUnit->data(); 294 size_t size = accessUnit->size(); 295 296 sp<ABuffer> seqParamSet = FindNAL(data, size, 7, NULL); 297 if (seqParamSet == NULL) { 298 return NULL; 299 } 300 301 int32_t width, height; 302 int32_t sarWidth, sarHeight; 303 FindAVCDimensions( 304 seqParamSet, &width, &height, &sarWidth, &sarHeight); 305 306 size_t stopOffset; 307 sp<ABuffer> picParamSet = FindNAL(data, size, 8, &stopOffset); 308 CHECK(picParamSet != NULL); 309 310 size_t csdSize = 311 1 + 3 + 1 + 1 312 + 2 * 1 + seqParamSet->size() 313 + 1 + 2 * 1 + picParamSet->size(); 314 315 sp<ABuffer> csd = new ABuffer(csdSize); 316 uint8_t *out = csd->data(); 317 318 *out++ = 0x01; // configurationVersion 319 memcpy(out, seqParamSet->data() + 1, 3); // profile/level... 320 321 uint8_t profile = out[0]; 322 uint8_t level = out[2]; 323 324 out += 3; 325 *out++ = (0x3f << 2) | 1; // lengthSize == 2 bytes 326 *out++ = 0xe0 | 1; 327 328 *out++ = seqParamSet->size() >> 8; 329 *out++ = seqParamSet->size() & 0xff; 330 memcpy(out, seqParamSet->data(), seqParamSet->size()); 331 out += seqParamSet->size(); 332 333 *out++ = 1; 334 335 *out++ = picParamSet->size() >> 8; 336 *out++ = picParamSet->size() & 0xff; 337 memcpy(out, picParamSet->data(), picParamSet->size()); 338 339 #if 0 340 ALOGI("AVC seq param set"); 341 hexdump(seqParamSet->data(), seqParamSet->size()); 342 #endif 343 344 sp<MetaData> meta = new MetaData; 345 meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_AVC); 346 347 meta->setData(kKeyAVCC, kTypeAVCC, csd->data(), csd->size()); 348 meta->setInt32(kKeyWidth, width); 349 meta->setInt32(kKeyHeight, height); 350 351 if (sarWidth > 1 || sarHeight > 1) { 352 // We treat 0:0 (unspecified) as 1:1. 353 354 meta->setInt32(kKeySARWidth, sarWidth); 355 meta->setInt32(kKeySARHeight, sarHeight); 356 357 ALOGI("found AVC codec config (%d x %d, %s-profile level %d.%d) " 358 "SAR %d : %d", 359 width, 360 height, 361 AVCProfileToString(profile), 362 level / 10, 363 level % 10, 364 sarWidth, 365 sarHeight); 366 } else { 367 ALOGI("found AVC codec config (%d x %d, %s-profile level %d.%d)", 368 width, 369 height, 370 AVCProfileToString(profile), 371 level / 10, 372 level % 10); 373 } 374 375 return meta; 376 } 377 378 bool IsIDR(const sp<ABuffer> &buffer) { 379 const uint8_t *data = buffer->data(); 380 size_t size = buffer->size(); 381 382 bool foundIDR = false; 383 384 const uint8_t *nalStart; 385 size_t nalSize; 386 while (getNextNALUnit(&data, &size, &nalStart, &nalSize, true) == OK) { 387 CHECK_GT(nalSize, 0u); 388 389 unsigned nalType = nalStart[0] & 0x1f; 390 391 if (nalType == 5) { 392 foundIDR = true; 393 break; 394 } 395 } 396 397 return foundIDR; 398 } 399 400 bool IsAVCReferenceFrame(const sp<ABuffer> &accessUnit) { 401 const uint8_t *data = accessUnit->data(); 402 size_t size = accessUnit->size(); 403 404 const uint8_t *nalStart; 405 size_t nalSize; 406 while (getNextNALUnit(&data, &size, &nalStart, &nalSize, true) == OK) { 407 CHECK_GT(nalSize, 0u); 408 409 unsigned nalType = nalStart[0] & 0x1f; 410 411 if (nalType == 5) { 412 return true; 413 } else if (nalType == 1) { 414 unsigned nal_ref_idc = (nalStart[0] >> 5) & 3; 415 return nal_ref_idc != 0; 416 } 417 } 418 419 return true; 420 } 421 422 sp<MetaData> MakeAACCodecSpecificData( 423 unsigned profile, unsigned sampling_freq_index, 424 unsigned channel_configuration) { 425 sp<MetaData> meta = new MetaData; 426 meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AAC); 427 428 CHECK_LE(sampling_freq_index, 11u); 429 static const int32_t kSamplingFreq[] = { 430 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 431 16000, 12000, 11025, 8000 432 }; 433 meta->setInt32(kKeySampleRate, kSamplingFreq[sampling_freq_index]); 434 meta->setInt32(kKeyChannelCount, channel_configuration); 435 436 static const uint8_t kStaticESDS[] = { 437 0x03, 22, 438 0x00, 0x00, // ES_ID 439 0x00, // streamDependenceFlag, URL_Flag, OCRstreamFlag 440 441 0x04, 17, 442 0x40, // Audio ISO/IEC 14496-3 443 0x00, 0x00, 0x00, 0x00, 444 0x00, 0x00, 0x00, 0x00, 445 0x00, 0x00, 0x00, 0x00, 446 447 0x05, 2, 448 // AudioSpecificInfo follows 449 450 // oooo offf fccc c000 451 // o - audioObjectType 452 // f - samplingFreqIndex 453 // c - channelConfig 454 }; 455 sp<ABuffer> csd = new ABuffer(sizeof(kStaticESDS) + 2); 456 memcpy(csd->data(), kStaticESDS, sizeof(kStaticESDS)); 457 458 csd->data()[sizeof(kStaticESDS)] = 459 ((profile + 1) << 3) | (sampling_freq_index >> 1); 460 461 csd->data()[sizeof(kStaticESDS) + 1] = 462 ((sampling_freq_index << 7) & 0x80) | (channel_configuration << 3); 463 464 meta->setData(kKeyESDS, 0, csd->data(), csd->size()); 465 466 return meta; 467 } 468 469 bool ExtractDimensionsFromVOLHeader( 470 const uint8_t *data, size_t size, int32_t *width, int32_t *height) { 471 ABitReader br(&data[4], size - 4); 472 br.skipBits(1); // random_accessible_vol 473 unsigned video_object_type_indication = br.getBits(8); 474 475 CHECK_NE(video_object_type_indication, 476 0x21u /* Fine Granularity Scalable */); 477 478 unsigned video_object_layer_verid; 479 unsigned video_object_layer_priority; 480 if (br.getBits(1)) { 481 video_object_layer_verid = br.getBits(4); 482 video_object_layer_priority = br.getBits(3); 483 } 484 unsigned aspect_ratio_info = br.getBits(4); 485 if (aspect_ratio_info == 0x0f /* extended PAR */) { 486 br.skipBits(8); // par_width 487 br.skipBits(8); // par_height 488 } 489 if (br.getBits(1)) { // vol_control_parameters 490 br.skipBits(2); // chroma_format 491 br.skipBits(1); // low_delay 492 if (br.getBits(1)) { // vbv_parameters 493 br.skipBits(15); // first_half_bit_rate 494 CHECK(br.getBits(1)); // marker_bit 495 br.skipBits(15); // latter_half_bit_rate 496 CHECK(br.getBits(1)); // marker_bit 497 br.skipBits(15); // first_half_vbv_buffer_size 498 CHECK(br.getBits(1)); // marker_bit 499 br.skipBits(3); // latter_half_vbv_buffer_size 500 br.skipBits(11); // first_half_vbv_occupancy 501 CHECK(br.getBits(1)); // marker_bit 502 br.skipBits(15); // latter_half_vbv_occupancy 503 CHECK(br.getBits(1)); // marker_bit 504 } 505 } 506 unsigned video_object_layer_shape = br.getBits(2); 507 CHECK_EQ(video_object_layer_shape, 0x00u /* rectangular */); 508 509 CHECK(br.getBits(1)); // marker_bit 510 unsigned vop_time_increment_resolution = br.getBits(16); 511 CHECK(br.getBits(1)); // marker_bit 512 513 if (br.getBits(1)) { // fixed_vop_rate 514 // range [0..vop_time_increment_resolution) 515 516 // vop_time_increment_resolution 517 // 2 => 0..1, 1 bit 518 // 3 => 0..2, 2 bits 519 // 4 => 0..3, 2 bits 520 // 5 => 0..4, 3 bits 521 // ... 522 523 CHECK_GT(vop_time_increment_resolution, 0u); 524 --vop_time_increment_resolution; 525 526 unsigned numBits = 0; 527 while (vop_time_increment_resolution > 0) { 528 ++numBits; 529 vop_time_increment_resolution >>= 1; 530 } 531 532 br.skipBits(numBits); // fixed_vop_time_increment 533 } 534 535 CHECK(br.getBits(1)); // marker_bit 536 unsigned video_object_layer_width = br.getBits(13); 537 CHECK(br.getBits(1)); // marker_bit 538 unsigned video_object_layer_height = br.getBits(13); 539 CHECK(br.getBits(1)); // marker_bit 540 541 unsigned interlaced = br.getBits(1); 542 543 *width = video_object_layer_width; 544 *height = video_object_layer_height; 545 546 return true; 547 } 548 549 bool GetMPEGAudioFrameSize( 550 uint32_t header, size_t *frame_size, 551 int *out_sampling_rate, int *out_channels, 552 int *out_bitrate, int *out_num_samples) { 553 *frame_size = 0; 554 555 if (out_sampling_rate) { 556 *out_sampling_rate = 0; 557 } 558 559 if (out_channels) { 560 *out_channels = 0; 561 } 562 563 if (out_bitrate) { 564 *out_bitrate = 0; 565 } 566 567 if (out_num_samples) { 568 *out_num_samples = 1152; 569 } 570 571 if ((header & 0xffe00000) != 0xffe00000) { 572 return false; 573 } 574 575 unsigned version = (header >> 19) & 3; 576 577 if (version == 0x01) { 578 return false; 579 } 580 581 unsigned layer = (header >> 17) & 3; 582 583 if (layer == 0x00) { 584 return false; 585 } 586 587 unsigned protection = (header >> 16) & 1; 588 589 unsigned bitrate_index = (header >> 12) & 0x0f; 590 591 if (bitrate_index == 0 || bitrate_index == 0x0f) { 592 // Disallow "free" bitrate. 593 return false; 594 } 595 596 unsigned sampling_rate_index = (header >> 10) & 3; 597 598 if (sampling_rate_index == 3) { 599 return false; 600 } 601 602 static const int kSamplingRateV1[] = { 44100, 48000, 32000 }; 603 int sampling_rate = kSamplingRateV1[sampling_rate_index]; 604 if (version == 2 /* V2 */) { 605 sampling_rate /= 2; 606 } else if (version == 0 /* V2.5 */) { 607 sampling_rate /= 4; 608 } 609 610 unsigned padding = (header >> 9) & 1; 611 612 if (layer == 3) { 613 // layer I 614 615 static const int kBitrateV1[] = { 616 32, 64, 96, 128, 160, 192, 224, 256, 617 288, 320, 352, 384, 416, 448 618 }; 619 620 static const int kBitrateV2[] = { 621 32, 48, 56, 64, 80, 96, 112, 128, 622 144, 160, 176, 192, 224, 256 623 }; 624 625 int bitrate = 626 (version == 3 /* V1 */) 627 ? kBitrateV1[bitrate_index - 1] 628 : kBitrateV2[bitrate_index - 1]; 629 630 if (out_bitrate) { 631 *out_bitrate = bitrate; 632 } 633 634 *frame_size = (12000 * bitrate / sampling_rate + padding) * 4; 635 636 if (out_num_samples) { 637 *out_num_samples = 384; 638 } 639 } else { 640 // layer II or III 641 642 static const int kBitrateV1L2[] = { 643 32, 48, 56, 64, 80, 96, 112, 128, 644 160, 192, 224, 256, 320, 384 645 }; 646 647 static const int kBitrateV1L3[] = { 648 32, 40, 48, 56, 64, 80, 96, 112, 649 128, 160, 192, 224, 256, 320 650 }; 651 652 static const int kBitrateV2[] = { 653 8, 16, 24, 32, 40, 48, 56, 64, 654 80, 96, 112, 128, 144, 160 655 }; 656 657 int bitrate; 658 if (version == 3 /* V1 */) { 659 bitrate = (layer == 2 /* L2 */) 660 ? kBitrateV1L2[bitrate_index - 1] 661 : kBitrateV1L3[bitrate_index - 1]; 662 663 if (out_num_samples) { 664 *out_num_samples = 1152; 665 } 666 } else { 667 // V2 (or 2.5) 668 669 bitrate = kBitrateV2[bitrate_index - 1]; 670 if (out_num_samples) { 671 *out_num_samples = (layer == 1 /* L3 */) ? 576 : 1152; 672 } 673 } 674 675 if (out_bitrate) { 676 *out_bitrate = bitrate; 677 } 678 679 if (version == 3 /* V1 */) { 680 *frame_size = 144000 * bitrate / sampling_rate + padding; 681 } else { 682 // V2 or V2.5 683 size_t tmp = (layer == 1 /* L3 */) ? 72000 : 144000; 684 *frame_size = tmp * bitrate / sampling_rate + padding; 685 } 686 } 687 688 if (out_sampling_rate) { 689 *out_sampling_rate = sampling_rate; 690 } 691 692 if (out_channels) { 693 int channel_mode = (header >> 6) & 3; 694 695 *out_channels = (channel_mode == 3) ? 1 : 2; 696 } 697 698 return true; 699 } 700 701 } // namespace android 702 703