/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
     10 
     11 #include "webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.h"
     12 
     13 #include <assert.h>  // assert
     14 #include <math.h>   // pow()
     15 #include <string.h>  // memcpy()
     16 
     17 #include "webrtc/base/logging.h"
     18 #include "webrtc/base/trace_event.h"
     19 #include "webrtc/system_wrappers/include/critical_section_wrapper.h"
     20 
     21 namespace webrtc {
     22 RTPReceiverStrategy* RTPReceiverStrategy::CreateAudioStrategy(
     23     RtpData* data_callback,
     24     RtpAudioFeedback* incoming_messages_callback) {
     25   return new RTPReceiverAudio(data_callback, incoming_messages_callback);
     26 }
     27 
     28 RTPReceiverAudio::RTPReceiverAudio(RtpData* data_callback,
     29                                    RtpAudioFeedback* incoming_messages_callback)
     30     : RTPReceiverStrategy(data_callback),
     31       TelephoneEventHandler(),
     32       last_received_frequency_(8000),
     33       telephone_event_forward_to_decoder_(false),
     34       telephone_event_payload_type_(-1),
     35       cng_nb_payload_type_(-1),
     36       cng_wb_payload_type_(-1),
     37       cng_swb_payload_type_(-1),
     38       cng_fb_payload_type_(-1),
     39       cng_payload_type_(-1),
     40       g722_payload_type_(-1),
     41       last_received_g722_(false),
     42       num_energy_(0),
     43       current_remote_energy_(),
     44       cb_audio_feedback_(incoming_messages_callback) {
     45   last_payload_.Audio.channels = 1;
     46   memset(current_remote_energy_, 0, sizeof(current_remote_energy_));
     47 }
     48 
     49 // Outband TelephoneEvent(DTMF) detection
     50 void RTPReceiverAudio::SetTelephoneEventForwardToDecoder(
     51     bool forward_to_decoder) {
     52   CriticalSectionScoped lock(crit_sect_.get());
     53   telephone_event_forward_to_decoder_ = forward_to_decoder;
     54 }
     55 
     56 // Is forwarding of outband telephone events turned on/off?
     57 bool RTPReceiverAudio::TelephoneEventForwardToDecoder() const {
     58   CriticalSectionScoped lock(crit_sect_.get());
     59   return telephone_event_forward_to_decoder_;
     60 }
     61 
     62 bool RTPReceiverAudio::TelephoneEventPayloadType(
     63     int8_t payload_type) const {
     64   CriticalSectionScoped lock(crit_sect_.get());
     65   return telephone_event_payload_type_ == payload_type;
     66 }
     67 
     68 bool RTPReceiverAudio::CNGPayloadType(int8_t payload_type,
     69                                       uint32_t* frequency,
     70                                       bool* cng_payload_type_has_changed) {
     71   CriticalSectionScoped lock(crit_sect_.get());
     72   *cng_payload_type_has_changed = false;
     73 
     74   //  We can have four CNG on 8000Hz, 16000Hz, 32000Hz and 48000Hz.
     75   if (cng_nb_payload_type_ == payload_type) {
     76     *frequency = 8000;
     77     if (cng_payload_type_ != -1 && cng_payload_type_ != cng_nb_payload_type_)
     78       *cng_payload_type_has_changed = true;
     79 
     80     cng_payload_type_ = cng_nb_payload_type_;
     81     return true;
     82   } else if (cng_wb_payload_type_ == payload_type) {
     83     // if last received codec is G.722 we must use frequency 8000
     84     if (last_received_g722_) {
     85       *frequency = 8000;
     86     } else {
     87       *frequency = 16000;
     88     }
     89     if (cng_payload_type_ != -1 && cng_payload_type_ != cng_wb_payload_type_)
     90       *cng_payload_type_has_changed = true;
     91     cng_payload_type_ = cng_wb_payload_type_;
     92     return true;
     93   } else if (cng_swb_payload_type_ == payload_type) {
     94     *frequency = 32000;
     95     if ((cng_payload_type_ != -1) &&
     96         (cng_payload_type_ != cng_swb_payload_type_))
     97       *cng_payload_type_has_changed = true;
     98     cng_payload_type_ = cng_swb_payload_type_;
     99     return true;
    100   } else if (cng_fb_payload_type_ == payload_type) {
    101     *frequency = 48000;
    102     if (cng_payload_type_ != -1 && cng_payload_type_ != cng_fb_payload_type_)
    103       *cng_payload_type_has_changed = true;
    104     cng_payload_type_ = cng_fb_payload_type_;
    105     return true;
    106   } else {
    107     //  not CNG
    108     if (g722_payload_type_ == payload_type) {
    109       last_received_g722_ = true;
    110     } else {
    111       last_received_g722_ = false;
    112     }
    113   }
    114   return false;
    115 }
    116 
    117 bool RTPReceiverAudio::ShouldReportCsrcChanges(uint8_t payload_type) const {
    118   // Don't do this for DTMF packets, otherwise it's fine.
    119   return !TelephoneEventPayloadType(payload_type);
    120 }
    121 
    122 // -   Sample based or frame based codecs based on RFC 3551
    123 // -
    124 // -   NOTE! There is one error in the RFC, stating G.722 uses 8 bits/samples.
    125 // -   The correct rate is 4 bits/sample.
    126 // -
    127 // -   name of                              sampling              default
    128 // -   encoding  sample/frame  bits/sample      rate  ms/frame  ms/packet
    129 // -
    130 // -   Sample based audio codecs
    131 // -   DVI4      sample        4                var.                   20
    132 // -   G722      sample        4              16,000                   20
    133 // -   G726-40   sample        5               8,000                   20
    134 // -   G726-32   sample        4               8,000                   20
    135 // -   G726-24   sample        3               8,000                   20
    136 // -   G726-16   sample        2               8,000                   20
    137 // -   L8        sample        8                var.                   20
    138 // -   L16       sample        16               var.                   20
    139 // -   PCMA      sample        8                var.                   20
    140 // -   PCMU      sample        8                var.                   20
    141 // -
    142 // -   Frame based audio codecs
    143 // -   G723      frame         N/A             8,000        30         30
    144 // -   G728      frame         N/A             8,000       2.5         20
    145 // -   G729      frame         N/A             8,000        10         20
    146 // -   G729D     frame         N/A             8,000        10         20
    147 // -   G729E     frame         N/A             8,000        10         20
    148 // -   GSM       frame         N/A             8,000        20         20
    149 // -   GSM-EFR   frame         N/A             8,000        20         20
    150 // -   LPC       frame         N/A             8,000        20         20
    151 // -   MPA       frame         N/A              var.      var.
    152 // -
    153 // -   G7221     frame         N/A
    154 int32_t RTPReceiverAudio::OnNewPayloadTypeCreated(
    155     const char payload_name[RTP_PAYLOAD_NAME_SIZE],
    156     int8_t payload_type,
    157     uint32_t frequency) {
    158   CriticalSectionScoped lock(crit_sect_.get());
    159 
    160   if (RtpUtility::StringCompare(payload_name, "telephone-event", 15)) {
    161     telephone_event_payload_type_ = payload_type;
    162   }
    163   if (RtpUtility::StringCompare(payload_name, "cn", 2)) {
    164     //  we can have three CNG on 8000Hz, 16000Hz and 32000Hz
    165     if (frequency == 8000) {
    166       cng_nb_payload_type_ = payload_type;
    167     } else if (frequency == 16000) {
    168       cng_wb_payload_type_ = payload_type;
    169     } else if (frequency == 32000) {
    170       cng_swb_payload_type_ = payload_type;
    171     } else if (frequency == 48000) {
    172       cng_fb_payload_type_ = payload_type;
    173     } else {
    174       assert(false);
    175       return -1;
    176     }
    177   }
    178   return 0;
    179 }
    180 
    181 int32_t RTPReceiverAudio::ParseRtpPacket(WebRtcRTPHeader* rtp_header,
    182                                          const PayloadUnion& specific_payload,
    183                                          bool is_red,
    184                                          const uint8_t* payload,
    185                                          size_t payload_length,
    186                                          int64_t timestamp_ms,
    187                                          bool is_first_packet) {
    188   TRACE_EVENT2(TRACE_DISABLED_BY_DEFAULT("webrtc_rtp"), "Audio::ParseRtp",
    189                "seqnum", rtp_header->header.sequenceNumber, "timestamp",
    190                rtp_header->header.timestamp);
    191   rtp_header->type.Audio.numEnergy = rtp_header->header.numCSRCs;
    192   num_energy_ = rtp_header->type.Audio.numEnergy;
    193   if (rtp_header->type.Audio.numEnergy > 0 &&
    194       rtp_header->type.Audio.numEnergy <= kRtpCsrcSize) {
    195     memcpy(current_remote_energy_,
    196            rtp_header->type.Audio.arrOfEnergy,
    197            rtp_header->type.Audio.numEnergy);
    198   }
    199 
    200   return ParseAudioCodecSpecific(rtp_header,
    201                                  payload,
    202                                  payload_length,
    203                                  specific_payload.Audio,
    204                                  is_red);
    205 }
    206 
    207 int RTPReceiverAudio::GetPayloadTypeFrequency() const {
    208   CriticalSectionScoped lock(crit_sect_.get());
    209   if (last_received_g722_) {
    210     return 8000;
    211   }
    212   return last_received_frequency_;
    213 }
    214 
    215 RTPAliveType RTPReceiverAudio::ProcessDeadOrAlive(
    216     uint16_t last_payload_length) const {
    217 
    218   // Our CNG is 9 bytes; if it's a likely CNG the receiver needs to check
    219   // kRtpNoRtp against NetEq speech_type kOutputPLCtoCNG.
    220   if (last_payload_length < 10) {  // our CNG is 9 bytes
    221     return kRtpNoRtp;
    222   } else {
    223     return kRtpDead;
    224   }
    225 }
    226 
    227 void RTPReceiverAudio::CheckPayloadChanged(int8_t payload_type,
    228                                            PayloadUnion* specific_payload,
    229                                            bool* should_discard_changes) {
    230   *should_discard_changes = false;
    231 
    232   if (TelephoneEventPayloadType(payload_type)) {
    233     // Don't do callbacks for DTMF packets.
    234     *should_discard_changes = true;
    235     return;
    236   }
    237   // frequency is updated for CNG
    238   bool cng_payload_type_has_changed = false;
    239   bool is_cng_payload_type = CNGPayloadType(payload_type,
    240                                             &specific_payload->Audio.frequency,
    241                                             &cng_payload_type_has_changed);
    242 
    243   if (is_cng_payload_type) {
    244     // Don't do callbacks for DTMF packets.
    245     *should_discard_changes = true;
    246     return;
    247   }
    248 }
    249 
    250 int RTPReceiverAudio::Energy(uint8_t array_of_energy[kRtpCsrcSize]) const {
    251   CriticalSectionScoped cs(crit_sect_.get());
    252 
    253   assert(num_energy_ <= kRtpCsrcSize);
    254 
    255   if (num_energy_ > 0) {
    256     memcpy(array_of_energy, current_remote_energy_,
    257            sizeof(uint8_t) * num_energy_);
    258   }
    259   return num_energy_;
    260 }
    261 
    262 int32_t RTPReceiverAudio::InvokeOnInitializeDecoder(
    263     RtpFeedback* callback,
    264     int8_t payload_type,
    265     const char payload_name[RTP_PAYLOAD_NAME_SIZE],
    266     const PayloadUnion& specific_payload) const {
    267   if (-1 ==
    268       callback->OnInitializeDecoder(
    269           payload_type, payload_name, specific_payload.Audio.frequency,
    270           specific_payload.Audio.channels, specific_payload.Audio.rate)) {
    271     LOG(LS_ERROR) << "Failed to create decoder for payload type: "
    272                   << payload_name << "/" << static_cast<int>(payload_type);
    273     return -1;
    274   }
    275   return 0;
    276 }
    277 
    278 // We are not allowed to have any critsects when calling data_callback.
    279 int32_t RTPReceiverAudio::ParseAudioCodecSpecific(
    280     WebRtcRTPHeader* rtp_header,
    281     const uint8_t* payload_data,
    282     size_t payload_length,
    283     const AudioPayload& audio_specific,
    284     bool is_red) {
    285 
    286   if (payload_length == 0) {
    287     return 0;
    288   }
    289 
    290   bool telephone_event_packet =
    291       TelephoneEventPayloadType(rtp_header->header.payloadType);
    292   if (telephone_event_packet) {
    293     CriticalSectionScoped lock(crit_sect_.get());
    294 
    295     // RFC 4733 2.3
    296     // 0                   1                   2                   3
    297     // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    298     // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    299     // |     event     |E|R| volume    |          duration             |
    300     // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    301     //
    302     if (payload_length % 4 != 0) {
    303       return -1;
    304     }
    305     size_t number_of_events = payload_length / 4;
    306 
    307     // sanity
    308     if (number_of_events >= MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS) {
    309       number_of_events = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS;
    310     }
    311     for (size_t n = 0; n < number_of_events; ++n) {
    312       bool end = (payload_data[(4 * n) + 1] & 0x80) ? true : false;
    313 
    314       std::set<uint8_t>::iterator event =
    315           telephone_event_reported_.find(payload_data[4 * n]);
    316 
    317       if (event != telephone_event_reported_.end()) {
    318         // we have already seen this event
    319         if (end) {
    320           telephone_event_reported_.erase(payload_data[4 * n]);
    321         }
    322       } else {
    323         if (end) {
    324           // don't add if it's a end of a tone
    325         } else {
    326           telephone_event_reported_.insert(payload_data[4 * n]);
    327         }
    328       }
    329     }
    330 
    331     // RFC 4733 2.5.1.3 & 2.5.2.3 Long-Duration Events
    332     // should not be a problem since we don't care about the duration
    333 
    334     // RFC 4733 See 2.5.1.5. & 2.5.2.4.  Multiple Events in a Packet
    335   }
    336 
    337   {
    338     CriticalSectionScoped lock(crit_sect_.get());
    339 
    340     if (!telephone_event_packet) {
    341       last_received_frequency_ = audio_specific.frequency;
    342     }
    343 
    344     // Check if this is a CNG packet, receiver might want to know
    345     uint32_t ignored;
    346     bool also_ignored;
    347     if (CNGPayloadType(rtp_header->header.payloadType,
    348                        &ignored,
    349                        &also_ignored)) {
    350       rtp_header->type.Audio.isCNG = true;
    351       rtp_header->frameType = kAudioFrameCN;
    352     } else {
    353       rtp_header->frameType = kAudioFrameSpeech;
    354       rtp_header->type.Audio.isCNG = false;
    355     }
    356 
    357     // check if it's a DTMF event, hence something we can playout
    358     if (telephone_event_packet) {
    359       if (!telephone_event_forward_to_decoder_) {
    360         // don't forward event to decoder
    361         return 0;
    362       }
    363       std::set<uint8_t>::iterator first =
    364           telephone_event_reported_.begin();
    365       if (first != telephone_event_reported_.end() && *first > 15) {
    366         // don't forward non DTMF events
    367         return 0;
    368       }
    369     }
    370   }
    371   // TODO(holmer): Break this out to have RED parsing handled generically.
    372   if (is_red && !(payload_data[0] & 0x80)) {
    373     // we recive only one frame packed in a RED packet remove the RED wrapper
    374     rtp_header->header.payloadType = payload_data[0];
    375 
    376     // only one frame in the RED strip the one byte to help NetEq
    377     return data_callback_->OnReceivedPayloadData(
    378         payload_data + 1, payload_length - 1, rtp_header);
    379   }
    380 
    381   rtp_header->type.Audio.channel = audio_specific.channels;
    382   return data_callback_->OnReceivedPayloadData(
    383       payload_data, payload_length, rtp_header);
    384 }
    385 }  // namespace webrtc
    386