Home | History | Annotate | Download | only in common_time
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * A service that exchanges time synchronization information between
     19  * a master that defines a timeline and clients that follow the timeline.
     20  */
     21 
     22 #define __STDC_LIMIT_MACROS
     23 #define LOG_TAG "common_time"
     24 #include <utils/Log.h>
     25 #include <inttypes.h>
     26 #include <stdint.h>
     27 
     28 #include <common_time/local_clock.h>
     29 #include <assert.h>
     30 
     31 #include "clock_recovery.h"
     32 #include "common_clock.h"
     33 #ifdef TIME_SERVICE_DEBUG
     34 #include "diag_thread.h"
     35 #endif
     36 
     37 // Define log macro so we can make LOGV into LOGE when we are exclusively
     38 // debugging this code.
     39 #ifdef TIME_SERVICE_DEBUG
     40 #define LOG_TS ALOGE
     41 #else
     42 #define LOG_TS ALOGV
     43 #endif
     44 
     45 namespace android {
     46 
     47 ClockRecoveryLoop::ClockRecoveryLoop(LocalClock* local_clock,
     48                                      CommonClock* common_clock) {
     49     assert(NULL != local_clock);
     50     assert(NULL != common_clock);
     51 
     52     local_clock_  = local_clock;
     53     common_clock_ = common_clock;
     54 
     55     local_clock_can_slew_ = local_clock_->initCheck() &&
     56                            (local_clock_->setLocalSlew(0) == OK);
     57     tgt_correction_ = 0;
     58     cur_correction_ = 0;
     59 
     60     // Precompute the max rate at which we are allowed to change the VCXO
     61     // control.
     62     uint64_t N = 0x10000ull * 1000ull;
     63     uint64_t D = local_clock_->getLocalFreq() * kMinFullRangeSlewChange_mSec;
     64     LinearTransform::reduce(&N, &D);
     65     while ((N > INT32_MAX) || (D > UINT32_MAX)) {
     66         N >>= 1;
     67         D >>= 1;
     68         LinearTransform::reduce(&N, &D);
     69     }
     70     time_to_cur_slew_.a_to_b_numer = static_cast<int32_t>(N);
     71     time_to_cur_slew_.a_to_b_denom = static_cast<uint32_t>(D);
     72 
     73     reset(true, true);
     74 
     75 #ifdef TIME_SERVICE_DEBUG
     76     diag_thread_ = new DiagThread(common_clock_, local_clock_);
     77     if (diag_thread_ != NULL) {
     78         status_t res = diag_thread_->startWorkThread();
     79         if (res != OK)
     80             ALOGW("Failed to start A@H clock recovery diagnostic thread.");
     81     } else
     82         ALOGW("Failed to allocate diagnostic thread.");
     83 #endif
     84 }
     85 
     86 ClockRecoveryLoop::~ClockRecoveryLoop() {
     87 #ifdef TIME_SERVICE_DEBUG
     88     diag_thread_->stopWorkThread();
     89 #endif
     90 }
     91 
// Constants.
//
// Terms of the velocity form PI control equation evaluated in
// pushDisciplineEvent(): dT is the time-step term, Kc the controller gain and
// Ti the integral time term.
const float ClockRecoveryLoop::dT = 1.0;
const float ClockRecoveryLoop::Kc = 1.0f;
const float ClockRecoveryLoop::Ti = 15.0f;
// Gain (<1) applied to each change in controller output before it is
// accumulated into CO.
const float ClockRecoveryLoop::Tf = 0.05;
// Parameters of the filter which tracks the controller bias (CObias).
// bias_Alpha is the blend factor applied to CO on each update; it is derived
// from bias_Fc by way of bias_RC.  Each initializer reads constants defined
// above it, so keep these definitions in this order.
const float ClockRecoveryLoop::bias_Fc = 0.01;
const float ClockRecoveryLoop::bias_RC = (dT / (2 * 3.14159f * bias_Fc));
const float ClockRecoveryLoop::bias_Alpha = (dT / (bias_RC + dT));
// Error magnitude (before the RTT of the data point is added on) beyond which
// pushDisciplineEvent() resets the loop.  Presumably in usec -- confirm.
const int64_t ClockRecoveryLoop::panic_thresh_ = 50000;
// Data points whose RTT is below this value are always used for control.
// Presumably in usec -- confirm.
const int64_t ClockRecoveryLoop::control_thresh_ = 10000;
// Clamp limits for the controller output CO (+/- 100ppm).
const float ClockRecoveryLoop::COmin = -100.0f;
const float ClockRecoveryLoop::COmax = 100.0f;
// Minimum time, in mSec, over which the slew control may sweep its full
// range; used to derive the slope of the slew ramp.
const uint32_t ClockRecoveryLoop::kMinFullRangeSlewChange_mSec = 300;
// Timeout, in mSec, scheduled between steps of an in-progress slew change.
const int ClockRecoveryLoop::kSlewChangeStepPeriod_mSec = 10;
    106 
    107 
    108 void ClockRecoveryLoop::reset(bool position, bool frequency) {
    109     Mutex::Autolock lock(&lock_);
    110     reset_l(position, frequency);
    111 }
    112 
    113 uint32_t ClockRecoveryLoop::findMinRTTNdx(DisciplineDataPoint* data,
    114                                           uint32_t count) {
    115     uint32_t min_rtt = 0;
    116     for (uint32_t i = 1; i < count; ++i)
    117         if (data[min_rtt].rtt > data[i].rtt)
    118             min_rtt = i;
    119 
    120     return min_rtt;
    121 }
    122 
    123 bool ClockRecoveryLoop::pushDisciplineEvent(int64_t local_time,
    124                                             int64_t nominal_common_time,
    125                                             int64_t rtt) {
    126     Mutex::Autolock lock(&lock_);
    127 
    128     int64_t local_common_time = 0;
    129     common_clock_->localToCommon(local_time, &local_common_time);
    130     int64_t raw_delta = nominal_common_time - local_common_time;
    131 
    132 #ifdef TIME_SERVICE_DEBUG
    133     ALOGE("local=%lld, common=%lld, delta=%lld, rtt=%lld\n",
    134          local_common_time, nominal_common_time,
    135          raw_delta, rtt);
    136 #endif
    137 
    138     // If we have not defined a basis for common time, then we need to use these
    139     // initial points to do so.  In order to avoid significant initial error
    140     // from a particularly bad startup data point, we collect the first N data
    141     // points and choose the best of them before moving on.
    142     if (!common_clock_->isValid()) {
    143         if (startup_filter_wr_ < kStartupFilterSize) {
    144             DisciplineDataPoint& d =  startup_filter_data_[startup_filter_wr_];
    145             d.local_time = local_time;
    146             d.nominal_common_time = nominal_common_time;
    147             d.rtt = rtt;
    148             startup_filter_wr_++;
    149         }
    150 
    151         if (startup_filter_wr_ == kStartupFilterSize) {
    152             uint32_t min_rtt = findMinRTTNdx(startup_filter_data_,
    153                     kStartupFilterSize);
    154 
    155             common_clock_->setBasis(
    156                     startup_filter_data_[min_rtt].local_time,
    157                     startup_filter_data_[min_rtt].nominal_common_time);
    158         }
    159 
    160         return true;
    161     }
    162 
    163     int64_t observed_common;
    164     int64_t delta;
    165     float delta_f, dCO;
    166     int32_t tgt_correction;
    167 
    168     if (OK != common_clock_->localToCommon(local_time, &observed_common)) {
    169         // Since we just checked to make certain that this conversion was valid,
    170         // and no one else in the system should be messing with it, if this
    171         // conversion is suddenly invalid, it is a good reason to panic.
    172         ALOGE("Failed to convert local time to common time in %s:%d",
    173                 __PRETTY_FUNCTION__, __LINE__);
    174         return false;
    175     }
    176 
    177     // Implement a filter which should match NTP filtering behavior when a
    178     // client is associated with only one peer of lower stratum.  Basically,
    179     // always use the best of the N last data points, where best is defined as
    180     // lowest round trip time.  NTP uses an N of 8; we use a value of 6.
    181     //
    182     // TODO(johngro) : experiment with other filter strategies.  The goal here
    183     // is to mitigate the effects of high RTT data points which typically have
    184     // large asymmetries in the TX/RX legs.  Downside of the existing NTP
    185     // approach (particularly because of the PID controller we are using to
    186     // produce the control signal from the filtered data) are that the rate at
    187     // which discipline events are actually acted upon becomes irregular and can
    188     // become drawn out (the time between actionable event can go way up).  If
    189     // the system receives a strong high quality data point, the proportional
    190     // component of the controller can produce a strong correction which is left
    191     // in place for too long causing overshoot.  In addition, the integral
    192     // component of the system currently is an approximation based on the
    193     // assumption of a more or less homogeneous sampling of the error.  Its
    194     // unclear what the effect of undermining this assumption would be right
    195     // now.
    196 
    197     // Two ideas which come to mind immediately would be to...
    198     // 1) Keep a history of more data points (32 or so) and ignore data points
    199     //    whose RTT is more than a certain number of standard deviations outside
    200     //    of the norm.
    201     // 2) Eliminate the PID controller portion of this system entirely.
    202     //    Instead, move to a system which uses a very wide filter (128 data
    203     //    points or more) with a sum-of-least-squares line fitting approach to
    204     //    tracking the long term drift.  This would take the place of the I
    205     //    component in the current PID controller.  Also use a much more narrow
    206     //    outlier-rejector filter (as described in #1) to drive a short term
    207     //    correction factor similar to the P component of the PID controller.
    208     assert(filter_wr_ < kFilterSize);
    209     filter_data_[filter_wr_].local_time           = local_time;
    210     filter_data_[filter_wr_].observed_common_time = observed_common;
    211     filter_data_[filter_wr_].nominal_common_time  = nominal_common_time;
    212     filter_data_[filter_wr_].rtt                  = rtt;
    213     filter_data_[filter_wr_].point_used           = false;
    214     uint32_t current_point = filter_wr_;
    215     filter_wr_ = (filter_wr_ + 1) % kFilterSize;
    216     if (!filter_wr_)
    217         filter_full_ = true;
    218 
    219     uint32_t scan_end = filter_full_ ? kFilterSize : filter_wr_;
    220     uint32_t min_rtt = findMinRTTNdx(filter_data_, scan_end);
    221     // We only use packets with low RTTs for control. If the packet RTT
    222     // is less than the panic threshold, we can probably eat the jitter with the
    223     // control loop. Otherwise, take the packet only if it better than all
    224     // of the packets we have in the history. That way we try to track
    225     // something, even if it is noisy.
    226     if (current_point == min_rtt || rtt < control_thresh_) {
    227         delta_f = delta = nominal_common_time - observed_common;
    228 
    229         last_error_est_valid_ = true;
    230         last_error_est_usec_ = delta;
    231 
    232         // Compute the error then clamp to the panic threshold.  If we ever
    233         // exceed this amt of error, its time to panic and reset the system.
    234         // Given that the error in the measurement of the error could be as
    235         // high as the RTT of the data point, we don't actually panic until
    236         // the implied error (delta) is greater than the absolute panic
    237         // threashold plus the RTT.  IOW - we don't panic until we are
    238         // absoluely sure that our best case sync is worse than the absolute
    239         // panic threshold.
    240         int64_t effective_panic_thresh = panic_thresh_ + rtt;
    241         if ((delta > effective_panic_thresh) ||
    242             (delta < -effective_panic_thresh)) {
    243             // PANIC!!!
    244             reset_l(false, true);
    245             return false;
    246         }
    247 
    248     } else {
    249         // We do not have a good packet to look at, but we also do not want to
    250         // free-run the clock at some crazy slew rate. So we guess the
    251         // trajectory of the clock based on the last controller output and the
    252         // estimated bias of our clock against the master.
    253         // The net effect of this is that CO == CObias after some extended
    254         // period of no feedback.
    255         delta_f = last_delta_f_ - dT*(CO - CObias);
    256         delta = delta_f;
    257     }
    258 
    259     // Velocity form PI control equation.
    260     dCO = Kc * (1.0f + dT/Ti) * delta_f - Kc * last_delta_f_;
    261     CO += dCO * Tf; // Filter CO by applying gain <1 here.
    262 
    263     // Save error terms for later.
    264     last_delta_f_ = delta_f;
    265 
    266     // Clamp CO to +/- 100ppm.
    267     if (CO < COmin)
    268         CO = COmin;
    269     else if (CO > COmax)
    270         CO = COmax;
    271 
    272     // Update the controller bias.
    273     CObias = bias_Alpha * CO + (1.0f - bias_Alpha) * lastCObias;
    274     lastCObias = CObias;
    275 
    276     // Convert PPM to 16-bit int range. Add some guard band (-0.01) so we
    277     // don't get fp weirdness.
    278     tgt_correction = CO * 327.66;
    279 
    280     // If there was a change in the amt of correction to use, update the
    281     // system.
    282     setTargetCorrection_l(tgt_correction);
    283 
    284     LOG_TS("clock_loop %" PRId64 " %f %f %f %d\n", raw_delta, delta_f, CO, CObias, tgt_correction);
    285 
    286 #ifdef TIME_SERVICE_DEBUG
    287     diag_thread_->pushDisciplineEvent(
    288             local_time,
    289             observed_common,
    290             nominal_common_time,
    291             tgt_correction,
    292             rtt);
    293 #endif
    294 
    295     return true;
    296 }
    297 
    298 int32_t ClockRecoveryLoop::getLastErrorEstimate() {
    299     Mutex::Autolock lock(&lock_);
    300 
    301     if (last_error_est_valid_)
    302         return last_error_est_usec_;
    303     else
    304         return ICommonClock::kErrorEstimateUnknown;
    305 }
    306 
    307 void ClockRecoveryLoop::reset_l(bool position, bool frequency) {
    308     assert(NULL != common_clock_);
    309 
    310     if (position) {
    311         common_clock_->resetBasis();
    312         startup_filter_wr_ = 0;
    313     }
    314 
    315     if (frequency) {
    316         last_error_est_valid_ = false;
    317         last_error_est_usec_ = 0;
    318         last_delta_f_ = 0.0;
    319         CO = 0.0f;
    320         lastCObias = CObias = 0.0f;
    321         setTargetCorrection_l(0);
    322         applySlew_l();
    323     }
    324 
    325     filter_wr_   = 0;
    326     filter_full_ = false;
    327 }
    328 
    329 void ClockRecoveryLoop::setTargetCorrection_l(int32_t tgt) {
    330     // When we make a change to the slew rate, we need to be careful to not
    331     // change it too quickly as it can anger some HDMI sinks out there, notably
    332     // some Sony panels from the 2010-2011 timeframe.  From experimenting with
    333     // some of these sinks, it seems like swinging from one end of the range to
    334     // another in less that 190mSec or so can start to cause trouble.  Adding in
    335     // a hefty margin, we limit the system to a full range sweep in no less than
    336     // 300mSec.
    337     if (tgt_correction_ != tgt) {
    338         int64_t now = local_clock_->getLocalTime();
    339 
    340         tgt_correction_ = tgt;
    341 
    342         // Set up the transformation to figure out what the slew should be at
    343         // any given point in time in the future.
    344         time_to_cur_slew_.a_zero = now;
    345         time_to_cur_slew_.b_zero = cur_correction_;
    346 
    347         // Make sure the sign of the slope is headed in the proper direction.
    348         bool needs_increase = (cur_correction_ < tgt_correction_);
    349         bool is_increasing  = (time_to_cur_slew_.a_to_b_numer > 0);
    350         if (( needs_increase && !is_increasing) ||
    351             (!needs_increase &&  is_increasing)) {
    352             time_to_cur_slew_.a_to_b_numer = -time_to_cur_slew_.a_to_b_numer;
    353         }
    354 
    355         // Finally, figure out when the change will be finished and start the
    356         // slew operation.
    357         time_to_cur_slew_.doReverseTransform(tgt_correction_,
    358                                              &slew_change_end_time_);
    359 
    360         applySlew_l();
    361     }
    362 }
    363 
    364 bool ClockRecoveryLoop::applySlew_l() {
    365     bool ret = true;
    366 
    367     // If cur == tgt, there is no ongoing sleq rate change and we are already
    368     // finished.
    369     if (cur_correction_ == tgt_correction_)
    370         goto bailout;
    371 
    372     if (local_clock_can_slew_) {
    373         int64_t now = local_clock_->getLocalTime();
    374         int64_t tmp;
    375 
    376         if (now >= slew_change_end_time_) {
    377             cur_correction_ = tgt_correction_;
    378             next_slew_change_timeout_.setTimeout(-1);
    379         } else {
    380             time_to_cur_slew_.doForwardTransform(now, &tmp);
    381 
    382             if (tmp > INT16_MAX)
    383                 cur_correction_ = INT16_MAX;
    384             else if (tmp < INT16_MIN)
    385                 cur_correction_ = INT16_MIN;
    386             else
    387                 cur_correction_ = static_cast<int16_t>(tmp);
    388 
    389             next_slew_change_timeout_.setTimeout(kSlewChangeStepPeriod_mSec);
    390             ret = false;
    391         }
    392 
    393         local_clock_->setLocalSlew(cur_correction_);
    394     } else {
    395         // Since we are not actually changing the rate of a HW clock, we don't
    396         // need to worry to much about changing the slew rate so fast that we
    397         // anger any downstream HDMI devices.
    398         cur_correction_ = tgt_correction_;
    399         next_slew_change_timeout_.setTimeout(-1);
    400 
    401         // The SW clock recovery implemented by the common clock class expects
    402         // values expressed in PPM. CO is in ppm.
    403         common_clock_->setSlew(local_clock_->getLocalTime(), CO);
    404     }
    405 
    406 bailout:
    407     return ret;
    408 }
    409 
    410 int ClockRecoveryLoop::applyRateLimitedSlew() {
    411     Mutex::Autolock lock(&lock_);
    412 
    413     int ret = next_slew_change_timeout_.msecTillTimeout();
    414     if (!ret) {
    415         if (applySlew_l())
    416             next_slew_change_timeout_.setTimeout(-1);
    417         ret = next_slew_change_timeout_.msecTillTimeout();
    418     }
    419 
    420     return ret;
    421 }
    422 
    423 }  // namespace android
    424