Home | History | Annotate | Download | only in common_time
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * A service that exchanges time synchronization information between
     19  * a master that defines a timeline and clients that follow the timeline.
     20  */
     21 
     22 #define __STDC_LIMIT_MACROS
     23 #define LOG_TAG "common_time"
     24 #include <utils/Log.h>
     25 #include <stdint.h>
     26 
     27 #include <common_time/local_clock.h>
     28 #include <assert.h>
     29 
     30 #include "clock_recovery.h"
     31 #include "common_clock.h"
     32 #ifdef TIME_SERVICE_DEBUG
     33 #include "diag_thread.h"
     34 #endif
     35 
     36 // Define log macro so we can make LOGV into LOGE when we are exclusively
     37 // debugging this code.
     38 #ifdef TIME_SERVICE_DEBUG
     39 #define LOG_TS ALOGE
     40 #else
     41 #define LOG_TS ALOGV
     42 #endif
     43 
     44 namespace android {
     45 
     46 ClockRecoveryLoop::ClockRecoveryLoop(LocalClock* local_clock,
     47                                      CommonClock* common_clock) {
     48     assert(NULL != local_clock);
     49     assert(NULL != common_clock);
     50 
     51     local_clock_  = local_clock;
     52     common_clock_ = common_clock;
     53 
     54     local_clock_can_slew_ = local_clock_->initCheck() &&
     55                            (local_clock_->setLocalSlew(0) == OK);
     56 
     57     reset(true, true);
     58 
     59 #ifdef TIME_SERVICE_DEBUG
     60     diag_thread_ = new DiagThread(common_clock_, local_clock_);
     61     if (diag_thread_ != NULL) {
     62         status_t res = diag_thread_->startWorkThread();
     63         if (res != OK)
     64             ALOGW("Failed to start A@H clock recovery diagnostic thread.");
     65     } else
     66         ALOGW("Failed to allocate diagnostic thread.");
     67 #endif
     68 }
     69 
     70 ClockRecoveryLoop::~ClockRecoveryLoop() {
     71 #ifdef TIME_SERVICE_DEBUG
     72     diag_thread_->stopWorkThread();
     73 #endif
     74 }
     75 
     76 // Constants.
     77 const float ClockRecoveryLoop::dT = 1.0;
     78 const float ClockRecoveryLoop::Kc = 1.0f;
     79 const float ClockRecoveryLoop::Ti = 15.0f;
     80 const float ClockRecoveryLoop::Tf = 0.05;
     81 const float ClockRecoveryLoop::bias_Fc = 0.01;
     82 const float ClockRecoveryLoop::bias_RC = (dT / (2 * 3.14159f * bias_Fc));
     83 const float ClockRecoveryLoop::bias_Alpha = (dT / (bias_RC + dT));
     84 const int64_t ClockRecoveryLoop::panic_thresh_ = 50000;
     85 const int64_t ClockRecoveryLoop::control_thresh_ = 10000;
     86 const float ClockRecoveryLoop::COmin = -100.0f;
     87 const float ClockRecoveryLoop::COmax = 100.0f;
     88 
     89 void ClockRecoveryLoop::reset(bool position, bool frequency) {
     90     Mutex::Autolock lock(&lock_);
     91     reset_l(position, frequency);
     92 }
     93 
     94 uint32_t ClockRecoveryLoop::findMinRTTNdx(DisciplineDataPoint* data,
     95                                           uint32_t count) {
     96     uint32_t min_rtt = 0;
     97     for (uint32_t i = 1; i < count; ++i)
     98         if (data[min_rtt].rtt > data[i].rtt)
     99             min_rtt = i;
    100 
    101     return min_rtt;
    102 }
    103 
    104 bool ClockRecoveryLoop::pushDisciplineEvent(int64_t local_time,
    105                                             int64_t nominal_common_time,
    106                                             int64_t rtt) {
    107     Mutex::Autolock lock(&lock_);
    108 
    109     int64_t local_common_time = 0;
    110     common_clock_->localToCommon(local_time, &local_common_time);
    111     int64_t raw_delta = nominal_common_time - local_common_time;
    112 
    113 #ifdef TIME_SERVICE_DEBUG
    114     ALOGE("local=%lld, common=%lld, delta=%lld, rtt=%lld\n",
    115          local_common_time, nominal_common_time,
    116          raw_delta, rtt);
    117 #endif
    118 
    119     // If we have not defined a basis for common time, then we need to use these
    120     // initial points to do so.  In order to avoid significant initial error
    121     // from a particularly bad startup data point, we collect the first N data
    122     // points and choose the best of them before moving on.
    123     if (!common_clock_->isValid()) {
    124         if (startup_filter_wr_ < kStartupFilterSize) {
    125             DisciplineDataPoint& d =  startup_filter_data_[startup_filter_wr_];
    126             d.local_time = local_time;
    127             d.nominal_common_time = nominal_common_time;
    128             d.rtt = rtt;
    129             startup_filter_wr_++;
    130         }
    131 
    132         if (startup_filter_wr_ == kStartupFilterSize) {
    133             uint32_t min_rtt = findMinRTTNdx(startup_filter_data_,
    134                     kStartupFilterSize);
    135 
    136             common_clock_->setBasis(
    137                     startup_filter_data_[min_rtt].local_time,
    138                     startup_filter_data_[min_rtt].nominal_common_time);
    139         }
    140 
    141         return true;
    142     }
    143 
    144     int64_t observed_common;
    145     int64_t delta;
    146     float delta_f, dCO;
    147     int32_t correction_cur;
    148 
    149     if (OK != common_clock_->localToCommon(local_time, &observed_common)) {
    150         // Since we just checked to make certain that this conversion was valid,
    151         // and no one else in the system should be messing with it, if this
    152         // conversion is suddenly invalid, it is a good reason to panic.
    153         ALOGE("Failed to convert local time to common time in %s:%d",
    154                 __PRETTY_FUNCTION__, __LINE__);
    155         return false;
    156     }
    157 
    158     // Implement a filter which should match NTP filtering behavior when a
    159     // client is associated with only one peer of lower stratum.  Basically,
    160     // always use the best of the N last data points, where best is defined as
    161     // lowest round trip time.  NTP uses an N of 8; we use a value of 6.
    162     //
    163     // TODO(johngro) : experiment with other filter strategies.  The goal here
    164     // is to mitigate the effects of high RTT data points which typically have
    165     // large asymmetries in the TX/RX legs.  Downside of the existing NTP
    166     // approach (particularly because of the PID controller we are using to
    167     // produce the control signal from the filtered data) are that the rate at
    168     // which discipline events are actually acted upon becomes irregular and can
    169     // become drawn out (the time between actionable event can go way up).  If
    170     // the system receives a strong high quality data point, the proportional
    171     // component of the controller can produce a strong correction which is left
    172     // in place for too long causing overshoot.  In addition, the integral
    173     // component of the system currently is an approximation based on the
    174     // assumption of a more or less homogeneous sampling of the error.  Its
    175     // unclear what the effect of undermining this assumption would be right
    176     // now.
    177 
    178     // Two ideas which come to mind immediately would be to...
    179     // 1) Keep a history of more data points (32 or so) and ignore data points
    180     //    whose RTT is more than a certain number of standard deviations outside
    181     //    of the norm.
    182     // 2) Eliminate the PID controller portion of this system entirely.
    183     //    Instead, move to a system which uses a very wide filter (128 data
    184     //    points or more) with a sum-of-least-squares line fitting approach to
    185     //    tracking the long term drift.  This would take the place of the I
    186     //    component in the current PID controller.  Also use a much more narrow
    187     //    outlier-rejector filter (as described in #1) to drive a short term
    188     //    correction factor similar to the P component of the PID controller.
    189     assert(filter_wr_ < kFilterSize);
    190     filter_data_[filter_wr_].local_time           = local_time;
    191     filter_data_[filter_wr_].observed_common_time = observed_common;
    192     filter_data_[filter_wr_].nominal_common_time  = nominal_common_time;
    193     filter_data_[filter_wr_].rtt                  = rtt;
    194     filter_data_[filter_wr_].point_used           = false;
    195     uint32_t current_point = filter_wr_;
    196     filter_wr_ = (filter_wr_ + 1) % kFilterSize;
    197     if (!filter_wr_)
    198         filter_full_ = true;
    199 
    200     uint32_t scan_end = filter_full_ ? kFilterSize : filter_wr_;
    201     uint32_t min_rtt = findMinRTTNdx(filter_data_, scan_end);
    202     // We only use packets with low RTTs for control. If the packet RTT
    203     // is less than the panic threshold, we can probably eat the jitter with the
    204     // control loop. Otherwise, take the packet only if it better than all
    205     // of the packets we have in the history. That way we try to track
    206     // something, even if it is noisy.
    207     if (current_point == min_rtt || rtt < control_thresh_) {
    208         delta_f = delta = nominal_common_time - observed_common;
    209 
    210         // Compute the error then clamp to the panic threshold.  If we ever
    211         // exceed this amt of error, its time to panic and reset the system.
    212         // Given that the error in the measurement of the error could be as
    213         // high as the RTT of the data point, we don't actually panic until
    214         // the implied error (delta) is greater than the absolute panic
    215         // threashold plus the RTT.  IOW - we don't panic until we are
    216         // absoluely sure that our best case sync is worse than the absolute
    217         // panic threshold.
    218         int64_t effective_panic_thresh = panic_thresh_ + rtt;
    219         if ((delta > effective_panic_thresh) ||
    220             (delta < -effective_panic_thresh)) {
    221             // PANIC!!!
    222             reset_l(false, true);
    223             return false;
    224         }
    225 
    226     } else {
    227         // We do not have a good packet to look at, but we also do not want to
    228         // free-run the clock at some crazy slew rate. So we guess the
    229         // trajectory of the clock based on the last controller output and the
    230         // estimated bias of our clock against the master.
    231         // The net effect of this is that CO == CObias after some extended
    232         // period of no feedback.
    233         delta_f = last_delta_f_ - dT*(CO - CObias);
    234         delta = delta_f;
    235     }
    236 
    237     // Velocity form PI control equation.
    238     dCO = Kc * (1.0f + dT/Ti) * delta_f - Kc * last_delta_f_;
    239     CO += dCO * Tf; // Filter CO by applying gain <1 here.
    240 
    241     // Save error terms for later.
    242     last_delta_f_ = delta_f;
    243     last_delta_ = delta;
    244 
    245     // Clamp CO to +/- 100ppm.
    246     if (CO < COmin)
    247         CO = COmin;
    248     else if (CO > COmax)
    249         CO = COmax;
    250 
    251     // Update the controller bias.
    252     CObias = bias_Alpha * CO + (1.0f - bias_Alpha) * lastCObias;
    253     lastCObias = CObias;
    254 
    255     // Convert PPM to 16-bit int range. Add some guard band (-0.01) so we
    256     // don't get fp weirdness.
    257     correction_cur = CO * 327.66;
    258 
    259     // If there was a change in the amt of correction to use, update the
    260     // system.
    261     if (correction_cur_ != correction_cur) {
    262         correction_cur_ = correction_cur;
    263         applySlew();
    264     }
    265 
    266     LOG_TS("clock_loop %lld %f %f %f %d\n", raw_delta, delta_f, CO, CObias, correction_cur);
    267 
    268 #ifdef TIME_SERVICE_DEBUG
    269     diag_thread_->pushDisciplineEvent(
    270             local_time,
    271             observed_common,
    272             nominal_common_time,
    273             correction_cur,
    274             rtt);
    275 #endif
    276 
    277     return true;
    278 }
    279 
    280 int32_t ClockRecoveryLoop::getLastErrorEstimate() {
    281     Mutex::Autolock lock(&lock_);
    282 
    283     if (last_delta_valid_)
    284         return last_delta_;
    285     else
    286         return ICommonClock::kErrorEstimateUnknown;
    287 }
    288 
    289 void ClockRecoveryLoop::reset_l(bool position, bool frequency) {
    290     assert(NULL != common_clock_);
    291 
    292     if (position) {
    293         common_clock_->resetBasis();
    294         startup_filter_wr_ = 0;
    295     }
    296 
    297     if (frequency) {
    298         last_delta_valid_ = false;
    299         last_delta_ = 0;
    300         last_delta_f_ = 0.0;
    301         correction_cur_ = 0x0;
    302         CO = 0.0f;
    303         lastCObias = CObias = 0.0f;
    304         applySlew();
    305     }
    306 
    307     filter_wr_   = 0;
    308     filter_full_ = false;
    309 }
    310 
    311 void ClockRecoveryLoop::applySlew() {
    312     if (local_clock_can_slew_) {
    313         local_clock_->setLocalSlew(correction_cur_);
    314     } else {
    315         // The SW clock recovery implemented by the common clock class expects
    316         // values expressed in PPM. CO is in ppm.
    317         common_clock_->setSlew(local_clock_->getLocalTime(), CO);
    318     }
    319 }
    320 
    321 }  // namespace android
    322