1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* 18 * A service that exchanges time synchronization information between 19 * a master that defines a timeline and clients that follow the timeline. 20 */ 21 22 #define __STDC_LIMIT_MACROS 23 #define LOG_TAG "common_time" 24 #include <utils/Log.h> 25 #include <stdint.h> 26 27 #include <common_time/local_clock.h> 28 #include <assert.h> 29 30 #include "clock_recovery.h" 31 #include "common_clock.h" 32 #ifdef TIME_SERVICE_DEBUG 33 #include "diag_thread.h" 34 #endif 35 36 // Define log macro so we can make LOGV into LOGE when we are exclusively 37 // debugging this code. 38 #ifdef TIME_SERVICE_DEBUG 39 #define LOG_TS ALOGE 40 #else 41 #define LOG_TS ALOGV 42 #endif 43 44 namespace android { 45 46 ClockRecoveryLoop::ClockRecoveryLoop(LocalClock* local_clock, 47 CommonClock* common_clock) { 48 assert(NULL != local_clock); 49 assert(NULL != common_clock); 50 51 local_clock_ = local_clock; 52 common_clock_ = common_clock; 53 54 local_clock_can_slew_ = local_clock_->initCheck() && 55 (local_clock_->setLocalSlew(0) == OK); 56 57 reset(true, true); 58 59 #ifdef TIME_SERVICE_DEBUG 60 diag_thread_ = new DiagThread(common_clock_, local_clock_); 61 if (diag_thread_ != NULL) { 62 status_t res = diag_thread_->startWorkThread(); 63 if (res != OK) 64 ALOGW("Failed to start A@H clock recovery diagnostic thread."); 65 } else 66 ALOGW("Failed to allocate diagnostic thread."); 67 #endif 68 } 69 70 ClockRecoveryLoop::~ClockRecoveryLoop() { 71 #ifdef TIME_SERVICE_DEBUG 72 diag_thread_->stopWorkThread(); 73 #endif 74 } 75 76 // Constants. 77 const float ClockRecoveryLoop::dT = 1.0; 78 const float ClockRecoveryLoop::Kc = 1.0f; 79 const float ClockRecoveryLoop::Ti = 15.0f; 80 const float ClockRecoveryLoop::Tf = 0.05; 81 const float ClockRecoveryLoop::bias_Fc = 0.01; 82 const float ClockRecoveryLoop::bias_RC = (dT / (2 * 3.14159f * bias_Fc)); 83 const float ClockRecoveryLoop::bias_Alpha = (dT / (bias_RC + dT)); 84 const int64_t ClockRecoveryLoop::panic_thresh_ = 50000; 85 const int64_t ClockRecoveryLoop::control_thresh_ = 10000; 86 const float ClockRecoveryLoop::COmin = -100.0f; 87 const float ClockRecoveryLoop::COmax = 100.0f; 88 89 void ClockRecoveryLoop::reset(bool position, bool frequency) { 90 Mutex::Autolock lock(&lock_); 91 reset_l(position, frequency); 92 } 93 94 uint32_t ClockRecoveryLoop::findMinRTTNdx(DisciplineDataPoint* data, 95 uint32_t count) { 96 uint32_t min_rtt = 0; 97 for (uint32_t i = 1; i < count; ++i) 98 if (data[min_rtt].rtt > data[i].rtt) 99 min_rtt = i; 100 101 return min_rtt; 102 } 103 104 bool ClockRecoveryLoop::pushDisciplineEvent(int64_t local_time, 105 int64_t nominal_common_time, 106 int64_t rtt) { 107 Mutex::Autolock lock(&lock_); 108 109 int64_t local_common_time = 0; 110 common_clock_->localToCommon(local_time, &local_common_time); 111 int64_t raw_delta = nominal_common_time - local_common_time; 112 113 #ifdef TIME_SERVICE_DEBUG 114 ALOGE("local=%lld, common=%lld, delta=%lld, rtt=%lld\n", 115 local_common_time, nominal_common_time, 116 raw_delta, rtt); 117 #endif 118 119 // If we have not defined a basis for common time, then we need to use these 120 // initial points to do so. In order to avoid significant initial error 121 // from a particularly bad startup data point, we collect the first N data 122 // points and choose the best of them before moving on. 123 if (!common_clock_->isValid()) { 124 if (startup_filter_wr_ < kStartupFilterSize) { 125 DisciplineDataPoint& d = startup_filter_data_[startup_filter_wr_]; 126 d.local_time = local_time; 127 d.nominal_common_time = nominal_common_time; 128 d.rtt = rtt; 129 startup_filter_wr_++; 130 } 131 132 if (startup_filter_wr_ == kStartupFilterSize) { 133 uint32_t min_rtt = findMinRTTNdx(startup_filter_data_, 134 kStartupFilterSize); 135 136 common_clock_->setBasis( 137 startup_filter_data_[min_rtt].local_time, 138 startup_filter_data_[min_rtt].nominal_common_time); 139 } 140 141 return true; 142 } 143 144 int64_t observed_common; 145 int64_t delta; 146 float delta_f, dCO; 147 int32_t correction_cur; 148 149 if (OK != common_clock_->localToCommon(local_time, &observed_common)) { 150 // Since we just checked to make certain that this conversion was valid, 151 // and no one else in the system should be messing with it, if this 152 // conversion is suddenly invalid, it is a good reason to panic. 153 ALOGE("Failed to convert local time to common time in %s:%d", 154 __PRETTY_FUNCTION__, __LINE__); 155 return false; 156 } 157 158 // Implement a filter which should match NTP filtering behavior when a 159 // client is associated with only one peer of lower stratum. Basically, 160 // always use the best of the N last data points, where best is defined as 161 // lowest round trip time. NTP uses an N of 8; we use a value of 6. 162 // 163 // TODO(johngro) : experiment with other filter strategies. The goal here 164 // is to mitigate the effects of high RTT data points which typically have 165 // large asymmetries in the TX/RX legs. Downside of the existing NTP 166 // approach (particularly because of the PID controller we are using to 167 // produce the control signal from the filtered data) are that the rate at 168 // which discipline events are actually acted upon becomes irregular and can 169 // become drawn out (the time between actionable event can go way up). If 170 // the system receives a strong high quality data point, the proportional 171 // component of the controller can produce a strong correction which is left 172 // in place for too long causing overshoot. In addition, the integral 173 // component of the system currently is an approximation based on the 174 // assumption of a more or less homogeneous sampling of the error. Its 175 // unclear what the effect of undermining this assumption would be right 176 // now. 177 178 // Two ideas which come to mind immediately would be to... 179 // 1) Keep a history of more data points (32 or so) and ignore data points 180 // whose RTT is more than a certain number of standard deviations outside 181 // of the norm. 182 // 2) Eliminate the PID controller portion of this system entirely. 183 // Instead, move to a system which uses a very wide filter (128 data 184 // points or more) with a sum-of-least-squares line fitting approach to 185 // tracking the long term drift. This would take the place of the I 186 // component in the current PID controller. Also use a much more narrow 187 // outlier-rejector filter (as described in #1) to drive a short term 188 // correction factor similar to the P component of the PID controller. 189 assert(filter_wr_ < kFilterSize); 190 filter_data_[filter_wr_].local_time = local_time; 191 filter_data_[filter_wr_].observed_common_time = observed_common; 192 filter_data_[filter_wr_].nominal_common_time = nominal_common_time; 193 filter_data_[filter_wr_].rtt = rtt; 194 filter_data_[filter_wr_].point_used = false; 195 uint32_t current_point = filter_wr_; 196 filter_wr_ = (filter_wr_ + 1) % kFilterSize; 197 if (!filter_wr_) 198 filter_full_ = true; 199 200 uint32_t scan_end = filter_full_ ? kFilterSize : filter_wr_; 201 uint32_t min_rtt = findMinRTTNdx(filter_data_, scan_end); 202 // We only use packets with low RTTs for control. If the packet RTT 203 // is less than the panic threshold, we can probably eat the jitter with the 204 // control loop. Otherwise, take the packet only if it better than all 205 // of the packets we have in the history. That way we try to track 206 // something, even if it is noisy. 207 if (current_point == min_rtt || rtt < control_thresh_) { 208 delta_f = delta = nominal_common_time - observed_common; 209 210 // Compute the error then clamp to the panic threshold. If we ever 211 // exceed this amt of error, its time to panic and reset the system. 212 // Given that the error in the measurement of the error could be as 213 // high as the RTT of the data point, we don't actually panic until 214 // the implied error (delta) is greater than the absolute panic 215 // threashold plus the RTT. IOW - we don't panic until we are 216 // absoluely sure that our best case sync is worse than the absolute 217 // panic threshold. 218 int64_t effective_panic_thresh = panic_thresh_ + rtt; 219 if ((delta > effective_panic_thresh) || 220 (delta < -effective_panic_thresh)) { 221 // PANIC!!! 222 reset_l(false, true); 223 return false; 224 } 225 226 } else { 227 // We do not have a good packet to look at, but we also do not want to 228 // free-run the clock at some crazy slew rate. So we guess the 229 // trajectory of the clock based on the last controller output and the 230 // estimated bias of our clock against the master. 231 // The net effect of this is that CO == CObias after some extended 232 // period of no feedback. 233 delta_f = last_delta_f_ - dT*(CO - CObias); 234 delta = delta_f; 235 } 236 237 // Velocity form PI control equation. 238 dCO = Kc * (1.0f + dT/Ti) * delta_f - Kc * last_delta_f_; 239 CO += dCO * Tf; // Filter CO by applying gain <1 here. 240 241 // Save error terms for later. 242 last_delta_f_ = delta_f; 243 last_delta_ = delta; 244 245 // Clamp CO to +/- 100ppm. 246 if (CO < COmin) 247 CO = COmin; 248 else if (CO > COmax) 249 CO = COmax; 250 251 // Update the controller bias. 252 CObias = bias_Alpha * CO + (1.0f - bias_Alpha) * lastCObias; 253 lastCObias = CObias; 254 255 // Convert PPM to 16-bit int range. Add some guard band (-0.01) so we 256 // don't get fp weirdness. 257 correction_cur = CO * 327.66; 258 259 // If there was a change in the amt of correction to use, update the 260 // system. 261 if (correction_cur_ != correction_cur) { 262 correction_cur_ = correction_cur; 263 applySlew(); 264 } 265 266 LOG_TS("clock_loop %lld %f %f %f %d\n", raw_delta, delta_f, CO, CObias, correction_cur); 267 268 #ifdef TIME_SERVICE_DEBUG 269 diag_thread_->pushDisciplineEvent( 270 local_time, 271 observed_common, 272 nominal_common_time, 273 correction_cur, 274 rtt); 275 #endif 276 277 return true; 278 } 279 280 int32_t ClockRecoveryLoop::getLastErrorEstimate() { 281 Mutex::Autolock lock(&lock_); 282 283 if (last_delta_valid_) 284 return last_delta_; 285 else 286 return ICommonClock::kErrorEstimateUnknown; 287 } 288 289 void ClockRecoveryLoop::reset_l(bool position, bool frequency) { 290 assert(NULL != common_clock_); 291 292 if (position) { 293 common_clock_->resetBasis(); 294 startup_filter_wr_ = 0; 295 } 296 297 if (frequency) { 298 last_delta_valid_ = false; 299 last_delta_ = 0; 300 last_delta_f_ = 0.0; 301 correction_cur_ = 0x0; 302 CO = 0.0f; 303 lastCObias = CObias = 0.0f; 304 applySlew(); 305 } 306 307 filter_wr_ = 0; 308 filter_full_ = false; 309 } 310 311 void ClockRecoveryLoop::applySlew() { 312 if (local_clock_can_slew_) { 313 local_clock_->setLocalSlew(correction_cur_); 314 } else { 315 // The SW clock recovery implemented by the common clock class expects 316 // values expressed in PPM. CO is in ppm. 317 common_clock_->setSlew(local_clock_->getLocalTime(), CO); 318 } 319 } 320 321 } // namespace android 322