1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* 18 * A service that exchanges time synchronization information between 19 * a master that defines a timeline and clients that follow the timeline. 20 */ 21 22 #define __STDC_LIMIT_MACROS 23 #define LOG_TAG "common_time" 24 #include <utils/Log.h> 25 #include <inttypes.h> 26 #include <stdint.h> 27 28 #include <common_time/local_clock.h> 29 #include <assert.h> 30 31 #include "clock_recovery.h" 32 #include "common_clock.h" 33 #ifdef TIME_SERVICE_DEBUG 34 #include "diag_thread.h" 35 #endif 36 37 // Define log macro so we can make LOGV into LOGE when we are exclusively 38 // debugging this code. 39 #ifdef TIME_SERVICE_DEBUG 40 #define LOG_TS ALOGE 41 #else 42 #define LOG_TS ALOGV 43 #endif 44 45 namespace android { 46 47 ClockRecoveryLoop::ClockRecoveryLoop(LocalClock* local_clock, 48 CommonClock* common_clock) { 49 assert(NULL != local_clock); 50 assert(NULL != common_clock); 51 52 local_clock_ = local_clock; 53 common_clock_ = common_clock; 54 55 local_clock_can_slew_ = local_clock_->initCheck() && 56 (local_clock_->setLocalSlew(0) == OK); 57 tgt_correction_ = 0; 58 cur_correction_ = 0; 59 60 // Precompute the max rate at which we are allowed to change the VCXO 61 // control. 62 uint64_t N = 0x10000ull * 1000ull; 63 uint64_t D = local_clock_->getLocalFreq() * kMinFullRangeSlewChange_mSec; 64 LinearTransform::reduce(&N, &D); 65 while ((N > INT32_MAX) || (D > UINT32_MAX)) { 66 N >>= 1; 67 D >>= 1; 68 LinearTransform::reduce(&N, &D); 69 } 70 time_to_cur_slew_.a_to_b_numer = static_cast<int32_t>(N); 71 time_to_cur_slew_.a_to_b_denom = static_cast<uint32_t>(D); 72 73 reset(true, true); 74 75 #ifdef TIME_SERVICE_DEBUG 76 diag_thread_ = new DiagThread(common_clock_, local_clock_); 77 if (diag_thread_ != NULL) { 78 status_t res = diag_thread_->startWorkThread(); 79 if (res != OK) 80 ALOGW("Failed to start A@H clock recovery diagnostic thread."); 81 } else 82 ALOGW("Failed to allocate diagnostic thread."); 83 #endif 84 } 85 86 ClockRecoveryLoop::~ClockRecoveryLoop() { 87 #ifdef TIME_SERVICE_DEBUG 88 diag_thread_->stopWorkThread(); 89 #endif 90 } 91 92 // Constants. 93 const float ClockRecoveryLoop::dT = 1.0; 94 const float ClockRecoveryLoop::Kc = 1.0f; 95 const float ClockRecoveryLoop::Ti = 15.0f; 96 const float ClockRecoveryLoop::Tf = 0.05; 97 const float ClockRecoveryLoop::bias_Fc = 0.01; 98 const float ClockRecoveryLoop::bias_RC = (dT / (2 * 3.14159f * bias_Fc)); 99 const float ClockRecoveryLoop::bias_Alpha = (dT / (bias_RC + dT)); 100 const int64_t ClockRecoveryLoop::panic_thresh_ = 50000; 101 const int64_t ClockRecoveryLoop::control_thresh_ = 10000; 102 const float ClockRecoveryLoop::COmin = -100.0f; 103 const float ClockRecoveryLoop::COmax = 100.0f; 104 const uint32_t ClockRecoveryLoop::kMinFullRangeSlewChange_mSec = 300; 105 const int ClockRecoveryLoop::kSlewChangeStepPeriod_mSec = 10; 106 107 108 void ClockRecoveryLoop::reset(bool position, bool frequency) { 109 Mutex::Autolock lock(&lock_); 110 reset_l(position, frequency); 111 } 112 113 uint32_t ClockRecoveryLoop::findMinRTTNdx(DisciplineDataPoint* data, 114 uint32_t count) { 115 uint32_t min_rtt = 0; 116 for (uint32_t i = 1; i < count; ++i) 117 if (data[min_rtt].rtt > data[i].rtt) 118 min_rtt = i; 119 120 return min_rtt; 121 } 122 123 bool ClockRecoveryLoop::pushDisciplineEvent(int64_t local_time, 124 int64_t nominal_common_time, 125 int64_t rtt) { 126 Mutex::Autolock lock(&lock_); 127 128 int64_t local_common_time = 0; 129 common_clock_->localToCommon(local_time, &local_common_time); 130 int64_t raw_delta = nominal_common_time - local_common_time; 131 132 #ifdef TIME_SERVICE_DEBUG 133 ALOGE("local=%lld, common=%lld, delta=%lld, rtt=%lld\n", 134 local_common_time, nominal_common_time, 135 raw_delta, rtt); 136 #endif 137 138 // If we have not defined a basis for common time, then we need to use these 139 // initial points to do so. In order to avoid significant initial error 140 // from a particularly bad startup data point, we collect the first N data 141 // points and choose the best of them before moving on. 142 if (!common_clock_->isValid()) { 143 if (startup_filter_wr_ < kStartupFilterSize) { 144 DisciplineDataPoint& d = startup_filter_data_[startup_filter_wr_]; 145 d.local_time = local_time; 146 d.nominal_common_time = nominal_common_time; 147 d.rtt = rtt; 148 startup_filter_wr_++; 149 } 150 151 if (startup_filter_wr_ == kStartupFilterSize) { 152 uint32_t min_rtt = findMinRTTNdx(startup_filter_data_, 153 kStartupFilterSize); 154 155 common_clock_->setBasis( 156 startup_filter_data_[min_rtt].local_time, 157 startup_filter_data_[min_rtt].nominal_common_time); 158 } 159 160 return true; 161 } 162 163 int64_t observed_common; 164 int64_t delta; 165 float delta_f, dCO; 166 int32_t tgt_correction; 167 168 if (OK != common_clock_->localToCommon(local_time, &observed_common)) { 169 // Since we just checked to make certain that this conversion was valid, 170 // and no one else in the system should be messing with it, if this 171 // conversion is suddenly invalid, it is a good reason to panic. 172 ALOGE("Failed to convert local time to common time in %s:%d", 173 __PRETTY_FUNCTION__, __LINE__); 174 return false; 175 } 176 177 // Implement a filter which should match NTP filtering behavior when a 178 // client is associated with only one peer of lower stratum. Basically, 179 // always use the best of the N last data points, where best is defined as 180 // lowest round trip time. NTP uses an N of 8; we use a value of 6. 181 // 182 // TODO(johngro) : experiment with other filter strategies. The goal here 183 // is to mitigate the effects of high RTT data points which typically have 184 // large asymmetries in the TX/RX legs. Downside of the existing NTP 185 // approach (particularly because of the PID controller we are using to 186 // produce the control signal from the filtered data) are that the rate at 187 // which discipline events are actually acted upon becomes irregular and can 188 // become drawn out (the time between actionable event can go way up). If 189 // the system receives a strong high quality data point, the proportional 190 // component of the controller can produce a strong correction which is left 191 // in place for too long causing overshoot. In addition, the integral 192 // component of the system currently is an approximation based on the 193 // assumption of a more or less homogeneous sampling of the error. Its 194 // unclear what the effect of undermining this assumption would be right 195 // now. 196 197 // Two ideas which come to mind immediately would be to... 198 // 1) Keep a history of more data points (32 or so) and ignore data points 199 // whose RTT is more than a certain number of standard deviations outside 200 // of the norm. 201 // 2) Eliminate the PID controller portion of this system entirely. 202 // Instead, move to a system which uses a very wide filter (128 data 203 // points or more) with a sum-of-least-squares line fitting approach to 204 // tracking the long term drift. This would take the place of the I 205 // component in the current PID controller. Also use a much more narrow 206 // outlier-rejector filter (as described in #1) to drive a short term 207 // correction factor similar to the P component of the PID controller. 208 assert(filter_wr_ < kFilterSize); 209 filter_data_[filter_wr_].local_time = local_time; 210 filter_data_[filter_wr_].observed_common_time = observed_common; 211 filter_data_[filter_wr_].nominal_common_time = nominal_common_time; 212 filter_data_[filter_wr_].rtt = rtt; 213 filter_data_[filter_wr_].point_used = false; 214 uint32_t current_point = filter_wr_; 215 filter_wr_ = (filter_wr_ + 1) % kFilterSize; 216 if (!filter_wr_) 217 filter_full_ = true; 218 219 uint32_t scan_end = filter_full_ ? kFilterSize : filter_wr_; 220 uint32_t min_rtt = findMinRTTNdx(filter_data_, scan_end); 221 // We only use packets with low RTTs for control. If the packet RTT 222 // is less than the panic threshold, we can probably eat the jitter with the 223 // control loop. Otherwise, take the packet only if it better than all 224 // of the packets we have in the history. That way we try to track 225 // something, even if it is noisy. 226 if (current_point == min_rtt || rtt < control_thresh_) { 227 delta_f = delta = nominal_common_time - observed_common; 228 229 last_error_est_valid_ = true; 230 last_error_est_usec_ = delta; 231 232 // Compute the error then clamp to the panic threshold. If we ever 233 // exceed this amt of error, its time to panic and reset the system. 234 // Given that the error in the measurement of the error could be as 235 // high as the RTT of the data point, we don't actually panic until 236 // the implied error (delta) is greater than the absolute panic 237 // threashold plus the RTT. IOW - we don't panic until we are 238 // absoluely sure that our best case sync is worse than the absolute 239 // panic threshold. 240 int64_t effective_panic_thresh = panic_thresh_ + rtt; 241 if ((delta > effective_panic_thresh) || 242 (delta < -effective_panic_thresh)) { 243 // PANIC!!! 244 reset_l(false, true); 245 return false; 246 } 247 248 } else { 249 // We do not have a good packet to look at, but we also do not want to 250 // free-run the clock at some crazy slew rate. So we guess the 251 // trajectory of the clock based on the last controller output and the 252 // estimated bias of our clock against the master. 253 // The net effect of this is that CO == CObias after some extended 254 // period of no feedback. 255 delta_f = last_delta_f_ - dT*(CO - CObias); 256 delta = delta_f; 257 } 258 259 // Velocity form PI control equation. 260 dCO = Kc * (1.0f + dT/Ti) * delta_f - Kc * last_delta_f_; 261 CO += dCO * Tf; // Filter CO by applying gain <1 here. 262 263 // Save error terms for later. 264 last_delta_f_ = delta_f; 265 266 // Clamp CO to +/- 100ppm. 267 if (CO < COmin) 268 CO = COmin; 269 else if (CO > COmax) 270 CO = COmax; 271 272 // Update the controller bias. 273 CObias = bias_Alpha * CO + (1.0f - bias_Alpha) * lastCObias; 274 lastCObias = CObias; 275 276 // Convert PPM to 16-bit int range. Add some guard band (-0.01) so we 277 // don't get fp weirdness. 278 tgt_correction = CO * 327.66; 279 280 // If there was a change in the amt of correction to use, update the 281 // system. 282 setTargetCorrection_l(tgt_correction); 283 284 LOG_TS("clock_loop %" PRId64 " %f %f %f %d\n", raw_delta, delta_f, CO, CObias, tgt_correction); 285 286 #ifdef TIME_SERVICE_DEBUG 287 diag_thread_->pushDisciplineEvent( 288 local_time, 289 observed_common, 290 nominal_common_time, 291 tgt_correction, 292 rtt); 293 #endif 294 295 return true; 296 } 297 298 int32_t ClockRecoveryLoop::getLastErrorEstimate() { 299 Mutex::Autolock lock(&lock_); 300 301 if (last_error_est_valid_) 302 return last_error_est_usec_; 303 else 304 return ICommonClock::kErrorEstimateUnknown; 305 } 306 307 void ClockRecoveryLoop::reset_l(bool position, bool frequency) { 308 assert(NULL != common_clock_); 309 310 if (position) { 311 common_clock_->resetBasis(); 312 startup_filter_wr_ = 0; 313 } 314 315 if (frequency) { 316 last_error_est_valid_ = false; 317 last_error_est_usec_ = 0; 318 last_delta_f_ = 0.0; 319 CO = 0.0f; 320 lastCObias = CObias = 0.0f; 321 setTargetCorrection_l(0); 322 applySlew_l(); 323 } 324 325 filter_wr_ = 0; 326 filter_full_ = false; 327 } 328 329 void ClockRecoveryLoop::setTargetCorrection_l(int32_t tgt) { 330 // When we make a change to the slew rate, we need to be careful to not 331 // change it too quickly as it can anger some HDMI sinks out there, notably 332 // some Sony panels from the 2010-2011 timeframe. From experimenting with 333 // some of these sinks, it seems like swinging from one end of the range to 334 // another in less that 190mSec or so can start to cause trouble. Adding in 335 // a hefty margin, we limit the system to a full range sweep in no less than 336 // 300mSec. 337 if (tgt_correction_ != tgt) { 338 int64_t now = local_clock_->getLocalTime(); 339 340 tgt_correction_ = tgt; 341 342 // Set up the transformation to figure out what the slew should be at 343 // any given point in time in the future. 344 time_to_cur_slew_.a_zero = now; 345 time_to_cur_slew_.b_zero = cur_correction_; 346 347 // Make sure the sign of the slope is headed in the proper direction. 348 bool needs_increase = (cur_correction_ < tgt_correction_); 349 bool is_increasing = (time_to_cur_slew_.a_to_b_numer > 0); 350 if (( needs_increase && !is_increasing) || 351 (!needs_increase && is_increasing)) { 352 time_to_cur_slew_.a_to_b_numer = -time_to_cur_slew_.a_to_b_numer; 353 } 354 355 // Finally, figure out when the change will be finished and start the 356 // slew operation. 357 time_to_cur_slew_.doReverseTransform(tgt_correction_, 358 &slew_change_end_time_); 359 360 applySlew_l(); 361 } 362 } 363 364 bool ClockRecoveryLoop::applySlew_l() { 365 bool ret = true; 366 367 // If cur == tgt, there is no ongoing sleq rate change and we are already 368 // finished. 369 if (cur_correction_ == tgt_correction_) 370 goto bailout; 371 372 if (local_clock_can_slew_) { 373 int64_t now = local_clock_->getLocalTime(); 374 int64_t tmp; 375 376 if (now >= slew_change_end_time_) { 377 cur_correction_ = tgt_correction_; 378 next_slew_change_timeout_.setTimeout(-1); 379 } else { 380 time_to_cur_slew_.doForwardTransform(now, &tmp); 381 382 if (tmp > INT16_MAX) 383 cur_correction_ = INT16_MAX; 384 else if (tmp < INT16_MIN) 385 cur_correction_ = INT16_MIN; 386 else 387 cur_correction_ = static_cast<int16_t>(tmp); 388 389 next_slew_change_timeout_.setTimeout(kSlewChangeStepPeriod_mSec); 390 ret = false; 391 } 392 393 local_clock_->setLocalSlew(cur_correction_); 394 } else { 395 // Since we are not actually changing the rate of a HW clock, we don't 396 // need to worry to much about changing the slew rate so fast that we 397 // anger any downstream HDMI devices. 398 cur_correction_ = tgt_correction_; 399 next_slew_change_timeout_.setTimeout(-1); 400 401 // The SW clock recovery implemented by the common clock class expects 402 // values expressed in PPM. CO is in ppm. 403 common_clock_->setSlew(local_clock_->getLocalTime(), CO); 404 } 405 406 bailout: 407 return ret; 408 } 409 410 int ClockRecoveryLoop::applyRateLimitedSlew() { 411 Mutex::Autolock lock(&lock_); 412 413 int ret = next_slew_change_timeout_.msecTillTimeout(); 414 if (!ret) { 415 if (applySlew_l()) 416 next_slew_change_timeout_.setTimeout(-1); 417 ret = next_slew_change_timeout_.msecTillTimeout(); 418 } 419 420 return ret; 421 } 422 423 } // namespace android 424