// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "v8.h"

#include "runtime-profiler.h"

#include "assembler.h"
#include "code-stubs.h"
#include "compilation-cache.h"
#include "deoptimizer.h"
#include "execution.h"
#include "global-handles.h"
#include "mark-compact.h"
#include "platform.h"
#include "scopeinfo.h"

namespace v8 {
namespace internal {


class PendingListNode : public Malloced {
 public:
  explicit PendingListNode(JSFunction* function);
  ~PendingListNode() { Destroy(); }

  PendingListNode* next() const { return next_; }
  void set_next(PendingListNode* node) { next_ = node; }
  Handle<JSFunction> function() { return Handle<JSFunction>::cast(function_); }

  // If the function is garbage collected before we've had the chance
  // to optimize it, the weak handle will be null.
  bool IsValid() { return !function_.is_null(); }

  // Returns the number of microseconds this node has been pending.
  int Delay() const { return static_cast<int>(OS::Ticks() - start_); }

 private:
  void Destroy();
  static void WeakCallback(v8::Persistent<v8::Value> object, void* data);

  PendingListNode* next_;
  Handle<Object> function_;  // Weak handle.
  int64_t start_;
};


// Optimization sampler constants.
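//
// Summary of how these constants interact (derived from OptimizeNow()
// below): the profiler samples the topmost kSamplerFrameCount JavaScript
// frames on each tick, and a frame nearer the top of the stack receives
// the larger kSamplerFrameWeight entry. A function is marked for
// recompilation once its accumulated weight in the sampler window reaches
// the current threshold. The threshold starts at kSamplerThresholdInit
// and drops by kSamplerThresholdDelta every
// kSamplerTicksBetweenThresholdAdjustment samples until it reaches
// kSamplerThresholdMin; functions whose source size exceeds kSizeLimit
// are held to a threshold scaled up by the size factor.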
static const int kSamplerFrameCount = 2;
static const int kSamplerFrameWeight[kSamplerFrameCount] = { 2, 1 };

static const int kSamplerTicksBetweenThresholdAdjustment = 32;

static const int kSamplerThresholdInit = 3;
static const int kSamplerThresholdMin = 1;
static const int kSamplerThresholdDelta = 1;

static const int kSamplerThresholdSizeFactorInit = 3;
static const int kSamplerThresholdSizeFactorMin = 1;
static const int kSamplerThresholdSizeFactorDelta = 1;

static const int kSizeLimit = 1500;


PendingListNode::PendingListNode(JSFunction* function) : next_(NULL) {
  GlobalHandles* global_handles = Isolate::Current()->global_handles();
  function_ = global_handles->Create(function);
  start_ = OS::Ticks();
  global_handles->MakeWeak(function_.location(), this, &WeakCallback);
}


void PendingListNode::Destroy() {
  if (!IsValid()) return;
  GlobalHandles* global_handles = Isolate::Current()->global_handles();
  global_handles->Destroy(function_.location());
  function_ = Handle<Object>::null();
}


void PendingListNode::WeakCallback(v8::Persistent<v8::Value>, void* data) {
  reinterpret_cast<PendingListNode*>(data)->Destroy();
}


Atomic32 RuntimeProfiler::state_ = 0;
// TODO(isolates): Create the semaphore lazily and clean it up when no
// longer required.
#ifdef ENABLE_LOGGING_AND_PROFILING
Semaphore* RuntimeProfiler::semaphore_ = OS::CreateSemaphore(0);
#endif

#ifdef DEBUG
bool RuntimeProfiler::has_been_globally_setup_ = false;
#endif
bool RuntimeProfiler::enabled_ = false;


RuntimeProfiler::RuntimeProfiler(Isolate* isolate)
    : isolate_(isolate),
      sampler_threshold_(kSamplerThresholdInit),
      sampler_threshold_size_factor_(kSamplerThresholdSizeFactorInit),
      sampler_ticks_until_threshold_adjustment_(
          kSamplerTicksBetweenThresholdAdjustment),
      js_ratio_(0),
      sampler_window_position_(0),
      optimize_soon_list_(NULL),
      state_window_position_(0),
      state_window_ticks_(0) {
  state_counts_[IN_NON_JS_STATE] = kStateWindowSize;
  state_counts_[IN_JS_STATE] = 0;
  STATIC_ASSERT(IN_NON_JS_STATE == 0);
  memset(state_window_, 0, sizeof(state_window_));
  ClearSampleBuffer();
}


void RuntimeProfiler::GlobalSetup() {
  ASSERT(!has_been_globally_setup_);
  enabled_ = V8::UseCrankshaft() && FLAG_opt;
#ifdef DEBUG
  has_been_globally_setup_ = true;
#endif
}


void RuntimeProfiler::Optimize(JSFunction* function, bool eager, int delay) {
  ASSERT(function->IsOptimizable());
  if (FLAG_trace_opt) {
    PrintF("[marking (%s) ", eager ? "eagerly" : "lazily");
    function->PrintName();
    PrintF(" for recompilation");
    if (delay > 0) {
      PrintF(" (delayed %0.3f ms)", static_cast<double>(delay) / 1000);
    }
    PrintF("]\n");
  }

  // The next call to the function will trigger optimization.
  function->MarkForLazyRecompilation();
}


void RuntimeProfiler::AttemptOnStackReplacement(JSFunction* function) {
  // See AlwaysFullCompiler (in compiler.cc) comment on why we need
  // Debug::has_break_points().
  ASSERT(function->IsMarkedForLazyRecompilation());
  if (!FLAG_use_osr ||
      isolate_->debug()->has_break_points() ||
      function->IsBuiltin()) {
    return;
  }

  SharedFunctionInfo* shared = function->shared();
  // If the code is not optimizable or references context slots, don't try OSR.
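  // (Note: both properties checked below live on the SharedFunctionInfo,
  // so this decision applies to every closure sharing the same
  // unoptimized code.)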
  if (!shared->code()->optimizable() || !shared->allows_lazy_compilation()) {
    return;
  }

  // We are not prepared to do OSR for a function that already has an
  // allocated arguments object. The optimized code would bypass it for
  // arguments accesses, which is unsound. Don't try OSR.
  if (shared->scope_info()->HasArgumentsShadow()) return;

  // We're using on-stack replacement: patch the unoptimized code so that
  // any back edge in any unoptimized frame will trigger on-stack
  // replacement for that frame.
  if (FLAG_trace_osr) {
    PrintF("[patching stack checks in ");
    function->PrintName();
    PrintF(" for on-stack replacement]\n");
  }

  // Get the stack check stub code object to match against. We aren't
  // prepared to generate it, but we don't expect to have to.
  StackCheckStub check_stub;
  Object* check_code;
  MaybeObject* maybe_check_code = check_stub.TryGetCode();
  if (maybe_check_code->ToObject(&check_code)) {
    Code* replacement_code =
        isolate_->builtins()->builtin(Builtins::kOnStackReplacement);
    Code* unoptimized_code = shared->code();
    Deoptimizer::PatchStackCheckCode(unoptimized_code,
                                     Code::cast(check_code),
                                     replacement_code);
  }
}


void RuntimeProfiler::ClearSampleBuffer() {
  memset(sampler_window_, 0, sizeof(sampler_window_));
  memset(sampler_window_weight_, 0, sizeof(sampler_window_weight_));
}


int RuntimeProfiler::LookupSample(JSFunction* function) {
  int weight = 0;
  for (int i = 0; i < kSamplerWindowSize; i++) {
    Object* sample = sampler_window_[i];
    if (sample != NULL) {
      if (function == sample) {
        weight += sampler_window_weight_[i];
      }
    }
  }
  return weight;
}


void RuntimeProfiler::AddSample(JSFunction* function, int weight) {
  ASSERT(IsPowerOf2(kSamplerWindowSize));
  sampler_window_[sampler_window_position_] = function;
  sampler_window_weight_[sampler_window_position_] = weight;
  sampler_window_position_ = (sampler_window_position_ + 1) &
      (kSamplerWindowSize - 1);
}


void RuntimeProfiler::OptimizeNow() {
  HandleScope scope(isolate_);
  PendingListNode* current = optimize_soon_list_;
  while (current != NULL) {
    PendingListNode* next = current->next();
    if (current->IsValid()) {
      Handle<JSFunction> function = current->function();
      int delay = current->Delay();
      if (function->IsOptimizable()) {
        Optimize(*function, true, delay);
      }
    }
    delete current;
    current = next;
  }
  optimize_soon_list_ = NULL;

  // Run through the JavaScript frames and collect them. If we already
  // have a sample of the function, we mark it for optimization
  // (eagerly or lazily).
  JSFunction* samples[kSamplerFrameCount];
  int sample_count = 0;
  int frame_count = 0;
  for (JavaScriptFrameIterator it(isolate_);
       frame_count++ < kSamplerFrameCount && !it.done();
       it.Advance()) {
    JavaScriptFrame* frame = it.frame();
    JSFunction* function = JSFunction::cast(frame->function());

    // Adjust the threshold each time we have processed
    // a certain number of ticks.
    if (sampler_ticks_until_threshold_adjustment_ > 0) {
      sampler_ticks_until_threshold_adjustment_--;
      if (sampler_ticks_until_threshold_adjustment_ <= 0) {
        // If the threshold is not already at the minimum, lower it
        // and reset the ticks until the next adjustment.
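        // Each adjustment lowers the threshold by kSamplerThresholdDelta,
        // making optimization progressively more aggressive the longer
        // profiling continues.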
        if (sampler_threshold_ > kSamplerThresholdMin) {
          sampler_threshold_ -= kSamplerThresholdDelta;
          sampler_ticks_until_threshold_adjustment_ =
              kSamplerTicksBetweenThresholdAdjustment;
        }
      }
    }

    if (function->IsMarkedForLazyRecompilation()) {
      Code* unoptimized = function->shared()->code();
      int nesting = unoptimized->allow_osr_at_loop_nesting_level();
      if (nesting == 0) AttemptOnStackReplacement(function);
      int new_nesting = Min(nesting + 1, Code::kMaxLoopNestingMarker);
      unoptimized->set_allow_osr_at_loop_nesting_level(new_nesting);
    }

    // Do not record non-optimizable functions.
    if (!function->IsOptimizable()) continue;
    samples[sample_count++] = function;

    int function_size = function->shared()->SourceSize();
    int threshold_size_factor = (function_size > kSizeLimit)
        ? sampler_threshold_size_factor_
        : 1;

    int threshold = sampler_threshold_ * threshold_size_factor;
    int current_js_ratio = NoBarrier_Load(&js_ratio_);

    // Adjust the threshold depending on the ratio of time spent
    // in JS code.
    if (current_js_ratio < 20) {
      // If we spend less than 20% of the time in JS code,
      // do not optimize.
      continue;
    } else if (current_js_ratio < 75) {
      // Below 75% of time spent in JS code, only optimize very
      // frequently used functions.
      threshold *= 3;
    }

    if (LookupSample(function) >= threshold) {
      Optimize(function, false, 0);
      isolate_->compilation_cache()->MarkForEagerOptimizing(
          Handle<JSFunction>(function));
    }
  }

  // Add the collected functions as samples. It's important not to do
  // this as part of collecting them because this will interfere with
  // the sample lookup in case of recursive functions.
  for (int i = 0; i < sample_count; i++) {
    AddSample(samples[i], kSamplerFrameWeight[i]);
  }
}


void RuntimeProfiler::OptimizeSoon(JSFunction* function) {
  if (!function->IsOptimizable()) return;
  PendingListNode* node = new PendingListNode(function);
  node->set_next(optimize_soon_list_);
  optimize_soon_list_ = node;
}


#ifdef ENABLE_LOGGING_AND_PROFILING
void RuntimeProfiler::UpdateStateRatio(SamplerState current_state) {
  SamplerState old_state = state_window_[state_window_position_];
  state_counts_[old_state]--;
  state_window_[state_window_position_] = current_state;
  state_counts_[current_state]++;
  ASSERT(IsPowerOf2(kStateWindowSize));
  state_window_position_ = (state_window_position_ + 1) &
      (kStateWindowSize - 1);
  // Note: to calculate the correct ratio we have to track how many valid
  // ticks are actually in the state window, because on profiler
  // startup this number can be less than the window size.
  state_window_ticks_ = Min(kStateWindowSize, state_window_ticks_ + 1);
  NoBarrier_Store(&js_ratio_, state_counts_[IN_JS_STATE] * 100 /
                  state_window_ticks_);
}
#endif


void RuntimeProfiler::NotifyTick() {
#ifdef ENABLE_LOGGING_AND_PROFILING
  // Record state sample.
  SamplerState state = IsSomeIsolateInJS()
      ? IN_JS_STATE
      : IN_NON_JS_STATE;
  UpdateStateRatio(state);
  isolate_->stack_guard()->RequestRuntimeProfilerTick();
#endif
}


void RuntimeProfiler::Setup() {
  ASSERT(has_been_globally_setup_);
  ClearSampleBuffer();
  // If the ticker hasn't already started, make sure to do so to get
  // the ticks for the runtime profiler.
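  // (The ticker delivers the ticks that drive NotifyTick(); without it
  // the profiler would never observe any samples.)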
  if (IsEnabled()) isolate_->logger()->EnsureTickerStarted();
}


void RuntimeProfiler::Reset() {
  sampler_threshold_ = kSamplerThresholdInit;
  sampler_threshold_size_factor_ = kSamplerThresholdSizeFactorInit;
  sampler_ticks_until_threshold_adjustment_ =
      kSamplerTicksBetweenThresholdAdjustment;
}


void RuntimeProfiler::TearDown() {
  // Nothing to do.
}


int RuntimeProfiler::SamplerWindowSize() {
  return kSamplerWindowSize;
}


// Update the pointers in the sampler window after a GC.
void RuntimeProfiler::UpdateSamplesAfterScavenge() {
  for (int i = 0; i < kSamplerWindowSize; i++) {
    Object* function = sampler_window_[i];
    if (function != NULL && isolate_->heap()->InNewSpace(function)) {
      MapWord map_word = HeapObject::cast(function)->map_word();
      if (map_word.IsForwardingAddress()) {
        sampler_window_[i] = map_word.ToForwardingAddress();
      } else {
        sampler_window_[i] = NULL;
      }
    }
  }
}


void RuntimeProfiler::HandleWakeUp(Isolate* isolate) {
#ifdef ENABLE_LOGGING_AND_PROFILING
  // The profiler thread must still be waiting.
  ASSERT(NoBarrier_Load(&state_) >= 0);
  // In IsolateEnteredJS we have already incremented the counter and
  // undone the decrement done by the profiler thread. Increment again
  // to get the right count of active isolates.
  NoBarrier_AtomicIncrement(&state_, 1);
  semaphore_->Signal();
  isolate->ResetEagerOptimizingData();
#endif
}


bool RuntimeProfiler::IsSomeIsolateInJS() {
  return NoBarrier_Load(&state_) > 0;
}


bool RuntimeProfiler::WaitForSomeIsolateToEnterJS() {
#ifdef ENABLE_LOGGING_AND_PROFILING
  // state_ counts the isolates currently executing JavaScript; the
  // profiler thread parks itself by swapping in the -1 sentinel when
  // the count is zero.
  Atomic32 old_state = NoBarrier_CompareAndSwap(&state_, 0, -1);
  ASSERT(old_state >= -1);
  if (old_state != 0) return false;
  semaphore_->Wait();
#endif
  return true;
}


void RuntimeProfiler::WakeUpRuntimeProfilerThreadBeforeShutdown() {
#ifdef ENABLE_LOGGING_AND_PROFILING
  semaphore_->Signal();
#endif
}


void RuntimeProfiler::RemoveDeadSamples() {
  for (int i = 0; i < kSamplerWindowSize; i++) {
    Object* function = sampler_window_[i];
    if (function != NULL && !HeapObject::cast(function)->IsMarked()) {
      sampler_window_[i] = NULL;
    }
  }
}


void RuntimeProfiler::UpdateSamplesAfterCompact(ObjectVisitor* visitor) {
  for (int i = 0; i < kSamplerWindowSize; i++) {
    visitor->VisitPointer(&sampler_window_[i]);
  }
}


bool RuntimeProfilerRateLimiter::SuspendIfNecessary() {
#ifdef ENABLE_LOGGING_AND_PROFILING
  static const int kNonJSTicksThreshold = 100;
  if (RuntimeProfiler::IsSomeIsolateInJS()) {
    non_js_ticks_ = 0;
  } else {
    if (non_js_ticks_ < kNonJSTicksThreshold) {
      ++non_js_ticks_;
    } else {
      return RuntimeProfiler::WaitForSomeIsolateToEnterJS();
    }
  }
#endif
  return false;
}


} }  // namespace v8::internal