1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "gpu/command_buffer/service/gpu_tracer.h" 6 7 #include <deque> 8 9 #include "base/bind.h" 10 #include "base/debug/trace_event.h" 11 #include "base/strings/string_util.h" 12 #include "base/time/time.h" 13 #include "gpu/command_buffer/common/gles2_cmd_utils.h" 14 15 namespace gpu { 16 namespace gles2 { 17 18 static const unsigned int kProcessInterval = 16; 19 static TraceOutputter* g_outputter_thread = NULL; 20 21 scoped_refptr<TraceOutputter> TraceOutputter::Create(const std::string& name) { 22 if (!g_outputter_thread) { 23 g_outputter_thread = new TraceOutputter(name); 24 } 25 return g_outputter_thread; 26 } 27 28 TraceOutputter::TraceOutputter(const std::string& name) 29 : named_thread_(name.c_str()), local_trace_id_(0) { 30 named_thread_.Start(); 31 named_thread_.Stop(); 32 } 33 34 TraceOutputter::~TraceOutputter() { g_outputter_thread = NULL; } 35 36 void TraceOutputter::Trace(const std::string& name, 37 int64 start_time, 38 int64 end_time) { 39 TRACE_EVENT_COPY_BEGIN_WITH_ID_TID_AND_TIMESTAMP0( 40 TRACE_DISABLED_BY_DEFAULT("gpu.device"), 41 name.c_str(), 42 local_trace_id_, 43 named_thread_.thread_id(), 44 start_time); 45 TRACE_EVENT_COPY_END_WITH_ID_TID_AND_TIMESTAMP0( 46 TRACE_DISABLED_BY_DEFAULT("gpu.device"), 47 name.c_str(), 48 local_trace_id_, 49 named_thread_.thread_id(), 50 end_time); 51 ++local_trace_id_; 52 } 53 54 class NoopTrace : public Trace { 55 public: 56 explicit NoopTrace(const std::string& name) : Trace(name) {} 57 58 // Implementation of Tracer 59 virtual void Start() OVERRIDE { 60 TRACE_EVENT_COPY_ASYNC_BEGIN0( 61 TRACE_DISABLED_BY_DEFAULT("gpu.service"), name().c_str(), this); 62 } 63 virtual void End() OVERRIDE { 64 TRACE_EVENT_COPY_ASYNC_END0( 65 TRACE_DISABLED_BY_DEFAULT("gpu.service"), name().c_str(), this); 66 } 67 virtual bool IsAvailable() OVERRIDE { return true; } 68 virtual bool IsProcessable() OVERRIDE { return false; } 69 virtual void Process() OVERRIDE {} 70 71 private: 72 virtual ~NoopTrace() {} 73 74 DISALLOW_COPY_AND_ASSIGN(NoopTrace); 75 }; 76 77 struct TraceMarker { 78 TraceMarker(const std::string& name, GpuTracerSource source) 79 : name_(name), source_(source) {} 80 81 std::string name_; 82 GpuTracerSource source_; 83 scoped_refptr<Trace> trace_; 84 }; 85 86 class GPUTracerImpl 87 : public GPUTracer, 88 public base::SupportsWeakPtr<GPUTracerImpl> { 89 public: 90 GPUTracerImpl() 91 : gpu_trace_srv_category(TRACE_EVENT_API_GET_CATEGORY_GROUP_ENABLED( 92 TRACE_DISABLED_BY_DEFAULT("gpu.service"))), 93 gpu_trace_dev_category(TRACE_EVENT_API_GET_CATEGORY_GROUP_ENABLED( 94 TRACE_DISABLED_BY_DEFAULT("gpu.device"))), 95 gpu_executing_(false), 96 process_posted_(false) {} 97 virtual ~GPUTracerImpl() {} 98 99 // Implementation of gpu::gles2::GPUTracer 100 virtual bool BeginDecoding() OVERRIDE; 101 virtual bool EndDecoding() OVERRIDE; 102 virtual bool Begin(const std::string& name, GpuTracerSource source) OVERRIDE; 103 virtual bool End(GpuTracerSource source) OVERRIDE; 104 virtual const std::string& CurrentName() const OVERRIDE; 105 virtual bool IsTracing() OVERRIDE { 106 return (*gpu_trace_srv_category != 0) || (*gpu_trace_dev_category != 0); 107 } 108 virtual void CalculateTimerOffset() {} 109 110 // Process any completed traces. 111 virtual void Process(); 112 virtual void ProcessTraces(); 113 114 protected: 115 // Create a new trace. 116 virtual scoped_refptr<Trace> CreateTrace(const std::string& name); 117 118 const unsigned char* gpu_trace_srv_category; 119 const unsigned char* gpu_trace_dev_category; 120 121 protected: 122 void IssueProcessTask(); 123 124 std::vector<TraceMarker> markers_; 125 std::deque<scoped_refptr<Trace> > traces_; 126 127 bool gpu_executing_; 128 bool process_posted_; 129 130 DISALLOW_COPY_AND_ASSIGN(GPUTracerImpl); 131 }; 132 133 class GPUTracerARBTimerQuery : public GPUTracerImpl { 134 public: 135 explicit GPUTracerARBTimerQuery(gles2::GLES2Decoder* decoder); 136 virtual ~GPUTracerARBTimerQuery(); 137 138 // Implementation of GPUTracerImpl 139 virtual void ProcessTraces() OVERRIDE; 140 141 protected: 142 // Implementation of GPUTracerImpl. 143 virtual bool BeginDecoding() OVERRIDE; 144 virtual bool EndDecoding() OVERRIDE; 145 virtual scoped_refptr<Trace> CreateTrace(const std::string& name) OVERRIDE; 146 virtual void CalculateTimerOffset() OVERRIDE; 147 148 scoped_refptr<Outputter> outputter_; 149 150 bool gpu_timing_synced_; 151 int64 timer_offset_; 152 153 gles2::GLES2Decoder* decoder_; 154 155 DISALLOW_COPY_AND_ASSIGN(GPUTracerARBTimerQuery); 156 }; 157 158 bool Trace::IsProcessable() { return true; } 159 160 const std::string& Trace::name() { return name_; } 161 162 GLARBTimerTrace::GLARBTimerTrace(scoped_refptr<Outputter> outputter, 163 const std::string& name, 164 int64 offset) 165 : Trace(name), 166 outputter_(outputter), 167 offset_(offset), 168 start_time_(0), 169 end_time_(0), 170 end_requested_(false) { 171 glGenQueries(2, queries_); 172 } 173 174 GLARBTimerTrace::~GLARBTimerTrace() { glDeleteQueries(2, queries_); } 175 176 void GLARBTimerTrace::Start() { 177 TRACE_EVENT_COPY_ASYNC_BEGIN0( 178 TRACE_DISABLED_BY_DEFAULT("gpu.service"), name().c_str(), this); 179 glQueryCounter(queries_[0], GL_TIMESTAMP); 180 } 181 182 void GLARBTimerTrace::End() { 183 glQueryCounter(queries_[1], GL_TIMESTAMP); 184 end_requested_ = true; 185 TRACE_EVENT_COPY_ASYNC_END0( 186 TRACE_DISABLED_BY_DEFAULT("gpu.service"), name().c_str(), this); 187 } 188 189 bool GLARBTimerTrace::IsAvailable() { 190 if (!end_requested_) 191 return false; 192 193 GLint done = 0; 194 glGetQueryObjectiv(queries_[1], GL_QUERY_RESULT_AVAILABLE, &done); 195 return !!done; 196 } 197 198 void GLARBTimerTrace::Process() { 199 DCHECK(IsAvailable()); 200 201 GLuint64 timestamp; 202 203 // TODO(dsinclair): It's possible for the timer to wrap during the start/end. 204 // We need to detect if the end is less then the start and correct for the 205 // wrapping. 206 glGetQueryObjectui64v(queries_[0], GL_QUERY_RESULT, ×tamp); 207 start_time_ = (timestamp / base::Time::kNanosecondsPerMicrosecond) + offset_; 208 209 glGetQueryObjectui64v(queries_[1], GL_QUERY_RESULT, ×tamp); 210 end_time_ = (timestamp / base::Time::kNanosecondsPerMicrosecond) + offset_; 211 212 glDeleteQueries(2, queries_); 213 outputter_->Trace(name(), start_time_, end_time_); 214 } 215 216 bool GPUTracerImpl::BeginDecoding() { 217 if (gpu_executing_) 218 return false; 219 220 gpu_executing_ = true; 221 222 if (IsTracing()) { 223 // Begin a Trace for all active markers 224 for (size_t i = 0; i < markers_.size(); i++) { 225 markers_[i].trace_ = CreateTrace(markers_[i].name_); 226 markers_[i].trace_->Start(); 227 } 228 } 229 return true; 230 } 231 232 bool GPUTracerImpl::EndDecoding() { 233 if (!gpu_executing_) 234 return false; 235 236 // End Trace for all active markers 237 if (IsTracing()) { 238 for (size_t i = 0; i < markers_.size(); i++) { 239 if (markers_[i].trace_) { 240 markers_[i].trace_->End(); 241 if (markers_[i].trace_->IsProcessable()) 242 traces_.push_back(markers_[i].trace_); 243 markers_[i].trace_ = 0; 244 } 245 } 246 IssueProcessTask(); 247 } 248 249 gpu_executing_ = false; 250 return true; 251 } 252 253 bool GPUTracerImpl::Begin(const std::string& name, GpuTracerSource source) { 254 if (!gpu_executing_) 255 return false; 256 257 // Push new marker from given 'source' 258 markers_.push_back(TraceMarker(name, source)); 259 260 // Create trace 261 if (IsTracing()) { 262 scoped_refptr<Trace> trace = CreateTrace(name); 263 trace->Start(); 264 markers_.back().trace_ = trace; 265 } 266 return true; 267 } 268 269 bool GPUTracerImpl::End(GpuTracerSource source) { 270 if (!gpu_executing_) 271 return false; 272 273 // Pop last marker with matching 'source' 274 for (int i = markers_.size() - 1; i >= 0; i--) { 275 if (markers_[i].source_ == source) { 276 // End trace 277 if (IsTracing()) { 278 scoped_refptr<Trace> trace = markers_[i].trace_; 279 if (trace) { 280 trace->End(); 281 if (trace->IsProcessable()) 282 traces_.push_back(trace); 283 IssueProcessTask(); 284 } 285 } 286 287 markers_.erase(markers_.begin() + i); 288 return true; 289 } 290 } 291 return false; 292 } 293 294 void GPUTracerImpl::Process() { 295 process_posted_ = false; 296 ProcessTraces(); 297 IssueProcessTask(); 298 } 299 300 void GPUTracerImpl::ProcessTraces() { 301 while (!traces_.empty() && traces_.front()->IsAvailable()) { 302 traces_.front()->Process(); 303 traces_.pop_front(); 304 } 305 } 306 307 const std::string& GPUTracerImpl::CurrentName() const { 308 if (markers_.empty()) 309 return base::EmptyString(); 310 return markers_.back().name_; 311 } 312 313 scoped_refptr<Trace> GPUTracerImpl::CreateTrace(const std::string& name) { 314 return new NoopTrace(name); 315 } 316 317 void GPUTracerImpl::IssueProcessTask() { 318 if (traces_.empty() || process_posted_) 319 return; 320 321 process_posted_ = true; 322 base::MessageLoop::current()->PostDelayedTask( 323 FROM_HERE, 324 base::Bind(&GPUTracerImpl::Process, base::AsWeakPtr(this)), 325 base::TimeDelta::FromMilliseconds(kProcessInterval)); 326 } 327 328 GPUTracerARBTimerQuery::GPUTracerARBTimerQuery(gles2::GLES2Decoder* decoder) 329 : timer_offset_(0), decoder_(decoder) { 330 outputter_ = TraceOutputter::Create("GL_ARB_timer_query"); 331 } 332 333 GPUTracerARBTimerQuery::~GPUTracerARBTimerQuery() { 334 } 335 336 scoped_refptr<Trace> GPUTracerARBTimerQuery::CreateTrace( 337 const std::string& name) { 338 if (*gpu_trace_dev_category) 339 return new GLARBTimerTrace(outputter_, name, timer_offset_); 340 return GPUTracerImpl::CreateTrace(name); 341 } 342 343 bool GPUTracerARBTimerQuery::BeginDecoding() { 344 if (*gpu_trace_dev_category) { 345 // Make sure timing is synced before tracing 346 if (!gpu_timing_synced_) { 347 CalculateTimerOffset(); 348 gpu_timing_synced_ = true; 349 } 350 } else { 351 // If GPU device category is off, invalidate timing sync 352 gpu_timing_synced_ = false; 353 } 354 355 return GPUTracerImpl::BeginDecoding(); 356 } 357 358 bool GPUTracerARBTimerQuery::EndDecoding() { 359 bool ret = GPUTracerImpl::EndDecoding(); 360 361 // NOTE(vmiura_: glFlush() here can help give better trace results, 362 // but it distorts the normal device behavior. 363 return ret; 364 } 365 366 void GPUTracerARBTimerQuery::ProcessTraces() { 367 TRACE_EVENT0("gpu", "GPUTracerARBTimerQuery::ProcessTraces"); 368 369 // Make owning decoder's GL context current 370 if (!decoder_->MakeCurrent()) { 371 // Skip subsequent GL calls if MakeCurrent fails 372 traces_.clear(); 373 return; 374 } 375 376 while (!traces_.empty() && traces_.front()->IsAvailable()) { 377 traces_.front()->Process(); 378 traces_.pop_front(); 379 } 380 381 // Clear pending traces if there were are any errors 382 GLenum err = glGetError(); 383 if (err != GL_NO_ERROR) 384 traces_.clear(); 385 } 386 387 void GPUTracerARBTimerQuery::CalculateTimerOffset() { 388 TRACE_EVENT0("gpu", "GPUTracerARBTimerQuery::CalculateTimerOffset"); 389 390 // NOTE(vmiura): It would be better to use glGetInteger64v, however 391 // it's not available everywhere. 392 GLuint64 gl_now = 0; 393 GLuint query; 394 glFinish(); 395 glGenQueries(1, &query); 396 glQueryCounter(query, GL_TIMESTAMP); 397 glFinish(); 398 glGetQueryObjectui64v(query, GL_QUERY_RESULT, &gl_now); 399 base::TimeTicks system_now = base::TimeTicks::NowFromSystemTraceTime(); 400 401 gl_now /= base::Time::kNanosecondsPerMicrosecond; 402 timer_offset_ = system_now.ToInternalValue() - gl_now; 403 glDeleteQueries(1, &query); 404 } 405 406 GPUTracer::GPUTracer() {} 407 408 GPUTracer::~GPUTracer() {} 409 410 scoped_ptr<GPUTracer> GPUTracer::Create(gles2::GLES2Decoder* decoder) { 411 if (gfx::g_driver_gl.ext.b_GL_ARB_timer_query) { 412 return scoped_ptr<GPUTracer>(new GPUTracerARBTimerQuery(decoder)); 413 } 414 return scoped_ptr<GPUTracer>(new GPUTracerImpl()); 415 } 416 417 } // namespace gles2 418 } // namespace gpu 419