// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "ui/surface/accelerated_surface_transformer_win.h"

#include <vector>

#include "accelerated_surface_transformer_win_hlsl_compiled.h"
#include "base/debug/trace_event.h"
#include "base/memory/ref_counted.h"
#include "base/metrics/histogram.h"
#include "base/single_thread_task_runner.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "base/win/scoped_comptr.h"
#include "ui/gfx/native_widget_types.h"
#include "ui/gfx/rect.h"
#include "ui/gfx/size.h"
#include "ui/surface/d3d9_utils_win.h"
#include "ui/surface/surface_export.h"

using base::win::ScopedComPtr;
using std::vector;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY8UV44;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertUV44toU2V2;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsOneTexture;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch2Pixels;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4Pixels;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsOneTexture;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4PixelsScale2;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoU;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoV;

namespace d3d_utils = ui_surface_d3d9_utils;

namespace {

// Vertex layout for the screen-aligned quad: a float4 position followed by a
// float2 texture coordinate (matches |g_vertexElements| below).
struct Vertex {
  float x, y, z, w;
  float u, v;
};

const static D3DVERTEXELEMENT9 g_vertexElements[] = {
  { 0, 0, D3DDECLTYPE_FLOAT4, 0, D3DDECLUSAGE_POSITION, 0 },
  { 0, 16, D3DDECLTYPE_FLOAT2, 0, D3DDECLUSAGE_TEXCOORD, 0 },
  D3DDECL_END()
};

// RAII helper: captures the device's current render target for
// |render_target_id| at construction and restores it at destruction, so a
// scope can temporarily redirect rendering to another surface.
class ScopedRenderTargetRestorer {
 public:
  ScopedRenderTargetRestorer(IDirect3DDevice9* device,
                             int render_target_id)
    : device_(device),
      target_id_(render_target_id) {
    device_->GetRenderTarget(target_id_, original_render_target_.Receive());
  }
  ~ScopedRenderTargetRestorer() {
    device_->SetRenderTarget(target_id_, original_render_target_);
  }
 private:
  ScopedComPtr<IDirect3DDevice9> device_;
  int target_id_;
  ScopedComPtr<IDirect3DSurface9> original_render_target_;
};

// Calculate the number of downsampling passes necessary to transform
// |src_subrect| into |dst_size| by repeating downsampling of the image of
// |src_subrect| by a factor no more than 2.
int GetResampleCount(const gfx::Rect& src_subrect,
                     const gfx::Size& dst_size) {
  // At least one copy is required, since the back buffer itself is not
  // lockable.
  int min_resample_count = 1;
  int width_count = 0;
  int width = src_subrect.width();
  while (width > dst_size.width()) {
    ++width_count;
    width >>= 1;
  }
  int height_count = 0;
  int height = src_subrect.height();
  while (height > dst_size.height()) {
    ++height_count;
    height >>= 1;
  }
  return std::max(std::max(width_count, height_count),
                  min_resample_count);
}

// Returns half the size of |size|, componentwise, but no smaller than
// |min_size|.
gfx::Size GetHalfSizeNoLessThan(const gfx::Size& size,
                                const gfx::Size& min_size) {
  return gfx::Size(std::max(min_size.width(), size.width() / 2),
                   std::max(min_size.height(), size.height() / 2));
}

}  // namespace

AcceleratedSurfaceTransformer::AcceleratedSurfaceTransformer()
    : device_supports_multiple_render_targets_(false),
      vertex_shader_sources_(),
      pixel_shader_sources_() {

  // Associate passes with actual shader programs.
  vertex_shader_sources_[ONE_TEXTURE] = kVsOneTexture;
  pixel_shader_sources_[ONE_TEXTURE] = kPsOneTexture;

  vertex_shader_sources_[RGB_TO_YV12_FAST__PASS_1_OF_2] = kVsFetch4Pixels;
  pixel_shader_sources_[RGB_TO_YV12_FAST__PASS_1_OF_2] = kPsConvertRGBtoY8UV44;

  vertex_shader_sources_[RGB_TO_YV12_FAST__PASS_2_OF_2] = kVsFetch2Pixels;
  pixel_shader_sources_[RGB_TO_YV12_FAST__PASS_2_OF_2] = kPsConvertUV44toU2V2;

  vertex_shader_sources_[RGB_TO_YV12_SLOW__PASS_1_OF_3] = kVsFetch4Pixels;
  pixel_shader_sources_[RGB_TO_YV12_SLOW__PASS_1_OF_3] = kPsConvertRGBtoY;

  vertex_shader_sources_[RGB_TO_YV12_SLOW__PASS_2_OF_3] = kVsFetch4PixelsScale2;
  pixel_shader_sources_[RGB_TO_YV12_SLOW__PASS_2_OF_3] = kPsConvertRGBtoU;

  vertex_shader_sources_[RGB_TO_YV12_SLOW__PASS_3_OF_3] = kVsFetch4PixelsScale2;
  pixel_shader_sources_[RGB_TO_YV12_SLOW__PASS_3_OF_3] = kPsConvertRGBtoV;

  // Catch (at compile time) any ShaderCombo added to the enum without a
  // source assignment above.
  COMPILE_ASSERT(NUM_SHADERS == 6, must_initialize_shader_sources);
}

// Initializes the transformer for |device|. On failure, any
// partially-acquired resources are released before returning false.
bool AcceleratedSurfaceTransformer::Init(IDirect3DDevice9* device) {
  bool result = DoInit(device);
  if (!result) {
    ReleaseAll();
  }
  return result;
}

bool AcceleratedSurfaceTransformer::DoInit(IDirect3DDevice9* device) {
  device_ = device;

  {
    D3DCAPS9 caps;
    HRESULT hr = device->GetDeviceCaps(&caps);
    if (FAILED(hr))
      return false;

    // The fast RGB-to-YV12 path renders to two color targets at once.
    device_supports_multiple_render_targets_ = (caps.NumSimultaneousRTs >= 2);

    // Log statistics about which paths we take.
    UMA_HISTOGRAM_BOOLEAN("GPU.AcceleratedSurfaceTransformerCanUseMRT",
                          device_supports_multiple_render_targets());
  }

  // Force compilation of all shaders that could be used on this GPU.
  if (!CompileShaderCombo(ONE_TEXTURE))
    return false;

  if (device_supports_multiple_render_targets()) {
    if (!CompileShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2) ||
        !CompileShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2)) {
      return false;
    }
  } else {
    if (!CompileShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3) ||
        !CompileShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3) ||
        !CompileShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3)) {
      return false;
    }
  }
  // Reminder to compile any newly added shader combo above as well.
  COMPILE_ASSERT(NUM_SHADERS == 6, must_compile_at_doinit);

  ScopedComPtr<IDirect3DVertexDeclaration9> vertex_declaration;
  HRESULT hr = device_->CreateVertexDeclaration(g_vertexElements,
                                                vertex_declaration.Receive());
  if (FAILED(hr))
    return false;
  hr = device_->SetVertexDeclaration(vertex_declaration);
  if (FAILED(hr))
    return false;

  return true;
}

// Lazily creates the vertex and pixel shaders for |shader|. Combos whose
// source pointers are identical share one compiled shader object: after a
// successful create, the loops below copy the new shader to every other
// combo with the same source, so it is only compiled once.
bool AcceleratedSurfaceTransformer::CompileShaderCombo(
    ShaderCombo shader) {
  if (!vertex_shaders_[shader]) {
    HRESULT hr = device_->CreateVertexShader(
        reinterpret_cast<const DWORD*>(vertex_shader_sources_[shader]),
        vertex_shaders_[shader].Receive());

    if (FAILED(hr))
      return false;

    // Share the compiled shader with combos that use the same source.
    for (int i = 0; i < NUM_SHADERS; ++i) {
      if (vertex_shader_sources_[i] == vertex_shader_sources_[shader] &&
          i != shader) {
        vertex_shaders_[i] = vertex_shaders_[shader];
      }
    }
  }

  if (!pixel_shaders_[shader]) {
    HRESULT hr = device_->CreatePixelShader(
        reinterpret_cast<const DWORD*>(pixel_shader_sources_[shader]),
        pixel_shaders_[shader].Receive());

    if (FAILED(hr))
      return false;

    // Share the compiled shader with combos that use the same source.
    for (int i = 0; i < NUM_SHADERS; ++i) {
      if (pixel_shader_sources_[i] == pixel_shader_sources_[shader] &&
          i != shader) {
        pixel_shaders_[i] = pixel_shaders_[shader];
      }
    }
  }

  return true;
}

// Drops all shader, scratch-surface and device references, releasing the
// underlying COM objects.
void AcceleratedSurfaceTransformer::ReleaseAll() {
  for (int i = 0; i < NUM_SHADERS; i++) {
    vertex_shaders_[i] = NULL;
    pixel_shaders_[i] = NULL;
  }

  user_scratch_texture_ = NULL;
  uv_scratch_texture_ = NULL;
  y_scratch_surface_ = NULL;
  u_scratch_surface_ = NULL;
  v_scratch_surface_ = NULL;
  for (int i = 0; i < arraysize(scaler_scratch_surfaces_); i++)
    scaler_scratch_surfaces_[i] = NULL;

  device_ = NULL;
}

// Abandons all references WITHOUT calling Release() on them.
// NOTE(review): presumably for the case where the device is already
// lost/destroyed and releasing through it would be unsafe — confirm intent
// against the header's documentation.
void AcceleratedSurfaceTransformer::DetachAll() {
  for (int i = 0; i < NUM_SHADERS; i++) {
    vertex_shaders_[i].Detach();
    pixel_shaders_[i].Detach();
  }

  user_scratch_texture_.Detach();
  uv_scratch_texture_.Detach();
  y_scratch_surface_.Detach();
  u_scratch_surface_.Detach();
  v_scratch_surface_.Detach();
  for (int i = 0; i < arraysize(scaler_scratch_surfaces_); i++)
    scaler_scratch_surfaces_[i].Detach();

  device_.Detach();
}

// Draws |src_texture| into |dst_surface| vertically flipped (negative
// texture v scale).
bool AcceleratedSurfaceTransformer::CopyInverted(
    IDirect3DTexture9* src_texture,
    IDirect3DSurface9* dst_surface,
    const gfx::Size& dst_size) {
  return CopyWithTextureScale(src_texture, dst_surface, dst_size, 1.0f, -1.0f);
}

// Draws |src_texture| into |dst_surface| with unmodified texture
// coordinates.
bool AcceleratedSurfaceTransformer::Copy(
    IDirect3DTexture9* src_texture,
    IDirect3DSurface9* dst_surface,
    const gfx::Size& dst_size) {
  return CopyWithTextureScale(src_texture, dst_surface, dst_size, 1.0f, 1.0f);
}

// Draws |src_texture| to fill |dst_surface|, scaling texture coordinates by
// (texture_scale_x, texture_scale_y); negative values mirror the image along
// that axis.
bool AcceleratedSurfaceTransformer::CopyWithTextureScale(
    IDirect3DTexture9* src_texture,
    IDirect3DSurface9* dst_surface,
    const gfx::Size& dst_size,
    float texture_scale_x,
    float texture_scale_y) {

  if (!SetShaderCombo(ONE_TEXTURE))
    return false;

  // Set the kTextureScale vertex shader constant, which is assigned to
  // register 1.
  float texture_scale[4] = {texture_scale_x, texture_scale_y, 0, 0};
  device()->SetVertexShaderConstantF(1, texture_scale, 1);

  // Point color target 0 at |dst_surface|; the previous target is restored
  // when |render_target_restorer| goes out of scope.
  ScopedRenderTargetRestorer render_target_restorer(device(), 0);
  device()->SetRenderTarget(0, dst_surface);
  device()->SetTexture(0, src_texture);

  D3DVIEWPORT9 viewport = {
    0, 0,
    dst_size.width(), dst_size.height(),
    0, 1
  };
  device()->SetViewport(&viewport);

  // Point sampling suffices for a 1:1 blit; use bilinear filtering whenever
  // the sizes differ.
  if (d3d_utils::GetSize(src_texture) == dst_size) {
    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
  } else {
    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
  }
  device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
  device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);

  DrawScreenAlignedQuad(dst_size);

  // Clear surface references.
  device()->SetTexture(0, NULL);
  return true;
}

// Draws a quad that exactly covers a render target of dimensions |size|.
void AcceleratedSurfaceTransformer::DrawScreenAlignedQuad(
    const gfx::Size& size) {
  const float target_size[4] = { size.width(), size.height(), 0, 0};

  // Set the uniform shader constant |kRenderTargetSize|, which is bound
  // to register c0.
  device()->SetVertexShaderConstantF(0, target_size, 1);

  // We always send down the same vertices. The vertex program will take
  // care of doing resolution-dependent position adjustment.
  // Full-screen quad in clip space, with texcoords mapping (0,0)-(1,1).
  Vertex vertices[] = {
    { -1, +1, 0.5f, 1, 0, 0 },
    { +1, +1, 0.5f, 1, 1, 0 },
    { +1, -1, 0.5f, 1, 1, 1 },
    { -1, -1, 0.5f, 1, 0, 1 }
  };

  // Two triangles as a fan, supplied directly from user memory.
  device()->BeginScene();
  device()->DrawPrimitiveUP(D3DPT_TRIANGLEFAN,
                            2,
                            vertices,
                            sizeof(vertices[0]));
  device()->EndScene();

}

// Returns (via out-params) a render-target texture of |size|, reusing the
// cached |user_scratch_texture_| when possible. The caller receives its own
// reference to the texture.
bool AcceleratedSurfaceTransformer::GetIntermediateTexture(
    const gfx::Size& size,
    IDirect3DTexture9** texture,
    IDirect3DSurface9** texture_level_zero) {
  if (!d3d_utils::CreateOrReuseRenderTargetTexture(device(),
                                                   size,
                                                   &user_scratch_texture_,
                                                   texture_level_zero))
    return false;

  // Hand the caller an AddRef'd pointer while keeping our cached reference.
  *texture = ScopedComPtr<IDirect3DTexture9>(user_scratch_texture_).Detach();
  return true;
}

// Resize an RGB surface using repeated linear interpolation.
bool AcceleratedSurfaceTransformer::ResizeBilinear(
    IDirect3DSurface9* src_surface,
    const gfx::Rect& src_subrect,
    IDirect3DSurface9* dst_surface,
    const gfx::Rect& dst_rect) {
  // The ping-pong loop below assumes exactly two scratch surfaces.
  COMPILE_ASSERT(arraysize(scaler_scratch_surfaces_) == 2, surface_count);

  gfx::Size src_size = src_subrect.size();
  gfx::Size dst_size = dst_rect.size();

  if (src_size.IsEmpty() || dst_size.IsEmpty())
    return false;

  HRESULT hr = S_OK;
  // Set up intermediate buffers needed for downsampling.
  const int resample_count = GetResampleCount(src_subrect, dst_size);
  const gfx::Size half_size =
      GetHalfSizeNoLessThan(src_subrect.size(), dst_size);
  if (resample_count > 1) {
    if (!d3d_utils::CreateOrReuseLockableSurface(device(),
                                                 half_size,
                                                 &scaler_scratch_surfaces_[0]))
      return false;
  }
  if (resample_count > 2) {
    const gfx::Size quarter_size = GetHalfSizeNoLessThan(half_size, dst_size);
    if (!d3d_utils::CreateOrReuseLockableSurface(device(),
                                                 quarter_size,
                                                 &scaler_scratch_surfaces_[1]))
      return false;
  }

  // Repeat downsampling the surface until its size becomes identical to
  // |dst_size|. We keep the factor of each downsampling no more than two
  // because using a factor more than two can introduce aliasing. The two
  // scratch surfaces are used in ping-pong fashion (indices swapped each
  // iteration).
  RECT read_rect = src_subrect.ToRECT();
  gfx::Size write_size = half_size;
  int read_buffer_index = 1;
  int write_buffer_index = 0;
  for (int i = 0; i < resample_count; ++i) {
    TRACE_EVENT0("gpu", "StretchRect");
    // The first pass reads the caller's surface; the last pass writes
    // directly to the caller's destination.
    IDirect3DSurface9* read_buffer =
        (i == 0) ? src_surface : scaler_scratch_surfaces_[read_buffer_index];
    IDirect3DSurface9* write_buffer;
    RECT write_rect;
    if (i == resample_count - 1) {
      write_buffer = dst_surface;
      write_rect = dst_rect.ToRECT();
    } else {
      write_buffer = scaler_scratch_surfaces_[write_buffer_index];
      write_rect = gfx::Rect(write_size).ToRECT();
    }

    hr = device()->StretchRect(read_buffer,
                               &read_rect,
                               write_buffer,
                               &write_rect,
                               D3DTEXF_LINEAR);

    if (FAILED(hr))
      return false;
    read_rect = write_rect;
    write_size = GetHalfSizeNoLessThan(write_size, dst_size);
    std::swap(read_buffer_index, write_buffer_index);
  }

  return true;
}

// Converts |src_surface| to YV12 planes of |dst_size|, allocating (or
// reusing) the three lockable output surfaces, then dispatching to the MRT
// or non-MRT implementation based on device capability.
bool AcceleratedSurfaceTransformer::TransformRGBToYV12(
    IDirect3DTexture9* src_surface,
    const gfx::Size& dst_size,
    IDirect3DSurface9** dst_y,
    IDirect3DSurface9** dst_u,
    IDirect3DSurface9** dst_v) {
  gfx::Size packed_y_size;
  gfx::Size packed_uv_size;
  if (!AllocYUVBuffers(dst_size, &packed_y_size, &packed_uv_size,
                       dst_y, dst_u, dst_v)) {
    return false;
  }

  if (device_supports_multiple_render_targets()) {
    return TransformRGBToYV12_MRT(src_surface,
                                  dst_size,
                                  packed_y_size,
                                  packed_uv_size,
                                  *dst_y,
                                  *dst_u,
                                  *dst_v);
  } else {
    return TransformRGBToYV12_WithoutMRT(src_surface,
                                         dst_size,
                                         packed_y_size,
                                         packed_uv_size,
                                         *dst_y,
                                         *dst_u,
                                         *dst_v);
  }
}

// Reads back |gpu_surface| into |dst| using the fastest readback path.
bool AcceleratedSurfaceTransformer::ReadFast(IDirect3DSurface9* gpu_surface,
                                             uint8* dst,
                                             int dst_bytes_per_row,
                                             int dst_num_rows,
                                             int dst_stride) {
  // TODO(nick): Compared to GetRenderTargetData, LockRect+memcpy is 50% faster
  // on some systems, but 100x slower on others. We should have logic here to
  // choose the best path, probably by adaptively trying both and picking the
  // faster one.
  // http://crbug.com/168532
  return ReadByGetRenderTargetData(gpu_surface, dst, dst_bytes_per_row,
                                   dst_num_rows, dst_stride);
}

// Reads back |gpu_surface| by locking it and copying row by row into |dst|:
// |dst_bytes_per_row| bytes per row, advancing by the locked pitch in the
// source and by |dst_stride| bytes in the destination.
bool AcceleratedSurfaceTransformer::ReadByLockAndCopy(
    IDirect3DSurface9* gpu_surface,
    uint8* dst,
    int dst_bytes_per_row,
    int dst_num_rows,
    int dst_stride) {
  D3DLOCKED_RECT locked_rect;
  {
    TRACE_EVENT0("gpu", "LockRect");
    HRESULT hr = gpu_surface->LockRect(&locked_rect, NULL,
                                       D3DLOCK_READONLY | D3DLOCK_NOSYSLOCK);
    if (FAILED(hr)) {
      LOG(ERROR) << "Failed to lock surface";
      return false;
    }
  }

  {
    TRACE_EVENT0("gpu", "memcpy");
    uint8* dst_row = dst;
    uint8* src_row = reinterpret_cast<uint8*>(locked_rect.pBits);
    for (int i = 0; i < dst_num_rows; i++) {
      memcpy(dst_row, src_row, dst_bytes_per_row);
      src_row += locked_rect.Pitch;
      dst_row += dst_stride;
    }
  }
  gpu_surface->UnlockRect();
  return true;
}

// Reads back |gpu_surface| via GetRenderTargetData into a system-memory
// surface. When |dst|'s layout exactly matches the surface (tight 4-byte
// pixels, no extra stride), the system-memory surface is created directly on
// top of |dst| and no further copy is needed; otherwise an extra
// lock-and-copy is performed.
bool AcceleratedSurfaceTransformer::ReadByGetRenderTargetData(
    IDirect3DSurface9* gpu_surface,
    uint8* dst,
    int dst_bytes_per_row,
    int dst_num_rows,
    int dst_stride) {
  HRESULT hr = 0;
  ScopedComPtr<IDirect3DSurface9> system_surface;
  gfx::Size src_size = d3d_utils::GetSize(gpu_surface);

  // Depending on pitch and alignment, we might be able to wrap |dst| in an
  // offscreen- plain surface for a direct copy. Assumes a
  // 4-bytes-per-pixel (A8R8G8B8) destination.
  const bool direct_copy = (dst_stride == dst_bytes_per_row &&
                            src_size.width() * 4 == dst_bytes_per_row &&
                            dst_num_rows >= src_size.height());

  {
    TRACE_EVENT0("gpu", "CreateOffscreenPlainSurface");
    // NOTE(review): passing a pointer to user memory as pSharedHandle makes
    // the (D3D9Ex) runtime use |dst| itself as the surface's storage —
    // confirm the device is created as IDirect3DDevice9Ex.
    HANDLE handle = reinterpret_cast<HANDLE>(dst);
    hr = device()->CreateOffscreenPlainSurface(src_size.width(),
                                               src_size.height(),
                                               D3DFMT_A8R8G8B8,
                                               D3DPOOL_SYSTEMMEM,
                                               system_surface.Receive(),
                                               direct_copy ? &handle : NULL);
    if (!SUCCEEDED(hr)) {
      LOG(ERROR) << "Failed to create offscreen plain surface.";
      return false;
    }
  }

  {
    TRACE_EVENT0("gpu", "GetRenderTargetData");
    hr = device()->GetRenderTargetData(gpu_surface, system_surface);
    if (FAILED(hr)) {
      LOG(ERROR) << "Failed GetRenderTargetData";
      return false;
    }
  }

  if (direct_copy) {
    // We're done: |system_surface| is a wrapper around |dst|.
    return true;
  } else {
    // Extra memcpy required from |system_surface| to |dst|.
    return ReadByLockAndCopy(system_surface, dst, dst_bytes_per_row,
                             dst_num_rows, dst_stride);
  }
}

// Computes the packed Y and UV plane sizes for a YV12 conversion of
// |dst_size| and (re)allocates the three lockable output surfaces. On
// success, the caller receives its own reference to each surface.
bool AcceleratedSurfaceTransformer::AllocYUVBuffers(
    const gfx::Size& dst_size,
    gfx::Size* y_size,
    gfx::Size* uv_size,
    IDirect3DSurface9** dst_y,
    IDirect3DSurface9** dst_u,
    IDirect3DSurface9** dst_v) {

  // Y is full height, packed into 4 components.
  *y_size = gfx::Size((dst_size.width() + 3) / 4, dst_size.height());

  // U and V are half the size (rounded up) of Y.
  *uv_size = gfx::Size((y_size->width() + 1) / 2, (y_size->height() + 1) / 2);

  if (!d3d_utils::CreateOrReuseLockableSurface(device(), *y_size,
                                               &y_scratch_surface_)) {
    return false;
  }
  if (!d3d_utils::CreateOrReuseLockableSurface(device(), *uv_size,
                                               &u_scratch_surface_)) {
    return false;
  }
  if (!d3d_utils::CreateOrReuseLockableSurface(device(), *uv_size,
                                               &v_scratch_surface_)) {
    return false;
  }

  // Hand out AddRef'd pointers while keeping our own cached references.
  *dst_y = ScopedComPtr<IDirect3DSurface9>(y_scratch_surface_).Detach();
  *dst_u = ScopedComPtr<IDirect3DSurface9>(u_scratch_surface_).Detach();
  *dst_v = ScopedComPtr<IDirect3DSurface9>(v_scratch_surface_).Detach();

  return true;
}

// Two-pass RGB-to-YV12 conversion using two simultaneous render targets
// (requires caps.NumSimultaneousRTs >= 2, checked at DoInit).
bool AcceleratedSurfaceTransformer::TransformRGBToYV12_MRT(
    IDirect3DTexture9* src_surface,
    const gfx::Size& dst_size,
    const gfx::Size& packed_y_size,
    const gfx::Size& packed_uv_size,
    IDirect3DSurface9* dst_y,
    IDirect3DSurface9* dst_u,
    IDirect3DSurface9* dst_v) {
  TRACE_EVENT0("gpu", "RGBToYV12_MRT");

  ScopedRenderTargetRestorer color0_restorer(device(), 0);
  ScopedRenderTargetRestorer color1_restorer(device(), 1);

  // Create an intermediate surface to hold the UUVV values. This is color
  // target 1 for the first pass, and texture 0 for the second pass. Its
  // values are not read afterwards.

  ScopedComPtr<IDirect3DSurface9> uv_as_surface;
  if (!d3d_utils::CreateOrReuseRenderTargetTexture(device(),
                                                   packed_y_size,
                                                   &uv_scratch_texture_,
                                                   uv_as_surface.Receive())) {
    return false;
  }

  // Clamping is required if (dst_size.width() % 8 != 0) or if
  // (dst_size.height() % 2 != 0), so we set it always. Both passes rely on
  // this.
  device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
  device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);

  /////////////////////////////////////////
  // Pass 1: RGB --(scaled)--> YYYY + UUVV
  SetShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2);

  // Enable bilinear filtering if scaling is required. The filtering will take
  // place entirely in the first pass.
  if (d3d_utils::GetSize(src_surface) != dst_size) {
    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
  } else {
    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
  }

  // Render Y to target 0 and packed UV to target 1 in a single draw.
  device()->SetTexture(0, src_surface);
  device()->SetRenderTarget(0, dst_y);
  device()->SetRenderTarget(1, uv_as_surface);
  DrawScreenAlignedQuad(dst_size);

  /////////////////////////////////////////
  // Pass 2: UUVV -> UUUU + VVVV
  SetShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2);

  // The second pass uses bilinear minification to achieve vertical scaling,
  // so enable it always.
  device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
  device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);

  device()->SetTexture(0, uv_scratch_texture_);
  device()->SetRenderTarget(0, dst_u);
  device()->SetRenderTarget(1, dst_v);
  DrawScreenAlignedQuad(packed_y_size);

  // Clear surface references.
  device()->SetTexture(0, NULL);
  return true;
}

// Three-pass RGB-to-YV12 conversion for devices without multiple
// simultaneous render targets: the Y, U and V planes are each rendered in
// their own pass.
bool AcceleratedSurfaceTransformer::TransformRGBToYV12_WithoutMRT(
    IDirect3DTexture9* src_surface,
    const gfx::Size& dst_size,
    const gfx::Size& packed_y_size,
    const gfx::Size& packed_uv_size,
    IDirect3DSurface9* dst_y,
    IDirect3DSurface9* dst_u,
    IDirect3DSurface9* dst_v) {
  TRACE_EVENT0("gpu", "RGBToYV12_WithoutMRT");

  ScopedRenderTargetRestorer color0_restorer(device(), 0);

  ScopedComPtr<IDirect3DTexture9> scaled_src_surface;

  // If scaling is requested, do it to a temporary texture. The MRT path
  // gets a scale for free, so we need to support it here too (even though
  // it's an extra operation).
  if (d3d_utils::GetSize(src_surface) == dst_size) {
    scaled_src_surface = src_surface;
  } else {
    ScopedComPtr<IDirect3DSurface9> dst_level0;
    if (!d3d_utils::CreateOrReuseRenderTargetTexture(
            device(), dst_size, &uv_scratch_texture_, dst_level0.Receive())) {
      return false;
    }
    if (!Copy(src_surface, dst_level0, dst_size)) {
      return false;
    }
    scaled_src_surface = uv_scratch_texture_;
  }

  // Input texture is the same for all three passes.
  device()->SetTexture(0, scaled_src_surface);

  // Clamping is required if (dst_size.width() % 8 != 0) or if
  // (dst_size.height() % 2 != 0), so we set it always. All passes rely on
  // this.
  device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
  device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);

  /////////////////////
  // Pass 1: RGB -> Y.
  SetShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3);

  // Pass 1 just needs point sampling.
  device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
  device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);

  device()->SetRenderTarget(0, dst_y);
  DrawScreenAlignedQuad(dst_size);

  // Passes 2 and 3 rely on bilinear minification to downsample U and V.
  device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
  device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);

  /////////////////////
  // Pass 2: RGB -> U.
  SetShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3);
  device()->SetRenderTarget(0, dst_u);
  DrawScreenAlignedQuad(dst_size);

  /////////////////////
  // Pass 3: RGB -> V.
  SetShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3);
  device()->SetRenderTarget(0, dst_v);
  DrawScreenAlignedQuad(dst_size);

  // Clear surface references.
  device()->SetTexture(0, NULL);
  return true;
}

// Accessor for the device acquired at Init() time.
IDirect3DDevice9* AcceleratedSurfaceTransformer::device() {
  return device_;
}

// Binds the vertex and pixel shaders for |combo|, compiling them first if
// they have not been created yet.
bool AcceleratedSurfaceTransformer::SetShaderCombo(ShaderCombo combo) {
  // Compile shaders on first use, if needed. Normally the compilation should
  // already have happened at Init() time, but test code might force
  // us down an unusual path.
  if (!CompileShaderCombo(combo))
    return false;

  HRESULT hr = device()->SetVertexShader(vertex_shaders_[combo]);
  if (!SUCCEEDED(hr))
    return false;
  hr = device()->SetPixelShader(pixel_shaders_[combo]);
  if (!SUCCEEDED(hr))
    return false;
  return true;
}