// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "ui/surface/accelerated_surface_transformer_win.h"

#include <vector>

#include "accelerated_surface_transformer_win_hlsl_compiled.h"
#include "base/debug/trace_event.h"
#include "base/memory/ref_counted.h"
#include "base/metrics/histogram.h"
#include "base/single_thread_task_runner.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "base/win/scoped_comptr.h"
#include "ui/gfx/native_widget_types.h"
#include "ui/gfx/rect.h"
#include "ui/gfx/size.h"
#include "ui/surface/d3d9_utils_win.h"
#include "ui/surface/surface_export.h"

using base::win::ScopedComPtr;
using std::vector;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY8UV44;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertUV44toU2V2;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsOneTexture;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch2Pixels;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4Pixels;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsOneTexture;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4PixelsScale2;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoU;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoV;

namespace d3d_utils = ui_surface_d3d9_utils;

namespace {

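// Vertex layout used by DrawScreenAlignedQuad(): a float4 clip-space
// position followed by a float2 texture coordinate, matching the
// D3DVERTEXELEMENT9 declaration in |g_vertexElements| below.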
struct Vertex {
  float x, y, z, w;
  float u, v;
};

const static D3DVERTEXELEMENT9 g_vertexElements[] = {
  { 0, 0, D3DDECLTYPE_FLOAT4, 0, D3DDECLUSAGE_POSITION, 0 },
  { 0, 16, D3DDECLTYPE_FLOAT2, 0, D3DDECLUSAGE_TEXCOORD, 0 },
  D3DDECL_END()
};

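// Saves the render target bound to |render_target_id| when constructed and
// restores it when destroyed, so callers may rebind render targets freely
// within a scope.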
class ScopedRenderTargetRestorer {
 public:
  ScopedRenderTargetRestorer(IDirect3DDevice9* device,
                             int render_target_id)
    : device_(device),
      target_id_(render_target_id) {
    device_->GetRenderTarget(target_id_, original_render_target_.Receive());
  }
  ~ScopedRenderTargetRestorer() {
    device_->SetRenderTarget(target_id_, original_render_target_);
  }
 private:
  ScopedComPtr<IDirect3DDevice9> device_;
  int target_id_;
  ScopedComPtr<IDirect3DSurface9> original_render_target_;
};

// Calculates how many downsampling passes are needed to transform
// |src_subrect| into |dst_size|, where each pass shrinks the image by a
// factor of no more than 2.
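// For example, shrinking a 1920-wide source to a 300-wide destination takes
// three passes (1920 -> 960 -> 480 -> 300), since each pass halves at most.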
int GetResampleCount(const gfx::Rect& src_subrect,
                     const gfx::Size& dst_size) {
  // At least one copy is required, since the back buffer itself is not
  // lockable.
  int min_resample_count = 1;
  int width_count = 0;
  int width = src_subrect.width();
  while (width > dst_size.width()) {
    ++width_count;
    width >>= 1;
  }
  int height_count = 0;
  int height = src_subrect.height();
  while (height > dst_size.height()) {
    ++height_count;
    height >>= 1;
  }
  return std::max(std::max(width_count, height_count),
                  min_resample_count);
}

// Returns half the size of |size|, with each dimension clamped to be no
// smaller than |min_size|.
gfx::Size GetHalfSizeNoLessThan(const gfx::Size& size,
                                const gfx::Size& min_size) {
  return gfx::Size(std::max(min_size.width(), size.width() / 2),
                   std::max(min_size.height(), size.height() / 2));
}

}  // namespace

AcceleratedSurfaceTransformer::AcceleratedSurfaceTransformer()
    : device_supports_multiple_render_targets_(false),
      vertex_shader_sources_(),
      pixel_shader_sources_() {

  // Associate passes with actual shader programs.
  vertex_shader_sources_[ONE_TEXTURE] = kVsOneTexture;
  pixel_shader_sources_[ONE_TEXTURE] = kPsOneTexture;

  vertex_shader_sources_[RGB_TO_YV12_FAST__PASS_1_OF_2] = kVsFetch4Pixels;
  pixel_shader_sources_[RGB_TO_YV12_FAST__PASS_1_OF_2] = kPsConvertRGBtoY8UV44;

  vertex_shader_sources_[RGB_TO_YV12_FAST__PASS_2_OF_2] = kVsFetch2Pixels;
  pixel_shader_sources_[RGB_TO_YV12_FAST__PASS_2_OF_2] = kPsConvertUV44toU2V2;

  vertex_shader_sources_[RGB_TO_YV12_SLOW__PASS_1_OF_3] = kVsFetch4Pixels;
  pixel_shader_sources_[RGB_TO_YV12_SLOW__PASS_1_OF_3] = kPsConvertRGBtoY;

  vertex_shader_sources_[RGB_TO_YV12_SLOW__PASS_2_OF_3] = kVsFetch4PixelsScale2;
  pixel_shader_sources_[RGB_TO_YV12_SLOW__PASS_2_OF_3] = kPsConvertRGBtoU;

  vertex_shader_sources_[RGB_TO_YV12_SLOW__PASS_3_OF_3] = kVsFetch4PixelsScale2;
  pixel_shader_sources_[RGB_TO_YV12_SLOW__PASS_3_OF_3] = kPsConvertRGBtoV;

  COMPILE_ASSERT(NUM_SHADERS == 6, must_initialize_shader_sources);
}

bool AcceleratedSurfaceTransformer::Init(IDirect3DDevice9* device) {
  bool result = DoInit(device);
  if (!result) {
    ReleaseAll();
  }
  return result;
}

bool AcceleratedSurfaceTransformer::DoInit(IDirect3DDevice9* device) {
  device_ = device;

  {
    D3DCAPS9 caps;
    HRESULT hr = device->GetDeviceCaps(&caps);
    if (FAILED(hr))
      return false;

    device_supports_multiple_render_targets_ = (caps.NumSimultaneousRTs >= 2);

    // Log statistics about which paths we take.
    UMA_HISTOGRAM_BOOLEAN("GPU.AcceleratedSurfaceTransformerCanUseMRT",
                          device_supports_multiple_render_targets());
  }

  // Force compilation of all shaders that could be used on this GPU.
  if (!CompileShaderCombo(ONE_TEXTURE))
    return false;

  if (device_supports_multiple_render_targets()) {
    if (!CompileShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2) ||
        !CompileShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2)) {
      return false;
    }
  } else {
    if (!CompileShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3) ||
        !CompileShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3) ||
        !CompileShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3)) {
      return false;
    }
  }
  COMPILE_ASSERT(NUM_SHADERS == 6, must_compile_at_doinit);

  ScopedComPtr<IDirect3DVertexDeclaration9> vertex_declaration;
  HRESULT hr = device_->CreateVertexDeclaration(g_vertexElements,
                                                vertex_declaration.Receive());
  if (FAILED(hr))
    return false;
  hr = device_->SetVertexDeclaration(vertex_declaration);
  if (FAILED(hr))
    return false;

  return true;
}

bool AcceleratedSurfaceTransformer::CompileShaderCombo(
    ShaderCombo shader) {
  if (!vertex_shaders_[shader]) {
    HRESULT hr = device_->CreateVertexShader(
        reinterpret_cast<const DWORD*>(vertex_shader_sources_[shader]),
        vertex_shaders_[shader].Receive());

    if (FAILED(hr))
      return false;

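    // Several combos may share the same bytecode; point them at the shader
    // object that was just created so each distinct program is built once.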
    for (int i = 0; i < NUM_SHADERS; ++i) {
      if (vertex_shader_sources_[i] == vertex_shader_sources_[shader] &&
          i != shader) {
        vertex_shaders_[i] = vertex_shaders_[shader];
      }
    }
  }

  if (!pixel_shaders_[shader]) {
    HRESULT hr = device_->CreatePixelShader(
        reinterpret_cast<const DWORD*>(pixel_shader_sources_[shader]),
        pixel_shaders_[shader].Receive());

    if (FAILED(hr))
      return false;

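    // As above, share the pixel shader object across combos that use
    // identical bytecode.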
    for (int i = 0; i < NUM_SHADERS; ++i) {
      if (pixel_shader_sources_[i] == pixel_shader_sources_[shader] &&
          i != shader) {
        pixel_shaders_[i] = pixel_shaders_[shader];
      }
    }
  }

  return true;
}

void AcceleratedSurfaceTransformer::ReleaseAll() {
  for (int i = 0; i < NUM_SHADERS; i++) {
    vertex_shaders_[i] = NULL;
    pixel_shaders_[i] = NULL;
  }

  user_scratch_texture_ = NULL;
  uv_scratch_texture_ = NULL;
  y_scratch_surface_ = NULL;
  u_scratch_surface_ = NULL;
  v_scratch_surface_ = NULL;
  for (int i = 0; i < arraysize(scaler_scratch_surfaces_); i++)
    scaler_scratch_surfaces_[i] = NULL;

  device_ = NULL;
}

void AcceleratedSurfaceTransformer::DetachAll() {
  for (int i = 0; i < NUM_SHADERS; i++) {
    vertex_shaders_[i].Detach();
    pixel_shaders_[i].Detach();
  }

  user_scratch_texture_.Detach();
  uv_scratch_texture_.Detach();
  y_scratch_surface_.Detach();
  u_scratch_surface_.Detach();
  v_scratch_surface_.Detach();
  for (int i = 0; i < arraysize(scaler_scratch_surfaces_); i++)
    scaler_scratch_surfaces_[i].Detach();

  device_.Detach();
}

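// Like Copy(), but passes a negative Y texture scale to the vertex shader,
// producing a vertically flipped copy of |src_texture|.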
bool AcceleratedSurfaceTransformer::CopyInverted(
    IDirect3DTexture9* src_texture,
    IDirect3DSurface9* dst_surface,
    const gfx::Size& dst_size) {
  return CopyWithTextureScale(src_texture, dst_surface, dst_size, 1.0f, -1.0f);
}

bool AcceleratedSurfaceTransformer::Copy(
    IDirect3DTexture9* src_texture,
    IDirect3DSurface9* dst_surface,
    const gfx::Size& dst_size) {
  return CopyWithTextureScale(src_texture, dst_surface, dst_size, 1.0f, 1.0f);
}

bool AcceleratedSurfaceTransformer::CopyWithTextureScale(
    IDirect3DTexture9* src_texture,
    IDirect3DSurface9* dst_surface,
    const gfx::Size& dst_size,
    float texture_scale_x,
    float texture_scale_y) {

  if (!SetShaderCombo(ONE_TEXTURE))
    return false;

  // Set the kTextureScale vertex shader constant, which is assigned to
  // register 1.
  float texture_scale[4] = {texture_scale_x, texture_scale_y, 0, 0};
  device()->SetVertexShaderConstantF(1, texture_scale, 1);

  ScopedRenderTargetRestorer render_target_restorer(device(), 0);
  device()->SetRenderTarget(0, dst_surface);
  device()->SetTexture(0, src_texture);

  D3DVIEWPORT9 viewport = {
    0, 0,
    dst_size.width(), dst_size.height(),
    0, 1
  };
  device()->SetViewport(&viewport);

  if (d3d_utils::GetSize(src_texture) == dst_size) {
    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
  } else {
    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
  }
  device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
  device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);

  DrawScreenAlignedQuad(dst_size);

  // Clear surface references.
  device()->SetTexture(0, NULL);
  return true;
}

void AcceleratedSurfaceTransformer::DrawScreenAlignedQuad(
    const gfx::Size& size) {
  const float target_size[4] = { static_cast<float>(size.width()),
                                 static_cast<float>(size.height()), 0, 0 };

  // Set the uniform shader constant |kRenderTargetSize|, which is bound
  // to register c0.
  device()->SetVertexShaderConstantF(0, target_size, 1);

  // We always send down the same vertices. The vertex program will take
  // care of doing resolution-dependent position adjustment.
  Vertex vertices[] = {
    { -1, +1, 0.5f, 1, 0, 0 },
    { +1, +1, 0.5f, 1, 1, 0 },
    { +1, -1, 0.5f, 1, 1, 1 },
    { -1, -1, 0.5f, 1, 0, 1 }
  };

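  // Drawn as a triangle fan: two triangles, (v0, v1, v2) and (v0, v2, v3),
  // cover the whole quad.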
  device()->BeginScene();
  device()->DrawPrimitiveUP(D3DPT_TRIANGLEFAN,
                            2,
                            vertices,
                            sizeof(vertices[0]));
  device()->EndScene();
}

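// Returns, via |texture|, an owning reference to the (lazily allocated)
// scratch texture; the caller is responsible for releasing it.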
bool AcceleratedSurfaceTransformer::GetIntermediateTexture(
    const gfx::Size& size,
    IDirect3DTexture9** texture,
    IDirect3DSurface9** texture_level_zero) {
  if (!d3d_utils::CreateOrReuseRenderTargetTexture(device(),
                                                   size,
                                                   &user_scratch_texture_,
                                                   texture_level_zero))
    return false;

  *texture = ScopedComPtr<IDirect3DTexture9>(user_scratch_texture_).Detach();
  return true;
}

// Resize an RGB surface using repeated linear interpolation.
bool AcceleratedSurfaceTransformer::ResizeBilinear(
    IDirect3DSurface9* src_surface,
    const gfx::Rect& src_subrect,
    IDirect3DSurface9* dst_surface,
    const gfx::Rect& dst_rect) {
  COMPILE_ASSERT(arraysize(scaler_scratch_surfaces_) == 2, surface_count);

  gfx::Size src_size = src_subrect.size();
  gfx::Size dst_size = dst_rect.size();

  if (src_size.IsEmpty() || dst_size.IsEmpty())
    return false;

  HRESULT hr = S_OK;
  // Set up intermediate buffers needed for downsampling.
  const int resample_count = GetResampleCount(src_subrect, dst_size);
  const gfx::Size half_size =
      GetHalfSizeNoLessThan(src_subrect.size(), dst_size);
  if (resample_count > 1) {
    if (!d3d_utils::CreateOrReuseLockableSurface(device(),
                                                 half_size,
                                                 &scaler_scratch_surfaces_[0]))
      return false;
  }
  if (resample_count > 2) {
    const gfx::Size quarter_size = GetHalfSizeNoLessThan(half_size, dst_size);
    if (!d3d_utils::CreateOrReuseLockableSurface(device(),
                                                 quarter_size,
                                                 &scaler_scratch_surfaces_[1]))
      return false;
  }

  // Repeat downsampling the surface until its size becomes identical to
  // |dst_size|. We keep the factor of each downsampling no more than two
  // because using a factor more than two can introduce aliasing.
  RECT read_rect = src_subrect.ToRECT();
  gfx::Size write_size = half_size;
  int read_buffer_index = 1;
  int write_buffer_index = 0;
  for (int i = 0; i < resample_count; ++i) {
    TRACE_EVENT0("gpu", "StretchRect");
    IDirect3DSurface9* read_buffer =
        (i == 0) ? src_surface : scaler_scratch_surfaces_[read_buffer_index];
    IDirect3DSurface9* write_buffer;
    RECT write_rect;
    if (i == resample_count - 1) {
      write_buffer = dst_surface;
      write_rect = dst_rect.ToRECT();
    } else {
      write_buffer = scaler_scratch_surfaces_[write_buffer_index];
      write_rect = gfx::Rect(write_size).ToRECT();
    }

    hr = device()->StretchRect(read_buffer,
                               &read_rect,
                               write_buffer,
                               &write_rect,
                               D3DTEXF_LINEAR);

    if (FAILED(hr))
      return false;
    read_rect = write_rect;
    write_size = GetHalfSizeNoLessThan(write_size, dst_size);
    std::swap(read_buffer_index, write_buffer_index);
  }

  return true;
}

bool AcceleratedSurfaceTransformer::TransformRGBToYV12(
    IDirect3DTexture9* src_surface,
    const gfx::Size& dst_size,
    IDirect3DSurface9** dst_y,
    IDirect3DSurface9** dst_u,
    IDirect3DSurface9** dst_v) {
  gfx::Size packed_y_size;
  gfx::Size packed_uv_size;
  if (!AllocYUVBuffers(dst_size, &packed_y_size, &packed_uv_size,
                       dst_y, dst_u, dst_v)) {
    return false;
  }

  if (device_supports_multiple_render_targets()) {
    return TransformRGBToYV12_MRT(src_surface,
                                  dst_size,
                                  packed_y_size,
                                  packed_uv_size,
                                  *dst_y,
                                  *dst_u,
                                  *dst_v);
  } else {
    return TransformRGBToYV12_WithoutMRT(src_surface,
                                         dst_size,
                                         packed_y_size,
                                         packed_uv_size,
                                         *dst_y,
                                         *dst_u,
                                         *dst_v);
  }
}

bool AcceleratedSurfaceTransformer::ReadFast(IDirect3DSurface9* gpu_surface,
                                             uint8* dst,
                                             int dst_bytes_per_row,
                                             int dst_num_rows,
                                             int dst_stride) {
  // TODO(nick): Compared to GetRenderTargetData, LockRect+memcpy is 50% faster
  // on some systems, but 100x slower on others. We should have logic here to
  // choose the best path, probably by adaptively trying both and picking the
  // faster one. http://crbug.com/168532
  return ReadByGetRenderTargetData(gpu_surface, dst, dst_bytes_per_row,
                                   dst_num_rows, dst_stride);
}

bool AcceleratedSurfaceTransformer::ReadByLockAndCopy(
    IDirect3DSurface9* gpu_surface,
    uint8* dst,
    int dst_bytes_per_row,
    int dst_num_rows,
    int dst_stride) {
  D3DLOCKED_RECT locked_rect;
  {
    TRACE_EVENT0("gpu", "LockRect");
    HRESULT hr = gpu_surface->LockRect(&locked_rect, NULL,
                                       D3DLOCK_READONLY | D3DLOCK_NOSYSLOCK);
    if (FAILED(hr)) {
      LOG(ERROR) << "Failed to lock surface";
      return false;
    }
  }

  {
    TRACE_EVENT0("gpu", "memcpy");
    uint8* dst_row = dst;
    uint8* src_row = reinterpret_cast<uint8*>(locked_rect.pBits);
    for (int i = 0; i < dst_num_rows; i++) {
      memcpy(dst_row, src_row, dst_bytes_per_row);
      src_row += locked_rect.Pitch;
      dst_row += dst_stride;
    }
  }
  gpu_surface->UnlockRect();
  return true;
}

bool AcceleratedSurfaceTransformer::ReadByGetRenderTargetData(
    IDirect3DSurface9* gpu_surface,
    uint8* dst,
    int dst_bytes_per_row,
    int dst_num_rows,
    int dst_stride) {
  HRESULT hr = 0;
  ScopedComPtr<IDirect3DSurface9> system_surface;
  gfx::Size src_size = d3d_utils::GetSize(gpu_surface);

  // Depending on pitch and alignment, we might be able to wrap |dst| in an
  // offscreen plain surface for a direct copy.
  const bool direct_copy = (dst_stride == dst_bytes_per_row &&
                            src_size.width() * 4 == dst_bytes_per_row &&
                            dst_num_rows >= src_size.height());
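  // (The factor of 4 corresponds to the 32-bit D3DFMT_A8R8G8B8 format
  // created below.)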

  {
    TRACE_EVENT0("gpu", "CreateOffscreenPlainSurface");
    HANDLE handle = reinterpret_cast<HANDLE>(dst);
    hr = device()->CreateOffscreenPlainSurface(src_size.width(),
                                               src_size.height(),
                                               D3DFMT_A8R8G8B8,
                                               D3DPOOL_SYSTEMMEM,
                                               system_surface.Receive(),
                                               direct_copy ? &handle : NULL);
    if (!SUCCEEDED(hr)) {
      LOG(ERROR) << "Failed to create offscreen plain surface.";
      return false;
    }
  }

  {
    TRACE_EVENT0("gpu", "GetRenderTargetData");
    hr = device()->GetRenderTargetData(gpu_surface, system_surface);
    if (FAILED(hr)) {
      LOG(ERROR) << "Failed GetRenderTargetData";
      return false;
    }
  }

  if (direct_copy) {
    // We're done: |system_surface| is a wrapper around |dst|.
    return true;
  } else {
    // Extra memcpy required from |system_surface| to |dst|.
    return ReadByLockAndCopy(system_surface, dst, dst_bytes_per_row,
                             dst_num_rows, dst_stride);
  }
}

bool AcceleratedSurfaceTransformer::AllocYUVBuffers(
    const gfx::Size& dst_size,
    gfx::Size* y_size,
    gfx::Size* uv_size,
    IDirect3DSurface9** dst_y,
    IDirect3DSurface9** dst_u,
    IDirect3DSurface9** dst_v) {

  // Y is full height, packed into 4 components.
  *y_size = gfx::Size((dst_size.width() + 3) / 4, dst_size.height());

  // U and V are half the size (rounded up) of Y.
  *uv_size = gfx::Size((y_size->width() + 1) / 2, (y_size->height() + 1) / 2);
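  // For example, a 1280x720 destination yields a 320x720 packed Y surface
  // and two 160x360 packed UV surfaces, since each ARGB texel packs four
  // 8-bit samples.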

  if (!d3d_utils::CreateOrReuseLockableSurface(device(), *y_size,
                                               &y_scratch_surface_)) {
    return false;
  }
  if (!d3d_utils::CreateOrReuseLockableSurface(device(), *uv_size,
                                               &u_scratch_surface_)) {
    return false;
  }
  if (!d3d_utils::CreateOrReuseLockableSurface(device(), *uv_size,
                                               &v_scratch_surface_)) {
    return false;
  }

  *dst_y = ScopedComPtr<IDirect3DSurface9>(y_scratch_surface_).Detach();
  *dst_u = ScopedComPtr<IDirect3DSurface9>(u_scratch_surface_).Detach();
  *dst_v = ScopedComPtr<IDirect3DSurface9>(v_scratch_surface_).Detach();

  return true;
}

bool AcceleratedSurfaceTransformer::TransformRGBToYV12_MRT(
    IDirect3DTexture9* src_surface,
    const gfx::Size& dst_size,
    const gfx::Size& packed_y_size,
    const gfx::Size& packed_uv_size,
    IDirect3DSurface9* dst_y,
    IDirect3DSurface9* dst_u,
    IDirect3DSurface9* dst_v) {
  TRACE_EVENT0("gpu", "RGBToYV12_MRT");

  ScopedRenderTargetRestorer color0_restorer(device(), 0);
  ScopedRenderTargetRestorer color1_restorer(device(), 1);

  // Create an intermediate surface to hold the UUVV values. This is color
  // target 1 for the first pass, and texture 0 for the second pass. Its
  // values are not read afterwards.

  ScopedComPtr<IDirect3DSurface9> uv_as_surface;
  if (!d3d_utils::CreateOrReuseRenderTargetTexture(device(),
                                                   packed_y_size,
                                                   &uv_scratch_texture_,
                                                   uv_as_surface.Receive())) {
    return false;
  }

  // Clamping is required if (dst_size.width() % 8 != 0) or if
  // (dst_size.height() % 2 != 0), so we set it always. Both passes rely on
  // this.
  device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
  device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);

  /////////////////////////////////////////
  // Pass 1: RGB --(scaled)--> YYYY + UUVV
  SetShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2);

  // Enable bilinear filtering if scaling is required. The filtering will take
  // place entirely in the first pass.
  if (d3d_utils::GetSize(src_surface) != dst_size) {
    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
  } else {
    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
  }

  device()->SetTexture(0, src_surface);
  device()->SetRenderTarget(0, dst_y);
  device()->SetRenderTarget(1, uv_as_surface);
  DrawScreenAlignedQuad(dst_size);

  /////////////////////////////////////////
  // Pass 2: UUVV -> UUUU + VVVV
  SetShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2);

  // The second pass uses bilinear minification to achieve vertical scaling,
  // so enable it always.
  device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
  device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);

  device()->SetTexture(0, uv_scratch_texture_);
  device()->SetRenderTarget(0, dst_u);
  device()->SetRenderTarget(1, dst_v);
  DrawScreenAlignedQuad(packed_y_size);

  // Clear surface references.
  device()->SetTexture(0, NULL);
  return true;
}

bool AcceleratedSurfaceTransformer::TransformRGBToYV12_WithoutMRT(
    IDirect3DTexture9* src_surface,
    const gfx::Size& dst_size,
    const gfx::Size& packed_y_size,
    const gfx::Size& packed_uv_size,
    IDirect3DSurface9* dst_y,
    IDirect3DSurface9* dst_u,
    IDirect3DSurface9* dst_v) {
  TRACE_EVENT0("gpu", "RGBToYV12_WithoutMRT");

  ScopedRenderTargetRestorer color0_restorer(device(), 0);

  ScopedComPtr<IDirect3DTexture9> scaled_src_surface;

  // If scaling is requested, do it to a temporary texture. The MRT path
  // gets a scale for free, so we need to support it here too (even though
  // it's an extra operation).
  if (d3d_utils::GetSize(src_surface) == dst_size) {
    scaled_src_surface = src_surface;
  } else {
    ScopedComPtr<IDirect3DSurface9> dst_level0;
    if (!d3d_utils::CreateOrReuseRenderTargetTexture(
            device(), dst_size, &uv_scratch_texture_, dst_level0.Receive())) {
      return false;
    }
    if (!Copy(src_surface, dst_level0, dst_size)) {
      return false;
    }
    scaled_src_surface = uv_scratch_texture_;
  }

  // Input texture is the same for all three passes.
  device()->SetTexture(0, scaled_src_surface);

  // Clamping is required if (dst_size.width() % 8 != 0) or if
  // (dst_size.height() % 2 != 0), so we set it always. All passes rely on
  // this.
  device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
  device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);

  /////////////////////
  // Pass 1: RGB -> Y.
  SetShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3);

  // Pass 1 just needs point sampling.
  device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
  device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);

  device()->SetRenderTarget(0, dst_y);
  DrawScreenAlignedQuad(dst_size);

  // Passes 2 and 3 rely on bilinear minification to downsample U and V.
  device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
  device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);

  /////////////////////
  // Pass 2: RGB -> U.
  SetShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3);
  device()->SetRenderTarget(0, dst_u);
  DrawScreenAlignedQuad(dst_size);

  /////////////////////
  // Pass 3: RGB -> V.
  SetShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3);
  device()->SetRenderTarget(0, dst_v);
  DrawScreenAlignedQuad(dst_size);

  // Clear surface references.
  device()->SetTexture(0, NULL);
  return true;
}

IDirect3DDevice9* AcceleratedSurfaceTransformer::device() {
  return device_;
}

bool AcceleratedSurfaceTransformer::SetShaderCombo(ShaderCombo combo) {
  // Compile shaders on first use, if needed. Normally the compilation should
  // already have happened at Init() time, but test code might force
  // us down an unusual path.
  if (!CompileShaderCombo(combo))
    return false;

  HRESULT hr = device()->SetVertexShader(vertex_shaders_[combo]);
  if (!SUCCEEDED(hr))
    return false;
  hr = device()->SetPixelShader(pixel_shaders_[combo]);
  if (!SUCCEEDED(hr))
    return false;
  return true;
}
    737