// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// @gyp_namespace(ui_surface)
// Compiles into C++ as 'accelerated_surface_transformer_win_hlsl_compiled.h'

// Position plus one texture coordinate; the input type of the vertex shaders
// in this file.
struct Vertex {
  float4 position : POSITION;
  float2 texCoord : TEXCOORD0;
};

texture t;
sampler s;

extern uniform float2 kRenderTargetSize : c0;
extern uniform float2 kTextureScale : c1;

// @gyp_compile(vs_2_0, vsOneTexture)
//
// Passes a position and texture coordinate to the pixel shader.
Vertex vsOneTexture(Vertex input) {
  // Texture scale is typically just 1 (to do nothing) or -1 (to flip).
  // Re-center the coordinate around 0.5, apply the scale, then undo the
  // re-centering.
  float2 centered = 2 * (input.texCoord - 0.5);
  float2 scaled = centered * kTextureScale;
  input.texCoord = (scaled + 1) / 2;

  // Shift the position by -1/width in x and +1/height in y, where width and
  // height come from |kRenderTargetSize|.
  input.position.xy += float2(-1, 1) / kRenderTargetSize;
  return input;
}

// @gyp_compile(ps_2_0, psOneTexture)
//
// Samples a texture at the given texture coordinate and returns the result.
float4 psOneTexture(float2 texCoord : TEXCOORD0) : COLOR0 {
  float4 color = tex2D(s, texCoord);
  return color;
}

// Return |value| rounded up to the nearest multiple of |multiple|.
float alignTo(float value, float multiple) {
  // |multiple| is usually a compile-time constant; testing it first allows
  // the compiler to avoid the fmod when possible.
  if (multiple != 1) {
    // Biasing the value provides numeric stability. We expect |value| to
    // be an integer; this prevents 4.001 from being rounded up to 8.
    float biased = value - 0.5;
    return biased + multiple - fmod(biased, multiple);
  }
  return value;
}

// Reorders the channels of |value| from rgba to bgra.
float4 packForByteOrder(float4 value) {
  float4 swizzled = value.bgra;
  return swizzled;
}

// Adjust the input vertex to address the correct range of texels.
// This depends on the value of the shader constant |kRenderTargetSize|, as
// well as an alignment factor |align| that effectively specifies the footprint
// of the texel samples done by this shader pass, and is used to correct when
// that footprint size doesn't align perfectly with the actual input size.
Vertex adjustForAlignmentAndPacking(Vertex vtx, float2 align) {
  float2 src_size = kRenderTargetSize;

  // Because our caller expects to be sampling |align.x| many pixels from src
  // at a time, if src's width isn't evenly divisible by |align.x|, it is
  // necessary to pretend that the source is slightly bigger than it is. The
  // height is padded against |align.y| in the same way.
  float2 padded_size = float2(alignTo(src_size.x, align.x),
                              alignTo(src_size.y, align.y));

  // When the padded size differs from the real size, the texture coordinates
  // are scaled to sample past the edge of the vtx; clamping will produce extra
  // copies of the last row.
  float2 texture_scale = padded_size / src_size;

  // Adjust positions so that we're addressing full fragments in the output,
  // per the top-left filling convention. The shifts would be equivalent to
  // 1/dst_width and 1/dst_height, if we were to calculate those explicitly.
  vtx.position.x -= align.x / padded_size.x;
  vtx.position.y += align.y / padded_size.y;

  // Apply the texture scale.
  vtx.texCoord *= texture_scale;

  return vtx;
}

///////////////////////////////////////////////////////////////////////
// RGB24 to YV12 in two passes; writing two 8888 targets each pass.
//
// YV12 is full-resolution luma and half-resolution blue/red chroma.
//
//                  (original)
//    XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB
//    XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB
//    XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB
//    XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB
//    XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB
//    XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB
//      |
//      |      (y plane)    (temporary)
//      |      YYYY YYYY     UVUV UVUV
//      +--> { YYYY YYYY  +  UVUV UVUV }
//             YYYY YYYY     UVUV UVUV
//   First     YYYY YYYY     UVUV UVUV
//    pass     YYYY YYYY     UVUV UVUV
//             YYYY YYYY     UVUV UVUV
//                              |
//                              |  (u plane) (v plane)
//   Second                     |      UUUU   VVVV
//     pass                     +--> { UUUU + VVVV }
//                                     UUUU   VVVV
//
///////////////////////////////////////////////////////////////////////

// Phase one of RGB24->YV12 conversion: vsFetch4Pixels/psConvertRGBtoY8UV44
//
// @gyp_compile(vs_2_0, vsFetch4Pixels)
// @gyp_compile(ps_2_0, psConvertRGBtoY8UV44)
//
// Writes four source pixels at a time to a full-size Y plane and a half-width
// interleaved UV plane. After execution, the Y plane is complete but the UV
// planes still need to be de-interleaved and vertically scaled.
//
void vsFetch4Pixels(in Vertex vertex,
                    out float4 position : POSITION,
                    out float2 texCoord0 : TEXCOORD0,
                    out float2 texCoord1 : TEXCOORD1,
                    out float2 texCoord2 : TEXCOORD2,
                    out float2 texCoord3 : TEXCOORD3) {
  // Footprint of this pass: four texels wide, one texel tall.
  Vertex adjusted = adjustForAlignmentAndPacking(vertex, float2(4, 1));
  position = adjusted.position;

  // Set up four taps, aligned to texel centers if the src's true size is
  // |kRenderTargetSize|, and doing bilinear interpolation otherwise.
  float2 texel_step = float2(1 / kRenderTargetSize.x, 0);
  float2 center = adjusted.texCoord;
  texCoord0 = center - 1.5f * texel_step;
  texCoord1 = center - 0.5f * texel_step;
  texCoord2 = center + 0.5f * texel_step;
  texCoord3 = center + 1.5f * texel_step;
}

// Output of psConvertRGBtoY8UV44: one texel for each of two render targets.
struct YV16QuadPixel
{
  float4 YYYY : COLOR0;
  float4 UUVV : COLOR1;
};

// Color conversion constants.
static const float3x1 rgb_to_y = float3x1( +0.257f, +0.504f, +0.098f );
static const float3x1 rgb_to_u = float3x1( -0.148f, -0.291f, +0.439f );
static const float3x1 rgb_to_v = float3x1( +0.439f, -0.368f, -0.071f );
static const float y_bias = 0.0625f;
static const float uv_bias = 0.5f;

// Converts four RGB samples into four Y values (COLOR0) and two U plus two V
// values (COLOR1).
YV16QuadPixel psConvertRGBtoY8UV44(float2 texCoord0 : TEXCOORD0,
                                   float2 texCoord1 : TEXCOORD1,
                                   float2 texCoord2 : TEXCOORD2,
                                   float2 texCoord3 : TEXCOORD3) {
  // Fetch the four texture samples, then gather them into a matrix.
  float3 rgb0 = tex2D(s, texCoord0).rgb;
  float3 rgb1 = tex2D(s, texCoord1).rgb;
  float3 rgb2 = tex2D(s, texCoord2).rgb;
  float3 rgb3 = tex2D(s, texCoord3).rgb;
  float4x3 rgb_quad_pixel = float4x3(rgb0, rgb1, rgb2, rgb3);

  // RGB -> Y conversion (x4).
  float4 yyyy = mul(rgb_quad_pixel, rgb_to_y) + y_bias;

  // Average adjacent texture samples while converting RGB->UV. This is the
  // same as color converting then averaging, but slightly less math: the
  // samples are summed pairwise and the conversion vector is halved. These
  // values will be in the range [-0.439f, +0.439f] and still need to have the
  // bias term applied.
  float2x3 rgb_pair_sums = float2x3(rgb0 + rgb1, rgb2 + rgb3);
  float2 uu = mul(rgb_pair_sums, rgb_to_u / 2);
  float2 vv = mul(rgb_pair_sums, rgb_to_v / 2);

  // Package the result. Y is swizzled to account for BGRA byte ordering; the
  // UUVV texel is written in xyzw order.
  YV16QuadPixel result;
  result.YYYY = packForByteOrder(yyyy);
  result.UUVV.xyzw = float4(uu, vv) + uv_bias;  // Apply uv bias.
  return result;
}

// Phase two of RGB24->YV12 conversion: vsFetch2Pixels/psConvertUV44toU2V2
//
// @gyp_compile(vs_2_0, vsFetch2Pixels)
// @gyp_compile(ps_2_0, psConvertUV44toU2V2)
//
// Deals with UV only. Input is interleaved UV pixels, already scaled
// horizontally, packed two per RGBA texel. Output is two color planes U and V,
// packed four to a RGBA pixel.
//
// Vertical scaling happens via a half-texel offset and bilinear interpolation
// during texture sampling.
void vsFetch2Pixels(in Vertex vertex,
                    out float4 position : POSITION,
                    out float2 texCoord0 : TEXCOORD0,
                    out float2 texCoord1 : TEXCOORD1) {
  // We fetch two texels in the horizontal direction, and scale by 2 in the
  // vertical direction.
  Vertex adjusted = adjustForAlignmentAndPacking(vertex, float2(2, 2));
  position = adjusted.position;

  // Setup the two texture coordinates. No need to adjust texCoord.y; it's
  // already at the mid-way point between the two rows. Horizontally, we'll
  // fetch two texels so that we have enough data to fill our output.
  float2 texel_step = float2(1 / kRenderTargetSize.x, 0);
  float2 center = adjusted.texCoord;
  texCoord0 = center - 0.5f * texel_step;
  texCoord1 = center + 0.5f * texel_step;
}

// Output of psConvertUV44toU2V2: one texel for each of the U and V targets.
struct UV8QuadPixel {
  float4 UUUU : COLOR0;
  float4 VVVV : COLOR1;
};

// De-interleaves two UUVV texels into one UUUU texel and one VVVV texel.
UV8QuadPixel psConvertUV44toU2V2(float2 texCoord0 : TEXCOORD0,
                                 float2 texCoord1 : TEXCOORD1) {
  // We're just sampling two pixels and unswizzling them. There's no need to do
  // vertical scaling with math, since bilinear interpolation in the sampler
  // takes care of that.
  float4 lo_uuvv = tex2D(s, texCoord0);
  float4 hi_uuvv = tex2D(s, texCoord1);

  // The .xy halves of both samples form the U texel and the .zw halves form
  // the V texel.
  float4 uuuu = float4(lo_uuvv.xy, hi_uuvv.xy);
  float4 vvvv = float4(lo_uuvv.zw, hi_uuvv.zw);

  UV8QuadPixel result;
  result.UUUU = packForByteOrder(uuuu);
  result.VVVV = packForByteOrder(vvvv);
  return result;
}


///////////////////////////////////////////////////////////////////////
// RGB24 to YV12 in three passes, without MRT: one pass per output color plane.
// Every pass uses a four-tap vertex shader: vsFetch4Pixels for the first pass
// and vsFetch4Pixels_Scale2 for the other two, as listed below.
//
// Note that this technique will not do full bilinear filtering on its RGB
// input (you'd get correctly filtered Y, but aliasing in U and V).
//
// Pass 1: vsFetch4Pixels + psConvertRGBtoY
// Pass 2: vsFetch4Pixels_Scale2 + psConvertRGBtoU
// Pass 3: vsFetch4Pixels_Scale2 + psConvertRGBtoV
//
// @gyp_compile(vs_2_0, vsFetch4Pixels_Scale2)
// @gyp_compile(ps_2_0, psConvertRGBtoY)
// @gyp_compile(ps_2_0, psConvertRGBtoU)
// @gyp_compile(ps_2_0, psConvertRGBtoV)
//
///////////////////////////////////////////////////////////////////////
void vsFetch4Pixels_Scale2(in Vertex vertex,
                           out float4 position : POSITION,
                           out float2 texCoord0 : TEXCOORD0,
                           out float2 texCoord1 : TEXCOORD1,
                           out float2 texCoord2 : TEXCOORD2,
                           out float2 texCoord3 : TEXCOORD3) {
  // Footprint of this pass: eight texels wide, two texels tall.
  Vertex adjusted = adjustForAlignmentAndPacking(vertex, float2(8, 2));
  position = adjusted.position;

  // Set up four taps, each of which samples a 2x2 texel quad at the midpoint.
  float2 texel_step = float2(1 / kRenderTargetSize.x, 0);
  float2 center = adjusted.texCoord;
  texCoord0 = center - 3 * texel_step;
  texCoord1 = center - 1 * texel_step;
  texCoord2 = center + 1 * texel_step;
  texCoord3 = center + 3 * texel_step;
}

// RGB -> Y, four samples at a time.
float4 psConvertRGBtoY(float2 texCoord0 : TEXCOORD0,
                       float2 texCoord1 : TEXCOORD1,
                       float2 texCoord2 : TEXCOORD2,
                       float2 texCoord3 : TEXCOORD3) : COLOR0 {
  // One RGB sample per row.
  float4x3 samples = float4x3(tex2D(s, texCoord0).rgb,
                              tex2D(s, texCoord1).rgb,
                              tex2D(s, texCoord2).rgb,
                              tex2D(s, texCoord3).rgb);

  // Convert all four samples to Y, add the bias, then swizzle for output.
  float4 yyyy = mul(samples, rgb_to_y) + y_bias;
  return packForByteOrder(yyyy);
}

// RGB -> U, four samples at a time.
float4 psConvertRGBtoU(float2 texCoord0 : TEXCOORD0,
                       float2 texCoord1 : TEXCOORD1,
                       float2 texCoord2 : TEXCOORD2,
                       float2 texCoord3 : TEXCOORD3) : COLOR0 {
  // One RGB sample per row.
  float4x3 samples = float4x3(tex2D(s, texCoord0).rgb,
                              tex2D(s, texCoord1).rgb,
                              tex2D(s, texCoord2).rgb,
                              tex2D(s, texCoord3).rgb);

  // Convert all four samples to U, add the bias, then swizzle for output.
  float4 uuuu = mul(samples, rgb_to_u) + uv_bias;
  return packForByteOrder(uuuu);
}

// RGB -> V, four samples at a time.
float4 psConvertRGBtoV(float2 texCoord0 : TEXCOORD0,
                       float2 texCoord1 : TEXCOORD1,
                       float2 texCoord2 : TEXCOORD2,
                       float2 texCoord3 : TEXCOORD3) : COLOR0 {
  // One RGB sample per row.
  float4x3 samples = float4x3(tex2D(s, texCoord0).rgb,
                              tex2D(s, texCoord1).rgb,
                              tex2D(s, texCoord2).rgb,
                              tex2D(s, texCoord3).rgb);

  // Convert all four samples to V, add the bias, then swizzle for output.
  float4 vvvv = mul(samples, rgb_to_v) + uv_bias;
  return packForByteOrder(vvvv);
}