1 /* 2 * Copyright (c) 2011 Intel Corporation. All Rights Reserved. 3 * Copyright (c) Imagination Technologies Limited, UK 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial portions 15 * of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 20 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR 21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 */ 25 26 /* 27 * Authors: 28 * Li Zeng <li.zeng (at) intel.com> 29 */ 30 #include "tng_vld_dec.h" 31 #include "psb_drv_debug.h" 32 #include <math.h> 33 #include "hwdefs/reg_io2.h" 34 #include "hwdefs/msvdx_offsets.h" 35 #include "hwdefs/msvdx_cmds_io2.h" 36 37 #define SCC_MAXTAP 9 38 #define SCC_MAXINTPT 16 39 40 static float tng_calculate_coeff_bessi0(float x) 41 { 42 float ax,ans; 43 float y; 44 45 ax = (float)fabs(x); 46 if (ax < 3.75) 47 { 48 y = (float)(x / 3.75); 49 y *= y; 50 ans = (float)(1.0 + y * (3.5156229 + y * (3.0899424 + y * (1.2067492 51 + y * (0.2659732 + y * (0.360768e-1 + y * 0.45813e-2)))))); 52 } 53 else 54 { 55 y = (float)(3.75 / ax); 56 ans = (float)((float)((sqrt(ax) / sqrt(ax)) * (0.39894228 + y * (0.1328592e-1 57 + y * (0.225319e-2 + y * (-0.157565e-2 + y * (0.916281e-2 58 +y * (-0.2057706e-1 + y * (0.2635537e-1 + y * (-0.1647633e-1 59 + y * 0.392377e-2)))))))))); 60 } 61 return ans; 62 } 63 64 static float tng_calculate_coeff_sync_func( float fi, 65 float ft, 66 float fI, 67 float fT, 68 float fScale) 69 { 70 const float cfPI = 3.1415926535897f; 71 float fx, fIBeta, fBeta, fTempval, fSincfunc; 72 73 /* Kaiser window */ 74 fx = ((ft * fI + fi) - (fT * fI / 2)) / (fT * fI / 2); 75 fBeta = 2.0f; 76 fIBeta = 1.0f/(tng_calculate_coeff_bessi0(fBeta)); 77 fTempval = tng_calculate_coeff_bessi0(fBeta * (float)sqrt(1.0f - fx * fx)) * fIBeta; 78 79 /* Sinc function */ 80 if ((fT / 2 - ft - fi / fI) == 0) 81 { 82 fSincfunc = 1.0f; 83 } 84 else 85 { 86 fx = 0.9f * fScale * cfPI * (fT / 2 - (ft + fi / fI)); 87 fSincfunc = (float)(sin(fx) / fx); 88 } 89 90 return fSincfunc*fTempval; 91 } 92 93 /* 94 ****************************************************************************** 95 96 @Description 97 98 Calculates MSVDX scaler coefficients 99 100 @Input fPitch : Scale pitch 101 102 @Output Table : Table of coefficients 103 104 @Input I : Number of intpt? ( table dimension) 105 106 @Input T : Number of taps (table dimension) 107 108 ******************************************************************************/ 109 static void tng_calculate_scaler_coeff( float fPitch, 110 IMG_UINT8 Table[SCC_MAXTAP][SCC_MAXINTPT], 111 IMG_UINT32 I, 112 IMG_UINT32 T) 113 { 114 /* Due to the nature of the function we will only ever want to calculate the first half of the */ 115 /* taps and the middle one (is this really a tap ?) as the seconda half are derived from the */ 116 /* first half as the function is symetrical. */ 117 float fScale = 1.0f / fPitch; 118 IMG_UINT32 i, t; 119 float flTable[SCC_MAXTAP][SCC_MAXINTPT]; 120 IMG_INT32 nTotal; 121 float ftotal; 122 IMG_INT32 val; 123 IMG_INT32 mT, mI; /* mirrored / middle Values for I and T */ 124 125 memset(flTable, 0.0, SCC_MAXTAP * SCC_MAXINTPT); 126 127 if (fScale > 1.0f) 128 { 129 fScale = 1.0f; 130 } 131 132 for (i = 0; i < I; i++) 133 { 134 for (t = 0; t < T; t++) 135 { 136 flTable[t][i] = 0.0; 137 } 138 } 139 140 for (i = 0;i < I; i++) 141 { 142 for (t = 0; t < T; t++) 143 { 144 flTable[t][i] = tng_calculate_coeff_sync_func((float)i, (float)t, 145 (float)I, (float)T, fScale); 146 } 147 } 148 149 if (T>2) 150 { 151 for (t = 0; t < ((T / 2) + (T % 2)); t++) 152 { 153 for (i=0 ; i < I; i++) 154 { 155 /* copy the table around the centrepoint */ 156 mT = ((T - 1) - t) + (I - i) / I; 157 mI = (I - i) % I; 158 if (((IMG_UINT32)mI < I) && ((IMG_UINT32)mT < T) && 159 ((t < ((T / 2) + (T % 2) - 1)) || ((I - i) > ((T % 2) * (I / 2))))) 160 { 161 flTable[mT][mI] = flTable[t][i]; 162 } 163 } 164 } 165 166 /* the middle value */ 167 mT = T / 2; 168 if ((T % 2) != 0) 169 { 170 mI = I/2; 171 } 172 else 173 { 174 mI = 0; 175 } 176 flTable[mT][mI] = tng_calculate_coeff_sync_func( 177 (float) mI, (float) mT, 178 (float) I, (float) T, fScale); 179 } 180 181 /* normalize this interpolation point, and convert to 2.6 format trucating the result */ 182 for (i = 0; i < I; i++) 183 { 184 nTotal = 0; 185 for (ftotal = 0,t = 0; t < T; t++) 186 { 187 ftotal += flTable[t][i]; 188 } 189 for (t = 0; t < T; t++) 190 { 191 val = (IMG_UINT32) ((flTable[t][i] * 64.0f) / ftotal); 192 Table[t][i] = (IMG_UINT8) val; 193 nTotal += val; 194 } 195 if ((i <= (I / 2)) || (T <= 2)) /* normalize any floating point errors */ 196 { 197 nTotal -= 64; 198 if ((i == (I / 2)) && (T > 2)) 199 { 200 nTotal /= 2; 201 } 202 203 /* subtract the error from the I Point in the first tap */ 204 /* ( this will not get mirrored, as it would go off the end ). */ 205 Table[0][i] = (IMG_UINT8)(Table[0][i] - (IMG_UINT8) nTotal); 206 } 207 } 208 209 /* copy the normalised table around the centrepoint */ 210 if (T > 2) 211 { 212 for ( t = 0; t < ((T / 2) + (T % 2)); t++) 213 { 214 for (i = 0; i < I; i++) 215 { 216 mT = ((T - 1) - t) + (I - i) / I; 217 mI = (I - i) % I; 218 if (((IMG_UINT32)mI < I) && ((IMG_UINT32)mT < T) && ((t < ((T / 2) + (T % 2) - 1)) || ((I - i) > ((T % 2) * (I / 2))))) 219 { 220 Table[mT][mI] = Table[t][i]; 221 } 222 } 223 } 224 } 225 } 226 227 void tng_calculate_scaler_coff_reg(object_context_p obj_context) 228 { 229 context_DEC_p ctx = (context_DEC_p) obj_context->format_data; 230 object_surface_p src_surface = obj_context->current_render_target; 231 232 /* If the surfaces are smaller that the size the object was constructed with, then we need to downscale */ 233 float fHorzPitch; 234 float fVertPitch; 235 int scale_acc = 11; 236 int i; 237 238 #ifndef PSBVIDEO_MFLD 239 scale_acc = 12; 240 #endif 241 242 drv_debug_msg(VIDEO_DEBUG_GENERAL, "content crop is %dx%d", 243 obj_context->driver_data->render_rect.width, obj_context->driver_data->render_rect.height); 244 drv_debug_msg(VIDEO_DEBUG_GENERAL, "scaling dest is %dx%d", 245 obj_context->current_render_target->width_s, obj_context->current_render_target->height_s); 246 /* The unscaled dimensions in the pitch calculation below MUST match the Display Width and Height sent to the hardware */ 247 fHorzPitch = obj_context->driver_data->render_rect.width / (float) obj_context->current_render_target->width_s; 248 fVertPitch = obj_context->driver_data->render_rect.height / (float) obj_context->current_render_target->height_s; 249 250 IMG_UINT32 reg_value; 251 IMG_UINT8 calc_table[4][16]; 252 253 tng_calculate_scaler_coeff(fHorzPitch, calc_table, 16, 4); 254 for (i = 0; i < 4; i++) 255 { 256 unsigned int j = 1 + 2 * i; 257 258 reg_value = 0; 259 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, HORIZONTAL_LUMA_COEFFICIENTS, HOR_LUMA_COEFF_3, calc_table[0][j]); 260 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, HORIZONTAL_LUMA_COEFFICIENTS, HOR_LUMA_COEFF_2, calc_table[1][j]); 261 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, HORIZONTAL_LUMA_COEFFICIENTS, HOR_LUMA_COEFF_1, calc_table[2][j]); 262 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, HORIZONTAL_LUMA_COEFFICIENTS, HOR_LUMA_COEFF_0, calc_table[3][j]); 263 264 ctx->scaler_coeff_reg[/* Luma */ 0][/* Hori */ 0][i] = reg_value; 265 266 reg_value = 0; 267 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, HORIZONTAL_CHROMA_COEFFICIENTS, HOR_CHROMA_COEFF_3, calc_table[0][j]); 268 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, HORIZONTAL_CHROMA_COEFFICIENTS, HOR_CHROMA_COEFF_2, calc_table[1][j]); 269 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, HORIZONTAL_CHROMA_COEFFICIENTS, HOR_CHROMA_COEFF_1, calc_table[2][j]); 270 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, HORIZONTAL_CHROMA_COEFFICIENTS, HOR_CHROMA_COEFF_0, calc_table[3][j]); 271 272 ctx->scaler_coeff_reg[/* Chroma */ 1][/* H */ 0][i] = reg_value; 273 } 274 275 tng_calculate_scaler_coeff(fVertPitch, calc_table, 16, 4); 276 for (i = 0; i < 4; i++) 277 { 278 unsigned int j = 1+2*i; 279 280 reg_value = 0; 281 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, VERTICAL_LUMA_COEFFICIENTS, VER_LUMA_COEFF_3, calc_table[0][j]); 282 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, VERTICAL_LUMA_COEFFICIENTS, VER_LUMA_COEFF_2, calc_table[1][j]); 283 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, VERTICAL_LUMA_COEFFICIENTS, VER_LUMA_COEFF_1, calc_table[2][j]); 284 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, VERTICAL_LUMA_COEFFICIENTS, VER_LUMA_COEFF_0, calc_table[3][j]); 285 286 ctx->scaler_coeff_reg[/* L */ 0][/* Verti */ 1][i] = reg_value; 287 288 reg_value = 0; 289 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, VERTICAL_CHROMA_COEFFICIENTS, VER_CHROMA_COEFF_3, calc_table[0][j]); 290 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, VERTICAL_CHROMA_COEFFICIENTS, VER_CHROMA_COEFF_2,calc_table[1][j]); 291 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, VERTICAL_CHROMA_COEFFICIENTS, VER_CHROMA_COEFF_1, calc_table[2][j]); 292 REGIO_WRITE_FIELD(reg_value, MSVDX_CMDS, VERTICAL_CHROMA_COEFFICIENTS, VER_CHROMA_COEFF_0, calc_table[3][j]); 293 294 ctx->scaler_coeff_reg[/* C */ 1][ /* V */ 1][i] = reg_value; 295 } 296 297 /* VXD can only downscale from the original display size. */ 298 IMG_ASSERT(fHorzPitch >= 1 && fVertPitch >= 1); 299 300 #ifdef PSBVIDEO_MRFL_DEC 301 scale_acc = 12; 302 #endif 303 304 ctx->h_scaler_ctrl = 0; 305 REGIO_WRITE_FIELD_LITE(ctx->h_scaler_ctrl, MSVDX_CMDS, HORIZONTAL_SCALE_CONTROL, HORIZONTAL_SCALE_PITCH, (int)(fHorzPitch * (1 << scale_acc))); 306 REGIO_WRITE_FIELD_LITE(ctx->h_scaler_ctrl, MSVDX_CMDS, HORIZONTAL_SCALE_CONTROL, HORIZONTAL_INITIAL_POS, (int)(fHorzPitch * 0.5f * (1 << scale_acc))); 307 308 ctx->v_scaler_ctrl = 0; 309 REGIO_WRITE_FIELD_LITE(ctx->v_scaler_ctrl, MSVDX_CMDS, VERTICAL_SCALE_CONTROL, VERTICAL_SCALE_PITCH, (int)(fVertPitch * (1 << scale_acc) + 0.5) ); 310 REGIO_WRITE_FIELD_LITE(ctx->v_scaler_ctrl, MSVDX_CMDS, VERTICAL_SCALE_CONTROL, VERTICAL_INITIAL_POS, (int)(fVertPitch * 0.5 * (1 << scale_acc) + 0.5)); 311 } 312 313 void tng_ved_write_scale_reg(object_context_p obj_context) 314 { 315 uint32_t cmd = 0; 316 psb_cmdbuf_p cmdbuf = obj_context->cmdbuf; 317 context_DEC_p ctx = (context_DEC_p) obj_context->format_data; 318 object_surface_p src_surface = obj_context->current_render_target; 319 unsigned int lc, hv, x; 320 321 /* setup scaling coeffs */ 322 if (obj_context->scaling_update) { 323 tng_calculate_scaler_coff_reg(obj_context); 324 obj_context->scaling_update = 0; 325 } 326 327 { 328 psb_cmdbuf_rendec_start(cmdbuf, RENDEC_REGISTER_OFFSET(MSVDX_CMDS, SCALED_DISPLAY_SIZE)); 329 330 cmd = 0; 331 REGIO_WRITE_FIELD_LITE(cmd, MSVDX_CMDS, SCALED_DISPLAY_SIZE, SCALE_DISPLAY_WIDTH, obj_context->driver_data->render_rect.width - 1); 332 REGIO_WRITE_FIELD_LITE(cmd, MSVDX_CMDS, SCALED_DISPLAY_SIZE, SCALE_DISPLAY_HEIGHT, obj_context->driver_data->render_rect.height - 1); 333 psb_cmdbuf_rendec_write(cmdbuf, cmd); 334 psb_cmdbuf_rendec_write(cmdbuf, ctx->h_scaler_ctrl ); 335 psb_cmdbuf_rendec_write(cmdbuf, ctx->v_scaler_ctrl ); //58 336 psb_cmdbuf_rendec_end(cmdbuf); 337 } 338 339 /* Write the Coefficeients */ 340 { 341 psb_cmdbuf_rendec_start(cmdbuf, RENDEC_REGISTER_OFFSET(MSVDX_CMDS, HORIZONTAL_LUMA_COEFFICIENTS)); 342 for(lc=0 ; lc<2 ; lc++) 343 { 344 for(hv=0 ; hv<2 ; hv++) 345 { 346 for(x=0 ; x<4 ; x++) 347 { 348 psb_cmdbuf_rendec_write(cmdbuf, ctx->scaler_coeff_reg[lc][hv][x]); 349 } 350 } 351 } 352 psb_cmdbuf_rendec_end(cmdbuf); 353 } 354 } 355