1 /* 2 * SDL - Simple DirectMedia Layer 3 * CELL BE Support for PS3 Framebuffer 4 * Copyright (C) 2008, 2009 International Business Machines Corporation 5 * 6 * This library is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU Lesser General Public License as published 8 * by the Free Software Foundation; either version 2.1 of the License, or 9 * (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, but 12 * WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 19 * USA 20 * 21 * Martin Lowinski <lowinski [at] de [dot] ibm [ibm] com> 22 * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com> 23 * SPE code based on research by: 24 * Rene Becker 25 * Thimo Emmerich 26 */ 27 28 #include "spu_common.h" 29 30 #include <spu_intrinsics.h> 31 #include <spu_mfcio.h> 32 33 // Debugging 34 //#define DEBUG 35 36 #ifdef DEBUG 37 #define deprintf(fmt, args... ) \ 38 fprintf( stdout, fmt, ##args ); \ 39 fflush( stdout ); 40 #else 41 #define deprintf( fmt, args... ) 42 #endif 43 44 struct scale_parms_t parms __attribute__((aligned(128))); 45 46 /* A maximum of 8 lines Y, therefore 4 lines V, 4 lines U are stored 47 * there might be the need to retrieve misaligned data, adjust 48 * incoming v and u plane to be able to handle this (add 128) 49 */ 50 unsigned char y_plane[2][(MAX_HDTV_WIDTH+128)*4] __attribute__((aligned(128))); 51 unsigned char v_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128))); 52 unsigned char u_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128))); 53 54 /* temp-buffer for scaling: 4 lines Y, therefore 2 lines V, 2 lines U */ 55 unsigned char scaled_y_plane[2][MAX_HDTV_WIDTH*2] __attribute__((aligned(128))); 56 unsigned char scaled_v_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128))); 57 unsigned char scaled_u_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128))); 58 59 /* some vectors needed by the float to int conversion */ 60 static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f }; 61 static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f }; 62 63 void bilinear_scale_line_w8(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride); 64 void bilinear_scale_line_w16(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride); 65 66 void scale_srcw16_dstw16(); 67 void scale_srcw16_dstw32(); 68 void scale_srcw32_dstw16(); 69 void scale_srcw32_dstw32(); 70 71 int main( unsigned long long spe_id __attribute__((unused)), unsigned long long argp ) 72 { 73 deprintf("[SPU] bilin_scaler_spu is up... (on SPE #%llu)\n", spe_id); 74 /* DMA transfer for the input parameters */ 75 spu_mfcdma32(&parms, (unsigned int)argp, sizeof(struct scale_parms_t), TAG_INIT, MFC_GET_CMD); 76 DMA_WAIT_TAG(TAG_INIT); 77 78 deprintf("[SPU] Scale %ux%u to %ux%u\n", parms.src_pixel_width, parms.src_pixel_height, 79 parms.dst_pixel_width, parms.dst_pixel_height); 80 81 if(parms.src_pixel_width & 0x1f) { 82 if(parms.dst_pixel_width & 0x1F) { 83 deprintf("[SPU] Using scale_srcw16_dstw16\n"); 84 scale_srcw16_dstw16(); 85 } else { 86 deprintf("[SPU] Using scale_srcw16_dstw32\n"); 87 scale_srcw16_dstw32(); 88 } 89 } else { 90 if(parms.dst_pixel_width & 0x1F) { 91 deprintf("[SPU] Using scale_srcw32_dstw16\n"); 92 scale_srcw32_dstw16(); 93 } else { 94 deprintf("[SPU] Using scale_srcw32_dstw32\n"); 95 scale_srcw32_dstw32(); 96 } 97 } 98 deprintf("[SPU] bilin_scaler_spu... done!\n"); 99 100 return 0; 101 } 102 103 104 /* 105 * vfloat_to_vuint() 106 * 107 * converts a float vector to an unsinged int vector using saturated 108 * arithmetic 109 * 110 * @param vec_s float vector for conversion 111 * @returns converted unsigned int vector 112 */ 113 inline static vector unsigned int vfloat_to_vuint(vector float vec_s) { 114 vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s); 115 vec_s = spu_sel(vec_s, vec_0_1, select_1); 116 117 vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255); 118 vec_s = spu_sel(vec_s, vec_255, select_2); 119 return spu_convtu(vec_s,0); 120 } 121 122 123 /* 124 * scale_srcw16_dstw16() 125 * 126 * processes an input image of width 16 127 * scaling is done to a width 16 128 * result stored in RAM 129 */ 130 void scale_srcw16_dstw16() { 131 // extract parameters 132 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; 133 134 unsigned int src_width = parms.src_pixel_width; 135 unsigned int src_height = parms.src_pixel_height; 136 unsigned int dst_width = parms.dst_pixel_width; 137 unsigned int dst_height = parms.dst_pixel_height; 138 139 // YVU 140 unsigned int src_linestride_y = src_width; 141 unsigned int src_dbl_linestride_y = src_width<<1; 142 unsigned int src_linestride_vu = src_width>>1; 143 unsigned int src_dbl_linestride_vu = src_width; 144 145 // scaled YVU 146 unsigned int scaled_src_linestride_y = dst_width; 147 148 // ram addresses 149 unsigned char* src_addr_y = parms.y_plane; 150 unsigned char* src_addr_v = parms.v_plane; 151 unsigned char* src_addr_u = parms.u_plane; 152 153 // for handling misalignment, addresses are precalculated 154 unsigned char* precalc_src_addr_v = src_addr_v; 155 unsigned char* precalc_src_addr_u = src_addr_u; 156 157 unsigned int dst_picture_size = dst_width*dst_height; 158 159 // Sizes for destination 160 unsigned int dst_dbl_linestride_y = dst_width<<1; 161 unsigned int dst_dbl_linestride_vu = dst_width>>1; 162 163 // Perform address calculation for Y, V and U in main memory with dst_addr as base 164 unsigned char* dst_addr_main_memory_y = dst_addr; 165 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; 166 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); 167 168 // calculate scale factors 169 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); 170 float y_scale = (float)src_height/(float)dst_height; 171 172 // double buffered processing 173 // buffer switching 174 unsigned int curr_src_idx = 0; 175 unsigned int curr_dst_idx = 0; 176 unsigned int next_src_idx, next_dst_idx; 177 178 // 2 lines y as output, upper and lowerline 179 unsigned int curr_interpl_y_upper = 0; 180 unsigned int next_interpl_y_upper; 181 unsigned int curr_interpl_y_lower, next_interpl_y_lower; 182 // only 1 line v/u output, both planes have the same dimension 183 unsigned int curr_interpl_vu = 0; 184 unsigned int next_interpl_vu; 185 186 // weights, calculated in every loop iteration 187 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; 188 vector float vf_next_NSweight_y_upper; 189 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; 190 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; 191 vector float vf_next_NSweight_vu; 192 193 // line indices for the src picture 194 float curr_src_y_upper = 0.0f, next_src_y_upper; 195 float curr_src_y_lower, next_src_y_lower; 196 float curr_src_vu = 0.0f, next_src_vu; 197 198 // line indices for the dst picture 199 unsigned int dst_y=0, dst_vu=0; 200 201 // offset for the v and u plane to handle misalignement 202 unsigned int curr_lsoff_v = 0, next_lsoff_v; 203 unsigned int curr_lsoff_u = 0, next_lsoff_u; 204 205 // calculate lower line indices 206 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; 207 curr_interpl_y_lower = (unsigned int)curr_src_y_lower; 208 // lower line weight 209 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); 210 211 212 // start partially double buffered processing 213 // get initial data, 2 sets of y, 1 set v, 1 set u 214 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); 215 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, 216 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), 217 src_dbl_linestride_y, 218 RETR_BUF, 219 0, 0 ); 220 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); 221 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); 222 223 /* iteration loop 224 * within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved 225 * the scaled output is 2 lines y, 1 line v, 1 line u 226 * the yuv2rgb-converted output is stored to RAM 227 */ 228 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { 229 dst_y = dst_vu<<1; 230 231 // calculate next indices 232 next_src_vu = ((float)dst_vu+1)*y_scale; 233 next_src_y_upper = ((float)dst_y+2)*y_scale; 234 next_src_y_lower = ((float)dst_y+3)*y_scale; 235 236 next_interpl_vu = (unsigned int) next_src_vu; 237 next_interpl_y_upper = (unsigned int) next_src_y_upper; 238 next_interpl_y_lower = (unsigned int) next_src_y_lower; 239 240 // calculate weight NORTH-SOUTH 241 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); 242 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); 243 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); 244 245 // get next lines 246 next_src_idx = curr_src_idx^1; 247 next_dst_idx = curr_dst_idx^1; 248 249 // 4 lines y 250 mfc_get( y_plane[next_src_idx], 251 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), 252 src_dbl_linestride_y, 253 RETR_BUF+next_src_idx, 254 0, 0 ); 255 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, 256 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), 257 src_dbl_linestride_y, 258 RETR_BUF+next_src_idx, 259 0, 0 ); 260 261 // 2 lines v 262 precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu); 263 next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F; 264 mfc_get( v_plane[next_src_idx], 265 ((unsigned int) precalc_src_addr_v)&0xFFFFFFF0, 266 src_dbl_linestride_vu+(next_lsoff_v<<1), 267 RETR_BUF+next_src_idx, 268 0, 0 ); 269 // 2 lines u 270 precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu); 271 next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F; 272 mfc_get( u_plane[next_src_idx], 273 ((unsigned int) precalc_src_addr_u)&0xFFFFFFF0, 274 src_dbl_linestride_vu+(next_lsoff_v<<1), 275 RETR_BUF+next_src_idx, 276 0, 0 ); 277 278 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); 279 280 // scaling 281 // work line y_upper 282 bilinear_scale_line_w16( y_plane[curr_src_idx], 283 scaled_y_plane[curr_src_idx], 284 dst_width, 285 vf_x_scale, 286 vf_curr_NSweight_y_upper, 287 src_linestride_y ); 288 // work line y_lower 289 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, 290 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, 291 dst_width, 292 vf_x_scale, 293 vf_curr_NSweight_y_lower, 294 src_linestride_y ); 295 // work line v 296 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, 297 scaled_v_plane[curr_src_idx], 298 dst_width>>1, 299 vf_x_scale, 300 vf_curr_NSweight_vu, 301 src_linestride_vu ); 302 // work line u 303 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, 304 scaled_u_plane[curr_src_idx], 305 dst_width>>1, 306 vf_x_scale, 307 vf_curr_NSweight_vu, 308 src_linestride_vu ); 309 310 311 // Store the result back to main memory into a destination buffer in YUV format 312 //--------------------------------------------------------------------------------------------- 313 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); 314 315 // Perform three DMA transfers to 3 different locations in the main memory! 316 // dst_width: Pixel width of destination image 317 // dst_addr: Destination address in main memory 318 // dst_vu: Counter which is incremented one by one 319 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) 320 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) 321 (unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) 322 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) 323 STR_BUF+curr_dst_idx, // Tag 324 0, 0 ); 325 326 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) 327 (unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 328 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) 329 STR_BUF+curr_dst_idx, // Tag 330 0, 0 ); 331 332 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) 333 (unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 334 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) 335 STR_BUF+curr_dst_idx, // Tag 336 0, 0 ); 337 //--------------------------------------------------------------------------------------------- 338 339 340 // update for next cycle 341 curr_src_idx = next_src_idx; 342 curr_dst_idx = next_dst_idx; 343 344 curr_interpl_y_upper = next_interpl_y_upper; 345 curr_interpl_y_lower = next_interpl_y_lower; 346 curr_interpl_vu = next_interpl_vu; 347 348 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; 349 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; 350 vf_curr_NSweight_vu = vf_next_NSweight_vu; 351 352 curr_src_y_upper = next_src_y_upper; 353 curr_src_y_lower = next_src_y_lower; 354 curr_src_vu = next_src_vu; 355 356 curr_lsoff_v = next_lsoff_v; 357 curr_lsoff_u = next_lsoff_u; 358 } 359 360 361 362 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); 363 364 // scaling 365 // work line y_upper 366 bilinear_scale_line_w16( y_plane[curr_src_idx], 367 scaled_y_plane[curr_src_idx], 368 dst_width, 369 vf_x_scale, 370 vf_curr_NSweight_y_upper, 371 src_linestride_y ); 372 // work line y_lower 373 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, 374 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, 375 dst_width, 376 vf_x_scale, 377 vf_curr_NSweight_y_lower, 378 src_linestride_y ); 379 // work line v 380 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, 381 scaled_v_plane[curr_src_idx], 382 dst_width>>1, 383 vf_x_scale, 384 vf_curr_NSweight_vu, 385 src_linestride_vu ); 386 // work line u 387 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, 388 scaled_u_plane[curr_src_idx], 389 dst_width>>1, 390 vf_x_scale, 391 vf_curr_NSweight_vu, 392 src_linestride_vu ); 393 394 395 // Store the result back to main memory into a destination buffer in YUV format 396 //--------------------------------------------------------------------------------------------- 397 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); 398 399 // Perform three DMA transfers to 3 different locations in the main memory! 400 // dst_width: Pixel width of destination image 401 // dst_addr: Destination address in main memory 402 // dst_vu: Counter which is incremented one by one 403 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) 404 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) 405 (unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) 406 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) 407 STR_BUF+curr_dst_idx, // Tag 408 0, 0 ); 409 410 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) 411 (unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 412 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) 413 STR_BUF+curr_dst_idx, // Tag 414 0, 0 ); 415 416 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) 417 (unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 418 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) 419 STR_BUF+curr_dst_idx, // Tag 420 0, 0 ); 421 422 // wait for completion 423 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); 424 //--------------------------------------------------------------------------------------------- 425 } 426 427 428 /* 429 * scale_srcw16_dstw32() 430 * 431 * processes an input image of width 16 432 * scaling is done to a width 32 433 * yuv2rgb conversion on a width of 32 434 * result stored in RAM 435 */ 436 void scale_srcw16_dstw32() { 437 // extract parameters 438 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; 439 440 unsigned int src_width = parms.src_pixel_width; 441 unsigned int src_height = parms.src_pixel_height; 442 unsigned int dst_width = parms.dst_pixel_width; 443 unsigned int dst_height = parms.dst_pixel_height; 444 445 // YVU 446 unsigned int src_linestride_y = src_width; 447 unsigned int src_dbl_linestride_y = src_width<<1; 448 unsigned int src_linestride_vu = src_width>>1; 449 unsigned int src_dbl_linestride_vu = src_width; 450 // scaled YVU 451 unsigned int scaled_src_linestride_y = dst_width; 452 453 // ram addresses 454 unsigned char* src_addr_y = parms.y_plane; 455 unsigned char* src_addr_v = parms.v_plane; 456 unsigned char* src_addr_u = parms.u_plane; 457 458 unsigned int dst_picture_size = dst_width*dst_height; 459 460 // Sizes for destination 461 unsigned int dst_dbl_linestride_y = dst_width<<1; 462 unsigned int dst_dbl_linestride_vu = dst_width>>1; 463 464 // Perform address calculation for Y, V and U in main memory with dst_addr as base 465 unsigned char* dst_addr_main_memory_y = dst_addr; 466 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; 467 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); 468 469 470 // for handling misalignment, addresses are precalculated 471 unsigned char* precalc_src_addr_v = src_addr_v; 472 unsigned char* precalc_src_addr_u = src_addr_u; 473 474 // calculate scale factors 475 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); 476 float y_scale = (float)src_height/(float)dst_height; 477 478 // double buffered processing 479 // buffer switching 480 unsigned int curr_src_idx = 0; 481 unsigned int curr_dst_idx = 0; 482 unsigned int next_src_idx, next_dst_idx; 483 484 // 2 lines y as output, upper and lowerline 485 unsigned int curr_interpl_y_upper = 0; 486 unsigned int next_interpl_y_upper; 487 unsigned int curr_interpl_y_lower, next_interpl_y_lower; 488 // only 1 line v/u output, both planes have the same dimension 489 unsigned int curr_interpl_vu = 0; 490 unsigned int next_interpl_vu; 491 492 // weights, calculated in every loop iteration 493 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; 494 vector float vf_next_NSweight_y_upper; 495 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; 496 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; 497 vector float vf_next_NSweight_vu; 498 499 // line indices for the src picture 500 float curr_src_y_upper = 0.0f, next_src_y_upper; 501 float curr_src_y_lower, next_src_y_lower; 502 float curr_src_vu = 0.0f, next_src_vu; 503 504 // line indices for the dst picture 505 unsigned int dst_y=0, dst_vu=0; 506 507 // offset for the v and u plane to handle misalignement 508 unsigned int curr_lsoff_v = 0, next_lsoff_v; 509 unsigned int curr_lsoff_u = 0, next_lsoff_u; 510 511 // calculate lower line idices 512 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; 513 curr_interpl_y_lower = (unsigned int)curr_src_y_lower; 514 // lower line weight 515 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); 516 517 518 // start partially double buffered processing 519 // get initial data, 2 sets of y, 1 set v, 1 set u 520 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); 521 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, 522 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), 523 src_dbl_linestride_y, 524 RETR_BUF, 525 0, 0 ); 526 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); 527 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); 528 529 // iteration loop 530 // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved 531 // the scaled output is 2 lines y, 1 line v, 1 line u 532 // the yuv2rgb-converted output is stored to RAM 533 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { 534 dst_y = dst_vu<<1; 535 536 // calculate next indices 537 next_src_vu = ((float)dst_vu+1)*y_scale; 538 next_src_y_upper = ((float)dst_y+2)*y_scale; 539 next_src_y_lower = ((float)dst_y+3)*y_scale; 540 541 next_interpl_vu = (unsigned int) next_src_vu; 542 next_interpl_y_upper = (unsigned int) next_src_y_upper; 543 next_interpl_y_lower = (unsigned int) next_src_y_lower; 544 545 // calculate weight NORTH-SOUTH 546 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); 547 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); 548 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); 549 550 // get next lines 551 next_src_idx = curr_src_idx^1; 552 next_dst_idx = curr_dst_idx^1; 553 554 // 4 lines y 555 mfc_get( y_plane[next_src_idx], 556 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), 557 src_dbl_linestride_y, 558 RETR_BUF+next_src_idx, 559 0, 0 ); 560 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, 561 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), 562 src_dbl_linestride_y, 563 RETR_BUF+next_src_idx, 564 0, 0 ); 565 566 // 2 lines v 567 precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu); 568 next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F; 569 mfc_get( v_plane[next_src_idx], 570 ((unsigned int) precalc_src_addr_v)&0xFFFFFFF0, 571 src_dbl_linestride_vu+(next_lsoff_v<<1), 572 RETR_BUF+next_src_idx, 573 0, 0 ); 574 // 2 lines u 575 precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu); 576 next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F; 577 mfc_get( u_plane[next_src_idx], 578 ((unsigned int) precalc_src_addr_u)&0xFFFFFFF0, 579 src_dbl_linestride_vu+(next_lsoff_v<<1), 580 RETR_BUF+next_src_idx, 581 0, 0 ); 582 583 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); 584 585 // scaling 586 // work line y_upper 587 bilinear_scale_line_w16( y_plane[curr_src_idx], 588 scaled_y_plane[curr_src_idx], 589 dst_width, 590 vf_x_scale, 591 vf_curr_NSweight_y_upper, 592 src_linestride_y ); 593 // work line y_lower 594 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, 595 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, 596 dst_width, 597 vf_x_scale, 598 vf_curr_NSweight_y_lower, 599 src_linestride_y ); 600 // work line v 601 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, 602 scaled_v_plane[curr_src_idx], 603 dst_width>>1, 604 vf_x_scale, 605 vf_curr_NSweight_vu, 606 src_linestride_vu ); 607 // work line u 608 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, 609 scaled_u_plane[curr_src_idx], 610 dst_width>>1, 611 vf_x_scale, 612 vf_curr_NSweight_vu, 613 src_linestride_vu ); 614 615 //--------------------------------------------------------------------------------------------- 616 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); 617 618 // Perform three DMA transfers to 3 different locations in the main memory! 619 // dst_width: Pixel width of destination image 620 // dst_addr: Destination address in main memory 621 // dst_vu: Counter which is incremented one by one 622 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) 623 624 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) 625 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) 626 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) 627 STR_BUF+curr_dst_idx, // Tag 628 0, 0 ); 629 630 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) 631 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 632 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) 633 STR_BUF+curr_dst_idx, // Tag 634 0, 0 ); 635 636 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) 637 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 638 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) 639 STR_BUF+curr_dst_idx, // Tag 640 0, 0 ); 641 //--------------------------------------------------------------------------------------------- 642 643 644 // update for next cycle 645 curr_src_idx = next_src_idx; 646 curr_dst_idx = next_dst_idx; 647 648 curr_interpl_y_upper = next_interpl_y_upper; 649 curr_interpl_y_lower = next_interpl_y_lower; 650 curr_interpl_vu = next_interpl_vu; 651 652 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; 653 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; 654 vf_curr_NSweight_vu = vf_next_NSweight_vu; 655 656 curr_src_y_upper = next_src_y_upper; 657 curr_src_y_lower = next_src_y_lower; 658 curr_src_vu = next_src_vu; 659 660 curr_lsoff_v = next_lsoff_v; 661 curr_lsoff_u = next_lsoff_u; 662 } 663 664 665 666 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); 667 668 // scaling 669 // work line y_upper 670 bilinear_scale_line_w16( y_plane[curr_src_idx], 671 scaled_y_plane[curr_src_idx], 672 dst_width, 673 vf_x_scale, 674 vf_curr_NSweight_y_upper, 675 src_linestride_y ); 676 // work line y_lower 677 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, 678 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, 679 dst_width, 680 vf_x_scale, 681 vf_curr_NSweight_y_lower, 682 src_linestride_y ); 683 // work line v 684 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, 685 scaled_v_plane[curr_src_idx], 686 dst_width>>1, 687 vf_x_scale, 688 vf_curr_NSweight_vu, 689 src_linestride_vu ); 690 // work line u 691 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, 692 scaled_u_plane[curr_src_idx], 693 dst_width>>1, 694 vf_x_scale, 695 vf_curr_NSweight_vu, 696 src_linestride_vu ); 697 698 //--------------------------------------------------------------------------------------------- 699 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); 700 701 // Perform three DMA transfers to 3 different locations in the main memory! 702 // dst_width: Pixel width of destination image 703 // dst_addr: Destination address in main memory 704 // dst_vu: Counter which is incremented one by one 705 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) 706 707 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) 708 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) 709 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) 710 STR_BUF+curr_dst_idx, // Tag 711 0, 0 ); 712 713 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) 714 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 715 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) 716 STR_BUF+curr_dst_idx, // Tag 717 0, 0 ); 718 719 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) 720 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 721 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) 722 STR_BUF+curr_dst_idx, // Tag 723 0, 0 ); 724 725 // wait for completion 726 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); 727 //--------------------------------------------------------------------------------------------- 728 } 729 730 731 /* 732 * scale_srcw32_dstw16() 733 * 734 * processes an input image of width 32 735 * scaling is done to a width 16 736 * yuv2rgb conversion on a width of 16 737 * result stored in RAM 738 */ 739 void scale_srcw32_dstw16() { 740 // extract parameters 741 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; 742 743 unsigned int src_width = parms.src_pixel_width; 744 unsigned int src_height = parms.src_pixel_height; 745 unsigned int dst_width = parms.dst_pixel_width; 746 unsigned int dst_height = parms.dst_pixel_height; 747 748 // YVU 749 unsigned int src_linestride_y = src_width; 750 unsigned int src_dbl_linestride_y = src_width<<1; 751 unsigned int src_linestride_vu = src_width>>1; 752 unsigned int src_dbl_linestride_vu = src_width; 753 // scaled YVU 754 unsigned int scaled_src_linestride_y = dst_width; 755 756 // ram addresses 757 unsigned char* src_addr_y = parms.y_plane; 758 unsigned char* src_addr_v = parms.v_plane; 759 unsigned char* src_addr_u = parms.u_plane; 760 761 unsigned int dst_picture_size = dst_width*dst_height; 762 763 // Sizes for destination 764 unsigned int dst_dbl_linestride_y = dst_width<<1; 765 unsigned int dst_dbl_linestride_vu = dst_width>>1; 766 767 // Perform address calculation for Y, V and U in main memory with dst_addr as base 768 unsigned char* dst_addr_main_memory_y = dst_addr; 769 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; 770 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); 771 772 // calculate scale factors 773 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); 774 float y_scale = (float)src_height/(float)dst_height; 775 776 // double buffered processing 777 // buffer switching 778 unsigned int curr_src_idx = 0; 779 unsigned int curr_dst_idx = 0; 780 unsigned int next_src_idx, next_dst_idx; 781 782 // 2 lines y as output, upper and lowerline 783 unsigned int curr_interpl_y_upper = 0; 784 unsigned int next_interpl_y_upper; 785 unsigned int curr_interpl_y_lower, next_interpl_y_lower; 786 // only 1 line v/u output, both planes have the same dimension 787 unsigned int curr_interpl_vu = 0; 788 unsigned int next_interpl_vu; 789 790 // weights, calculated in every loop iteration 791 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; 792 vector float vf_next_NSweight_y_upper; 793 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; 794 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; 795 vector float vf_next_NSweight_vu; 796 797 // line indices for the src picture 798 float curr_src_y_upper = 0.0f, next_src_y_upper; 799 float curr_src_y_lower, next_src_y_lower; 800 float curr_src_vu = 0.0f, next_src_vu; 801 802 // line indices for the dst picture 803 unsigned int dst_y=0, dst_vu=0; 804 805 // calculate lower line idices 806 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; 807 curr_interpl_y_lower = (unsigned int)curr_src_y_lower; 808 // lower line weight 809 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); 810 811 812 // start partially double buffered processing 813 // get initial data, 2 sets of y, 1 set v, 1 set u 814 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); 815 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, 816 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), 817 src_dbl_linestride_y, 818 RETR_BUF, 819 0, 0 ); 820 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); 821 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); 822 823 // iteration loop 824 // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved 825 // the scaled output is 2 lines y, 1 line v, 1 line u 826 // the yuv2rgb-converted output is stored to RAM 827 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { 828 dst_y = dst_vu<<1; 829 830 // calculate next indices 831 next_src_vu = ((float)dst_vu+1)*y_scale; 832 next_src_y_upper = ((float)dst_y+2)*y_scale; 833 next_src_y_lower = ((float)dst_y+3)*y_scale; 834 835 next_interpl_vu = (unsigned int) next_src_vu; 836 next_interpl_y_upper = (unsigned int) next_src_y_upper; 837 next_interpl_y_lower = (unsigned int) next_src_y_lower; 838 839 // calculate weight NORTH-SOUTH 840 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); 841 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); 842 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); 843 844 // get next lines 845 next_src_idx = curr_src_idx^1; 846 next_dst_idx = curr_dst_idx^1; 847 848 // 4 lines y 849 mfc_get( y_plane[next_src_idx], 850 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), 851 src_dbl_linestride_y, 852 RETR_BUF+next_src_idx, 853 0, 0 ); 854 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, 855 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), 856 src_dbl_linestride_y, 857 RETR_BUF+next_src_idx, 858 0, 0 ); 859 860 // 2 lines v 861 mfc_get( v_plane[next_src_idx], 862 (unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu), 863 src_dbl_linestride_vu, 864 RETR_BUF+next_src_idx, 865 0, 0 ); 866 // 2 lines u 867 mfc_get( u_plane[next_src_idx], 868 (unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu), 869 src_dbl_linestride_vu, 870 RETR_BUF+next_src_idx, 871 0, 0 ); 872 873 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); 874 875 // scaling 876 // work line y_upper 877 bilinear_scale_line_w16( y_plane[curr_src_idx], 878 scaled_y_plane[curr_src_idx], 879 dst_width, 880 vf_x_scale, 881 vf_curr_NSweight_y_upper, 882 src_linestride_y ); 883 // work line y_lower 884 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, 885 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, 886 dst_width, 887 vf_x_scale, 888 vf_curr_NSweight_y_lower, 889 src_linestride_y ); 890 // work line v 891 bilinear_scale_line_w16( v_plane[curr_src_idx], 892 scaled_v_plane[curr_src_idx], 893 dst_width>>1, 894 vf_x_scale, 895 vf_curr_NSweight_vu, 896 src_linestride_vu ); 897 // work line u 898 bilinear_scale_line_w16( u_plane[curr_src_idx], 899 scaled_u_plane[curr_src_idx], 900 dst_width>>1, 901 vf_x_scale, 902 vf_curr_NSweight_vu, 903 src_linestride_vu ); 904 905 //--------------------------------------------------------------------------------------------- 906 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); 907 908 // Perform three DMA transfers to 3 different locations in the main memory! 909 // dst_width: Pixel width of destination image 910 // dst_addr: Destination address in main memory 911 // dst_vu: Counter which is incremented one by one 912 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) 913 914 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) 915 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) 916 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) 917 STR_BUF+curr_dst_idx, // Tag 918 0, 0 ); 919 920 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) 921 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 922 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) 923 STR_BUF+curr_dst_idx, // Tag 924 0, 0 ); 925 926 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) 927 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 928 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) 929 STR_BUF+curr_dst_idx, // Tag 930 0, 0 ); 931 //--------------------------------------------------------------------------------------------- 932 933 934 // update for next cycle 935 curr_src_idx = next_src_idx; 936 curr_dst_idx = next_dst_idx; 937 938 curr_interpl_y_upper = next_interpl_y_upper; 939 curr_interpl_y_lower = next_interpl_y_lower; 940 curr_interpl_vu = next_interpl_vu; 941 942 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; 943 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; 944 vf_curr_NSweight_vu = vf_next_NSweight_vu; 945 946 curr_src_y_upper = next_src_y_upper; 947 curr_src_y_lower = next_src_y_lower; 948 curr_src_vu = next_src_vu; 949 } 950 951 952 953 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); 954 955 // scaling 956 // work line y_upper 957 bilinear_scale_line_w16( y_plane[curr_src_idx], 958 scaled_y_plane[curr_src_idx], 959 dst_width, 960 vf_x_scale, 961 vf_curr_NSweight_y_upper, 962 src_linestride_y ); 963 // work line y_lower 964 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, 965 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, 966 dst_width, 967 vf_x_scale, 968 vf_curr_NSweight_y_lower, 969 src_linestride_y ); 970 // work line v 971 bilinear_scale_line_w16( v_plane[curr_src_idx], 972 scaled_v_plane[curr_src_idx], 973 dst_width>>1, 974 vf_x_scale, 975 vf_curr_NSweight_vu, 976 src_linestride_vu ); 977 // work line u 978 bilinear_scale_line_w16( u_plane[curr_src_idx], 979 scaled_u_plane[curr_src_idx], 980 dst_width>>1, 981 vf_x_scale, 982 vf_curr_NSweight_vu, 983 src_linestride_vu ); 984 985 986 //--------------------------------------------------------------------------------------------- 987 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); 988 989 // Perform three DMA transfers to 3 different locations in the main memory! 990 // dst_width: Pixel width of destination image 991 // dst_addr: Destination address in main memory 992 // dst_vu: Counter which is incremented one by one 993 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) 994 995 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) 996 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) 997 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) 998 STR_BUF+curr_dst_idx, // Tag 999 0, 0 ); 1000 1001 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) 1002 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 1003 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) 1004 STR_BUF+curr_dst_idx, // Tag 1005 0, 0 ); 1006 1007 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) 1008 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 1009 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) 1010 STR_BUF+curr_dst_idx, // Tag 1011 0, 0 ); 1012 1013 // wait for completion 1014 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); 1015 //--------------------------------------------------------------------------------------------- 1016 } 1017 1018 1019 /** 1020 * scale_srcw32_dstw32() 1021 * 1022 * processes an input image of width 32 1023 * scaling is done to a width 32 1024 * yuv2rgb conversion on a width of 32 1025 * result stored in RAM 1026 */ 1027 void scale_srcw32_dstw32() { 1028 // extract parameters 1029 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; 1030 1031 unsigned int src_width = parms.src_pixel_width; 1032 unsigned int src_height = parms.src_pixel_height; 1033 unsigned int dst_width = parms.dst_pixel_width; 1034 unsigned int dst_height = parms.dst_pixel_height; 1035 1036 // YVU 1037 unsigned int src_linestride_y = src_width; 1038 unsigned int src_dbl_linestride_y = src_width<<1; 1039 unsigned int src_linestride_vu = src_width>>1; 1040 unsigned int src_dbl_linestride_vu = src_width; 1041 1042 // scaled YVU 1043 unsigned int scaled_src_linestride_y = dst_width; 1044 1045 // ram addresses 1046 unsigned char* src_addr_y = parms.y_plane; 1047 unsigned char* src_addr_v = parms.v_plane; 1048 unsigned char* src_addr_u = parms.u_plane; 1049 1050 unsigned int dst_picture_size = dst_width*dst_height; 1051 1052 // Sizes for destination 1053 unsigned int dst_dbl_linestride_y = dst_width<<1; 1054 unsigned int dst_dbl_linestride_vu = dst_width>>1; 1055 1056 // Perform address calculation for Y, V and U in main memory with dst_addr as base 1057 unsigned char* dst_addr_main_memory_y = dst_addr; 1058 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; 1059 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); 1060 1061 // calculate scale factors 1062 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); 1063 float y_scale = (float)src_height/(float)dst_height; 1064 1065 // double buffered processing 1066 // buffer switching 1067 unsigned int curr_src_idx = 0; 1068 unsigned int curr_dst_idx = 0; 1069 unsigned int next_src_idx, next_dst_idx; 1070 1071 // 2 lines y as output, upper and lowerline 1072 unsigned int curr_interpl_y_upper = 0; 1073 unsigned int next_interpl_y_upper; 1074 unsigned int curr_interpl_y_lower, next_interpl_y_lower; 1075 // only 1 line v/u output, both planes have the same dimension 1076 unsigned int curr_interpl_vu = 0; 1077 unsigned int next_interpl_vu; 1078 1079 // weights, calculated in every loop iteration 1080 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; 1081 vector float vf_next_NSweight_y_upper; 1082 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; 1083 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; 1084 vector float vf_next_NSweight_vu; 1085 1086 // line indices for the src picture 1087 float curr_src_y_upper = 0.0f, next_src_y_upper; 1088 float curr_src_y_lower, next_src_y_lower; 1089 float curr_src_vu = 0.0f, next_src_vu; 1090 1091 // line indices for the dst picture 1092 unsigned int dst_y=0, dst_vu=0; 1093 1094 // calculate lower line idices 1095 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; 1096 curr_interpl_y_lower = (unsigned int)curr_src_y_lower; 1097 // lower line weight 1098 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); 1099 1100 1101 // start partially double buffered processing 1102 // get initial data, 2 sets of y, 1 set v, 1 set u 1103 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); 1104 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, 1105 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), 1106 src_dbl_linestride_y, 1107 RETR_BUF, 1108 0, 0 ); 1109 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); 1110 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); 1111 1112 // iteration loop 1113 // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved 1114 // the scaled output is 2 lines y, 1 line v, 1 line u 1115 // the yuv2rgb-converted output is stored to RAM 1116 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { 1117 dst_y = dst_vu<<1; 1118 1119 // calculate next indices 1120 next_src_vu = ((float)dst_vu+1)*y_scale; 1121 next_src_y_upper = ((float)dst_y+2)*y_scale; 1122 next_src_y_lower = ((float)dst_y+3)*y_scale; 1123 1124 next_interpl_vu = (unsigned int) next_src_vu; 1125 next_interpl_y_upper = (unsigned int) next_src_y_upper; 1126 next_interpl_y_lower = (unsigned int) next_src_y_lower; 1127 1128 // calculate weight NORTH-SOUTH 1129 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); 1130 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); 1131 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); 1132 1133 // get next lines 1134 next_src_idx = curr_src_idx^1; 1135 next_dst_idx = curr_dst_idx^1; 1136 1137 // 4 lines y 1138 mfc_get( y_plane[next_src_idx], 1139 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), 1140 src_dbl_linestride_y, 1141 RETR_BUF+next_src_idx, 1142 0, 0 ); 1143 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, 1144 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), 1145 src_dbl_linestride_y, 1146 RETR_BUF+next_src_idx, 1147 0, 0 ); 1148 1149 // 2 lines v 1150 mfc_get( v_plane[next_src_idx], 1151 (unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu), 1152 src_dbl_linestride_vu, 1153 RETR_BUF+next_src_idx, 1154 0, 0 ); 1155 // 2 lines u 1156 mfc_get( u_plane[next_src_idx], 1157 (unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu), 1158 src_dbl_linestride_vu, 1159 RETR_BUF+next_src_idx, 1160 0, 0 ); 1161 1162 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); 1163 1164 // scaling 1165 // work line y_upper 1166 bilinear_scale_line_w16( y_plane[curr_src_idx], 1167 scaled_y_plane[curr_src_idx], 1168 dst_width, 1169 vf_x_scale, 1170 vf_curr_NSweight_y_upper, 1171 src_linestride_y ); 1172 // work line y_lower 1173 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, 1174 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, 1175 dst_width, 1176 vf_x_scale, 1177 vf_curr_NSweight_y_lower, 1178 src_linestride_y ); 1179 // work line v 1180 bilinear_scale_line_w16( v_plane[curr_src_idx], 1181 scaled_v_plane[curr_src_idx], 1182 dst_width>>1, 1183 vf_x_scale, 1184 vf_curr_NSweight_vu, 1185 src_linestride_vu ); 1186 // work line u 1187 bilinear_scale_line_w16( u_plane[curr_src_idx], 1188 scaled_u_plane[curr_src_idx], 1189 dst_width>>1, 1190 vf_x_scale, 1191 vf_curr_NSweight_vu, 1192 src_linestride_vu ); 1193 1194 1195 1196 // Store the result back to main memory into a destination buffer in YUV format 1197 //--------------------------------------------------------------------------------------------- 1198 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); 1199 1200 // Perform three DMA transfers to 3 different locations in the main memory! 1201 // dst_width: Pixel width of destination image 1202 // dst_addr: Destination address in main memory 1203 // dst_vu: Counter which is incremented one by one 1204 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) 1205 1206 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) 1207 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) 1208 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) 1209 STR_BUF+curr_dst_idx, // Tag 1210 0, 0 ); 1211 1212 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) 1213 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 1214 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) 1215 STR_BUF+curr_dst_idx, // Tag 1216 0, 0 ); 1217 1218 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) 1219 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 1220 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) 1221 STR_BUF+curr_dst_idx, // Tag 1222 0, 0 ); 1223 //--------------------------------------------------------------------------------------------- 1224 1225 1226 // update for next cycle 1227 curr_src_idx = next_src_idx; 1228 curr_dst_idx = next_dst_idx; 1229 1230 curr_interpl_y_upper = next_interpl_y_upper; 1231 curr_interpl_y_lower = next_interpl_y_lower; 1232 curr_interpl_vu = next_interpl_vu; 1233 1234 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; 1235 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; 1236 vf_curr_NSweight_vu = vf_next_NSweight_vu; 1237 1238 curr_src_y_upper = next_src_y_upper; 1239 curr_src_y_lower = next_src_y_lower; 1240 curr_src_vu = next_src_vu; 1241 } 1242 1243 1244 1245 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); 1246 1247 // scaling 1248 // work line y_upper 1249 bilinear_scale_line_w16( y_plane[curr_src_idx], 1250 scaled_y_plane[curr_src_idx], 1251 dst_width, 1252 vf_x_scale, 1253 vf_curr_NSweight_y_upper, 1254 src_linestride_y ); 1255 // work line y_lower 1256 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, 1257 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, 1258 dst_width, 1259 vf_x_scale, 1260 vf_curr_NSweight_y_lower, 1261 src_linestride_y ); 1262 // work line v 1263 bilinear_scale_line_w16( v_plane[curr_src_idx], 1264 scaled_v_plane[curr_src_idx], 1265 dst_width>>1, 1266 vf_x_scale, 1267 vf_curr_NSweight_vu, 1268 src_linestride_vu ); 1269 // work line u 1270 bilinear_scale_line_w16( u_plane[curr_src_idx], 1271 scaled_u_plane[curr_src_idx], 1272 dst_width>>1, 1273 vf_x_scale, 1274 vf_curr_NSweight_vu, 1275 src_linestride_vu ); 1276 1277 1278 // Store the result back to main memory into a destination buffer in YUV format 1279 //--------------------------------------------------------------------------------------------- 1280 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); 1281 1282 // Perform three DMA transfers to 3 different locations in the main memory! 1283 // dst_width: Pixel width of destination image 1284 // dst_addr: Destination address in main memory 1285 // dst_vu: Counter which is incremented one by one 1286 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) 1287 1288 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) 1289 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) 1290 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) 1291 STR_BUF+curr_dst_idx, // Tag 1292 0, 0 ); 1293 1294 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) 1295 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 1296 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) 1297 STR_BUF+curr_dst_idx, // Tag 1298 0, 0 ); 1299 1300 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) 1301 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) 1302 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) 1303 STR_BUF+curr_dst_idx, // Tag 1304 0, 0 ); 1305 1306 // wait for completion 1307 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); 1308 //--------------------------------------------------------------------------------------------- 1309 } 1310 1311 1312 /* 1313 * bilinear_scale_line_w8() 1314 * 1315 * processes a line of yuv-input, width has to be a multiple of 8 1316 * scaled yuv-output is written to local store buffer 1317 * 1318 * @param src buffer for 2 lines input 1319 * @param dst_ buffer for 1 line output 1320 * @param dst_width the width of the destination line 1321 * @param vf_x_scale a float vector, at each entry is the x_scale-factor 1322 * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line 1323 * @param src_linestride the stride of the srcline 1324 */ 1325 void bilinear_scale_line_w8( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) { 1326 1327 unsigned char* dst = dst_; 1328 1329 unsigned int dst_x; 1330 for( dst_x=0; dst_x<dst_width; dst_x+=8) { 1331 // address calculation for loading the 4 surrounding pixel of each calculated 1332 // destination pixel 1333 vector unsigned int vui_dst_x_tmp = spu_splats( dst_x ); 1334 // lower range->first 4 pixel 1335 // upper range->next 4 pixel 1336 vector unsigned int vui_inc_dst_x_lower_range = { 0, 1, 2, 3 }; 1337 vector unsigned int vui_inc_dst_x_upper_range = { 4, 5, 6, 7 }; 1338 vector unsigned int vui_dst_x_lower_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_lower_range ); 1339 vector unsigned int vui_dst_x_upper_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_upper_range ); 1340 1341 // calculate weight EAST-WEST 1342 vector float vf_dst_x_lower_range = spu_convtf( vui_dst_x_lower_range, 0 ); 1343 vector float vf_dst_x_upper_range = spu_convtf( vui_dst_x_upper_range, 0 ); 1344 vector float vf_src_x_lower_range = spu_mul( vf_dst_x_lower_range, vf_x_scale ); 1345 vector float vf_src_x_upper_range = spu_mul( vf_dst_x_upper_range, vf_x_scale ); 1346 vector unsigned int vui_interpl_x_lower_range = spu_convtu( vf_src_x_lower_range, 0 ); 1347 vector unsigned int vui_interpl_x_upper_range = spu_convtu( vf_src_x_upper_range, 0 ); 1348 vector float vf_interpl_x_lower_range = spu_convtf( vui_interpl_x_lower_range, 0 ); 1349 vector float vf_interpl_x_upper_range = spu_convtf( vui_interpl_x_upper_range, 0 ); 1350 vector float vf_EWweight_lower_range = spu_sub( vf_src_x_lower_range, vf_interpl_x_lower_range ); 1351 vector float vf_EWweight_upper_range = spu_sub( vf_src_x_upper_range, vf_interpl_x_upper_range ); 1352 1353 // calculate address offset 1354 // 1355 // pixel NORTH WEST 1356 vector unsigned int vui_off_pixelNW_lower_range = vui_interpl_x_lower_range; 1357 vector unsigned int vui_off_pixelNW_upper_range = vui_interpl_x_upper_range; 1358 1359 // pixel NORTH EAST-->(offpixelNW+1) 1360 vector unsigned int vui_add_1 = { 1, 1, 1, 1 }; 1361 vector unsigned int vui_off_pixelNE_lower_range = spu_add( vui_off_pixelNW_lower_range, vui_add_1 ); 1362 vector unsigned int vui_off_pixelNE_upper_range = spu_add( vui_off_pixelNW_upper_range, vui_add_1 ); 1363 1364 // SOUTH-WEST-->(offpixelNW+src_linestride) 1365 vector unsigned int vui_srclinestride = spu_splats( src_linestride ); 1366 vector unsigned int vui_off_pixelSW_lower_range = spu_add( vui_srclinestride, vui_off_pixelNW_lower_range ); 1367 vector unsigned int vui_off_pixelSW_upper_range = spu_add( vui_srclinestride, vui_off_pixelNW_upper_range ); 1368 1369 // SOUTH-EAST-->(offpixelNW+src_linestride+1) 1370 vector unsigned int vui_off_pixelSE_lower_range = spu_add( vui_srclinestride, vui_off_pixelNE_lower_range ); 1371 vector unsigned int vui_off_pixelSE_upper_range = spu_add( vui_srclinestride, vui_off_pixelNE_upper_range ); 1372 1373 // calculate each address 1374 vector unsigned int vui_src_ls = spu_splats( (unsigned int) src ); 1375 vector unsigned int vui_addr_pixelNW_lower_range = spu_add( vui_src_ls, vui_off_pixelNW_lower_range ); 1376 vector unsigned int vui_addr_pixelNW_upper_range = spu_add( vui_src_ls, vui_off_pixelNW_upper_range ); 1377 vector unsigned int vui_addr_pixelNE_lower_range = spu_add( vui_src_ls, vui_off_pixelNE_lower_range ); 1378 vector unsigned int vui_addr_pixelNE_upper_range = spu_add( vui_src_ls, vui_off_pixelNE_upper_range ); 1379 1380 vector unsigned int vui_addr_pixelSW_lower_range = spu_add( vui_src_ls, vui_off_pixelSW_lower_range ); 1381 vector unsigned int vui_addr_pixelSW_upper_range = spu_add( vui_src_ls, vui_off_pixelSW_upper_range ); 1382 vector unsigned int vui_addr_pixelSE_lower_range = spu_add( vui_src_ls, vui_off_pixelSE_lower_range ); 1383 vector unsigned int vui_addr_pixelSE_upper_range = spu_add( vui_src_ls, vui_off_pixelSE_upper_range ); 1384 1385 // get each pixel 1386 // 1387 // scalar load, afterwards insertion into the right position 1388 // NORTH WEST 1389 vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; 1390 vector unsigned char vuc_pixel_NW_lower_range = spu_insert( 1391 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 0 )), null_vector, 3 ); 1392 vuc_pixel_NW_lower_range = spu_insert( 1393 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 1 )), 1394 vuc_pixel_NW_lower_range, 7 ); 1395 vuc_pixel_NW_lower_range = spu_insert( 1396 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 2 )), 1397 vuc_pixel_NW_lower_range, 11 ); 1398 vuc_pixel_NW_lower_range = spu_insert( 1399 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 3 )), 1400 vuc_pixel_NW_lower_range, 15 ); 1401 1402 vector unsigned char vuc_pixel_NW_upper_range = spu_insert( 1403 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 0 )), null_vector, 3 ); 1404 vuc_pixel_NW_upper_range = spu_insert( 1405 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 1 )), 1406 vuc_pixel_NW_upper_range, 7 ); 1407 vuc_pixel_NW_upper_range = spu_insert( 1408 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 2 )), 1409 vuc_pixel_NW_upper_range, 11 ); 1410 vuc_pixel_NW_upper_range = spu_insert( 1411 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 3 )), 1412 vuc_pixel_NW_upper_range, 15 ); 1413 1414 // NORTH EAST 1415 vector unsigned char vuc_pixel_NE_lower_range = spu_insert( 1416 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 0 )), null_vector, 3 ); 1417 vuc_pixel_NE_lower_range = spu_insert( 1418 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 1 )), 1419 vuc_pixel_NE_lower_range, 7 ); 1420 vuc_pixel_NE_lower_range = spu_insert( 1421 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 2 )), 1422 vuc_pixel_NE_lower_range, 11 ); 1423 vuc_pixel_NE_lower_range = spu_insert( 1424 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 3 )), 1425 vuc_pixel_NE_lower_range, 15 ); 1426 1427 vector unsigned char vuc_pixel_NE_upper_range = spu_insert( 1428 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 0 )), null_vector, 3 ); 1429 vuc_pixel_NE_upper_range = spu_insert( 1430 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 1 )), 1431 vuc_pixel_NE_upper_range, 7 ); 1432 vuc_pixel_NE_upper_range = spu_insert( 1433 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 2 )), 1434 vuc_pixel_NE_upper_range, 11 ); 1435 vuc_pixel_NE_upper_range = spu_insert( 1436 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 3 )), 1437 vuc_pixel_NE_upper_range, 15 ); 1438 1439 1440 // SOUTH WEST 1441 vector unsigned char vuc_pixel_SW_lower_range = spu_insert( 1442 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 0 )), null_vector, 3 ); 1443 vuc_pixel_SW_lower_range = spu_insert( 1444 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 1 )), 1445 vuc_pixel_SW_lower_range, 7 ); 1446 vuc_pixel_SW_lower_range = spu_insert( 1447 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 2 )), 1448 vuc_pixel_SW_lower_range, 11 ); 1449 vuc_pixel_SW_lower_range = spu_insert( 1450 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 3 )), 1451 vuc_pixel_SW_lower_range, 15 ); 1452 1453 vector unsigned char vuc_pixel_SW_upper_range = spu_insert( 1454 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 0 )), null_vector, 3 ); 1455 vuc_pixel_SW_upper_range = spu_insert( 1456 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 1 )), 1457 vuc_pixel_SW_upper_range, 7 ); 1458 vuc_pixel_SW_upper_range = spu_insert( 1459 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 2 )), 1460 vuc_pixel_SW_upper_range, 11 ); 1461 vuc_pixel_SW_upper_range = spu_insert( 1462 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 3 )), 1463 vuc_pixel_SW_upper_range, 15 ); 1464 1465 // SOUTH EAST 1466 vector unsigned char vuc_pixel_SE_lower_range = spu_insert( 1467 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 0 )), null_vector, 3 ); 1468 vuc_pixel_SE_lower_range = spu_insert( 1469 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 1 )), 1470 vuc_pixel_SE_lower_range, 7 ); 1471 vuc_pixel_SE_lower_range = spu_insert( 1472 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 2 )), 1473 vuc_pixel_SE_lower_range, 11 ); 1474 vuc_pixel_SE_lower_range = spu_insert( 1475 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 3 )), 1476 vuc_pixel_SE_lower_range, 15 ); 1477 1478 vector unsigned char vuc_pixel_SE_upper_range = spu_insert( 1479 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 0 )), null_vector, 3 ); 1480 vuc_pixel_SE_upper_range = spu_insert( 1481 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 1 )), 1482 vuc_pixel_SE_upper_range, 7 ); 1483 vuc_pixel_SE_upper_range = spu_insert( 1484 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 2 )), 1485 vuc_pixel_SE_upper_range, 11 ); 1486 vuc_pixel_SE_upper_range = spu_insert( 1487 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 3 )), 1488 vuc_pixel_SE_upper_range, 15 ); 1489 1490 1491 // convert to float 1492 vector float vf_pixel_NW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_lower_range, 0 ); 1493 vector float vf_pixel_NW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_upper_range, 0 ); 1494 1495 vector float vf_pixel_SW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_lower_range, 0 ); 1496 vector float vf_pixel_SW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_upper_range, 0 ); 1497 1498 vector float vf_pixel_NE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_lower_range, 0 ); 1499 vector float vf_pixel_NE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_upper_range, 0 ); 1500 1501 vector float vf_pixel_SE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_lower_range, 0 ); 1502 vector float vf_pixel_SE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_upper_range, 0 ); 1503 1504 1505 1506 // first linear interpolation: EWtop 1507 // EWtop = NW + EWweight*(NE-NW) 1508 // 1509 // lower range 1510 vector float vf_EWtop_lower_range_tmp = spu_sub( vf_pixel_NE_lower_range, vf_pixel_NW_lower_range ); 1511 vector float vf_EWtop_lower_range = spu_madd( vf_EWweight_lower_range, 1512 vf_EWtop_lower_range_tmp, 1513 vf_pixel_NW_lower_range ); 1514 1515 // upper range 1516 vector float vf_EWtop_upper_range_tmp = spu_sub( vf_pixel_NE_upper_range, vf_pixel_NW_upper_range ); 1517 vector float vf_EWtop_upper_range = spu_madd( vf_EWweight_upper_range, 1518 vf_EWtop_upper_range_tmp, 1519 vf_pixel_NW_upper_range ); 1520 1521 1522 1523 // second linear interpolation: EWbottom 1524 // EWbottom = SW + EWweight*(SE-SW) 1525 // 1526 // lower range 1527 vector float vf_EWbottom_lower_range_tmp = spu_sub( vf_pixel_SE_lower_range, vf_pixel_SW_lower_range ); 1528 vector float vf_EWbottom_lower_range = spu_madd( vf_EWweight_lower_range, 1529 vf_EWbottom_lower_range_tmp, 1530 vf_pixel_SW_lower_range ); 1531 1532 // upper range 1533 vector float vf_EWbottom_upper_range_tmp = spu_sub( vf_pixel_SE_upper_range, vf_pixel_SW_upper_range ); 1534 vector float vf_EWbottom_upper_range = spu_madd( vf_EWweight_upper_range, 1535 vf_EWbottom_upper_range_tmp, 1536 vf_pixel_SW_upper_range ); 1537 1538 1539 1540 // third linear interpolation: the bilinear interpolated value 1541 // result = EWtop + NSweight*(EWbottom-EWtop); 1542 // 1543 // lower range 1544 vector float vf_result_lower_range_tmp = spu_sub( vf_EWbottom_lower_range, vf_EWtop_lower_range ); 1545 vector float vf_result_lower_range = spu_madd( vf_NSweight, 1546 vf_result_lower_range_tmp, 1547 vf_EWtop_lower_range ); 1548 1549 // upper range 1550 vector float vf_result_upper_range_tmp = spu_sub( vf_EWbottom_upper_range, vf_EWtop_upper_range ); 1551 vector float vf_result_upper_range = spu_madd( vf_NSweight, 1552 vf_result_upper_range_tmp, 1553 vf_EWtop_upper_range ); 1554 1555 1556 // convert back: using saturated arithmetic 1557 vector unsigned int vui_result_lower_range = vfloat_to_vuint( vf_result_lower_range ); 1558 vector unsigned int vui_result_upper_range = vfloat_to_vuint( vf_result_upper_range ); 1559 1560 // merge results->lower,upper 1561 vector unsigned char vuc_mask_merge_result = { 0x03, 0x07, 0x0B, 0x0F, 1562 0x13, 0x17, 0x1B, 0x1F, 1563 0x00, 0x00, 0x00, 0x00, 1564 0x00, 0x00, 0x00, 0x00 }; 1565 1566 vector unsigned char vuc_result = spu_shuffle( (vector unsigned char) vui_result_lower_range, 1567 (vector unsigned char) vui_result_upper_range, 1568 vuc_mask_merge_result ); 1569 1570 // partial storing 1571 vector unsigned char vuc_mask_out = { 0x00, 0x00, 0x00, 0x00, 1572 0x00, 0x00, 0x00, 0x00, 1573 0xFF, 0xFF, 0xFF, 0xFF, 1574 0xFF, 0xFF, 0xFF, 0xFF }; 1575 1576 1577 // get currently stored data 1578 vector unsigned char vuc_orig = *((vector unsigned char*)dst); 1579 1580 // clear currently stored data 1581 vuc_orig = spu_and( vuc_orig, 1582 spu_rlqwbyte( vuc_mask_out, ((unsigned int)dst)&0x0F) ); 1583 1584 // rotate result according to storing address 1585 vuc_result = spu_rlqwbyte( vuc_result, ((unsigned int)dst)&0x0F ); 1586 1587 // store result 1588 *((vector unsigned char*)dst) = spu_or( vuc_result, 1589 vuc_orig ); 1590 dst += 8; 1591 } 1592 } 1593 1594 1595 /* 1596 * bilinear_scale_line_w16() 1597 * 1598 * processes a line of yuv-input, width has to be a multiple of 16 1599 * scaled yuv-output is written to local store buffer 1600 * 1601 * @param src buffer for 2 lines input 1602 * @param dst_ buffer for 1 line output 1603 * @param dst_width the width of the destination line 1604 * @param vf_x_scale a float vector, at each entry is the x_scale-factor 1605 * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line 1606 * @param src_linestride the stride of the srcline 1607 */ 1608 void bilinear_scale_line_w16( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) { 1609 1610 unsigned char* dst = dst_; 1611 1612 unsigned int dst_x; 1613 for( dst_x=0; dst_x<dst_width; dst_x+=16) { 1614 // address calculation for loading the 4 surrounding pixel of each calculated 1615 // destination pixel 1616 vector unsigned int vui_dst_x_tmp = spu_splats( dst_x ); 1617 // parallelised processing 1618 // first range->pixel 1 2 3 4 1619 // second range->pixel 5 6 7 8 1620 // third range->pixel 9 10 11 12 1621 // fourth range->pixel 13 14 15 16 1622 vector unsigned int vui_inc_dst_x_first_range = { 0, 1, 2, 3 }; 1623 vector unsigned int vui_inc_dst_x_second_range = { 4, 5, 6, 7 }; 1624 vector unsigned int vui_inc_dst_x_third_range = { 8, 9, 10, 11 }; 1625 vector unsigned int vui_inc_dst_x_fourth_range = { 12, 13, 14, 15 }; 1626 vector unsigned int vui_dst_x_first_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_first_range ); 1627 vector unsigned int vui_dst_x_second_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_second_range ); 1628 vector unsigned int vui_dst_x_third_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_third_range ); 1629 vector unsigned int vui_dst_x_fourth_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_fourth_range ); 1630 1631 // calculate weight EAST-WEST 1632 vector float vf_dst_x_first_range = spu_convtf( vui_dst_x_first_range, 0 ); 1633 vector float vf_dst_x_second_range = spu_convtf( vui_dst_x_second_range, 0 ); 1634 vector float vf_dst_x_third_range = spu_convtf( vui_dst_x_third_range, 0 ); 1635 vector float vf_dst_x_fourth_range = spu_convtf( vui_dst_x_fourth_range, 0 ); 1636 vector float vf_src_x_first_range = spu_mul( vf_dst_x_first_range, vf_x_scale ); 1637 vector float vf_src_x_second_range = spu_mul( vf_dst_x_second_range, vf_x_scale ); 1638 vector float vf_src_x_third_range = spu_mul( vf_dst_x_third_range, vf_x_scale ); 1639 vector float vf_src_x_fourth_range = spu_mul( vf_dst_x_fourth_range, vf_x_scale ); 1640 vector unsigned int vui_interpl_x_first_range = spu_convtu( vf_src_x_first_range, 0 ); 1641 vector unsigned int vui_interpl_x_second_range = spu_convtu( vf_src_x_second_range, 0 ); 1642 vector unsigned int vui_interpl_x_third_range = spu_convtu( vf_src_x_third_range, 0 ); 1643 vector unsigned int vui_interpl_x_fourth_range = spu_convtu( vf_src_x_fourth_range, 0 ); 1644 vector float vf_interpl_x_first_range = spu_convtf( vui_interpl_x_first_range, 0 ); 1645 vector float vf_interpl_x_second_range = spu_convtf( vui_interpl_x_second_range, 0 ); 1646 vector float vf_interpl_x_third_range = spu_convtf( vui_interpl_x_third_range, 0 ); 1647 vector float vf_interpl_x_fourth_range = spu_convtf( vui_interpl_x_fourth_range, 0 ); 1648 vector float vf_EWweight_first_range = spu_sub( vf_src_x_first_range, vf_interpl_x_first_range ); 1649 vector float vf_EWweight_second_range = spu_sub( vf_src_x_second_range, vf_interpl_x_second_range ); 1650 vector float vf_EWweight_third_range = spu_sub( vf_src_x_third_range, vf_interpl_x_third_range ); 1651 vector float vf_EWweight_fourth_range = spu_sub( vf_src_x_fourth_range, vf_interpl_x_fourth_range ); 1652 1653 // calculate address offset 1654 // 1655 // pixel NORTH WEST 1656 vector unsigned int vui_off_pixelNW_first_range = vui_interpl_x_first_range; 1657 vector unsigned int vui_off_pixelNW_second_range = vui_interpl_x_second_range; 1658 vector unsigned int vui_off_pixelNW_third_range = vui_interpl_x_third_range; 1659 vector unsigned int vui_off_pixelNW_fourth_range = vui_interpl_x_fourth_range; 1660 1661 // pixel NORTH EAST-->(offpixelNW+1) 1662 vector unsigned int vui_add_1 = { 1, 1, 1, 1 }; 1663 vector unsigned int vui_off_pixelNE_first_range = spu_add( vui_off_pixelNW_first_range, vui_add_1 ); 1664 vector unsigned int vui_off_pixelNE_second_range = spu_add( vui_off_pixelNW_second_range, vui_add_1 ); 1665 vector unsigned int vui_off_pixelNE_third_range = spu_add( vui_off_pixelNW_third_range, vui_add_1 ); 1666 vector unsigned int vui_off_pixelNE_fourth_range = spu_add( vui_off_pixelNW_fourth_range, vui_add_1 ); 1667 1668 // SOUTH-WEST-->(offpixelNW+src_linestride) 1669 vector unsigned int vui_srclinestride = spu_splats( src_linestride ); 1670 vector unsigned int vui_off_pixelSW_first_range = spu_add( vui_srclinestride, vui_off_pixelNW_first_range ); 1671 vector unsigned int vui_off_pixelSW_second_range = spu_add( vui_srclinestride, vui_off_pixelNW_second_range ); 1672 vector unsigned int vui_off_pixelSW_third_range = spu_add( vui_srclinestride, vui_off_pixelNW_third_range ); 1673 vector unsigned int vui_off_pixelSW_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNW_fourth_range ); 1674 1675 // SOUTH-EAST-->(offpixelNW+src_linestride+1) 1676 vector unsigned int vui_off_pixelSE_first_range = spu_add( vui_srclinestride, vui_off_pixelNE_first_range ); 1677 vector unsigned int vui_off_pixelSE_second_range = spu_add( vui_srclinestride, vui_off_pixelNE_second_range ); 1678 vector unsigned int vui_off_pixelSE_third_range = spu_add( vui_srclinestride, vui_off_pixelNE_third_range ); 1679 vector unsigned int vui_off_pixelSE_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNE_fourth_range ); 1680 1681 // calculate each address 1682 vector unsigned int vui_src_ls = spu_splats( (unsigned int) src ); 1683 vector unsigned int vui_addr_pixelNW_first_range = spu_add( vui_src_ls, vui_off_pixelNW_first_range ); 1684 vector unsigned int vui_addr_pixelNW_second_range = spu_add( vui_src_ls, vui_off_pixelNW_second_range ); 1685 vector unsigned int vui_addr_pixelNW_third_range = spu_add( vui_src_ls, vui_off_pixelNW_third_range ); 1686 vector unsigned int vui_addr_pixelNW_fourth_range = spu_add( vui_src_ls, vui_off_pixelNW_fourth_range ); 1687 1688 vector unsigned int vui_addr_pixelNE_first_range = spu_add( vui_src_ls, vui_off_pixelNE_first_range ); 1689 vector unsigned int vui_addr_pixelNE_second_range = spu_add( vui_src_ls, vui_off_pixelNE_second_range ); 1690 vector unsigned int vui_addr_pixelNE_third_range = spu_add( vui_src_ls, vui_off_pixelNE_third_range ); 1691 vector unsigned int vui_addr_pixelNE_fourth_range = spu_add( vui_src_ls, vui_off_pixelNE_fourth_range ); 1692 1693 vector unsigned int vui_addr_pixelSW_first_range = spu_add( vui_src_ls, vui_off_pixelSW_first_range ); 1694 vector unsigned int vui_addr_pixelSW_second_range = spu_add( vui_src_ls, vui_off_pixelSW_second_range ); 1695 vector unsigned int vui_addr_pixelSW_third_range = spu_add( vui_src_ls, vui_off_pixelSW_third_range ); 1696 vector unsigned int vui_addr_pixelSW_fourth_range = spu_add( vui_src_ls, vui_off_pixelSW_fourth_range ); 1697 1698 vector unsigned int vui_addr_pixelSE_first_range = spu_add( vui_src_ls, vui_off_pixelSE_first_range ); 1699 vector unsigned int vui_addr_pixelSE_second_range = spu_add( vui_src_ls, vui_off_pixelSE_second_range ); 1700 vector unsigned int vui_addr_pixelSE_third_range = spu_add( vui_src_ls, vui_off_pixelSE_third_range ); 1701 vector unsigned int vui_addr_pixelSE_fourth_range = spu_add( vui_src_ls, vui_off_pixelSE_fourth_range ); 1702 1703 1704 // get each pixel 1705 // 1706 // scalar load, afterwards insertion into the right position 1707 // NORTH WEST 1708 // first range 1709 vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; 1710 vector unsigned char vuc_pixel_NW_first_range = spu_insert( 1711 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 0 )), null_vector, 3 ); 1712 vuc_pixel_NW_first_range = spu_insert( 1713 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 1 )), 1714 vuc_pixel_NW_first_range, 7 ); 1715 vuc_pixel_NW_first_range = spu_insert( 1716 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 2 )), 1717 vuc_pixel_NW_first_range, 11 ); 1718 vuc_pixel_NW_first_range = spu_insert( 1719 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 3 )), 1720 vuc_pixel_NW_first_range, 15 ); 1721 // second range 1722 vector unsigned char vuc_pixel_NW_second_range = spu_insert( 1723 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 0 )), null_vector, 3 ); 1724 vuc_pixel_NW_second_range = spu_insert( 1725 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 1 )), 1726 vuc_pixel_NW_second_range, 7 ); 1727 vuc_pixel_NW_second_range = spu_insert( 1728 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 2 )), 1729 vuc_pixel_NW_second_range, 11 ); 1730 vuc_pixel_NW_second_range = spu_insert( 1731 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 3 )), 1732 vuc_pixel_NW_second_range, 15 ); 1733 // third range 1734 vector unsigned char vuc_pixel_NW_third_range = spu_insert( 1735 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 0 )), null_vector, 3 ); 1736 vuc_pixel_NW_third_range = spu_insert( 1737 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 1 )), 1738 vuc_pixel_NW_third_range, 7 ); 1739 vuc_pixel_NW_third_range = spu_insert( 1740 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 2 )), 1741 vuc_pixel_NW_third_range, 11 ); 1742 vuc_pixel_NW_third_range = spu_insert( 1743 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 3 )), 1744 vuc_pixel_NW_third_range, 15 ); 1745 // fourth range 1746 vector unsigned char vuc_pixel_NW_fourth_range = spu_insert( 1747 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 0 )), null_vector, 3 ); 1748 vuc_pixel_NW_fourth_range = spu_insert( 1749 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 1 )), 1750 vuc_pixel_NW_fourth_range, 7 ); 1751 vuc_pixel_NW_fourth_range = spu_insert( 1752 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 2 )), 1753 vuc_pixel_NW_fourth_range, 11 ); 1754 vuc_pixel_NW_fourth_range = spu_insert( 1755 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 3 )), 1756 vuc_pixel_NW_fourth_range, 15 ); 1757 1758 // NORTH EAST 1759 // first range 1760 vector unsigned char vuc_pixel_NE_first_range = spu_insert( 1761 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 0 )), null_vector, 3 ); 1762 vuc_pixel_NE_first_range = spu_insert( 1763 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 1 )), 1764 vuc_pixel_NE_first_range, 7 ); 1765 vuc_pixel_NE_first_range = spu_insert( 1766 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 2 )), 1767 vuc_pixel_NE_first_range, 11 ); 1768 vuc_pixel_NE_first_range = spu_insert( 1769 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 3 )), 1770 vuc_pixel_NE_first_range, 15 ); 1771 // second range 1772 vector unsigned char vuc_pixel_NE_second_range = spu_insert( 1773 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 0 )), null_vector, 3 ); 1774 vuc_pixel_NE_second_range = spu_insert( 1775 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 1 )), 1776 vuc_pixel_NE_second_range, 7 ); 1777 vuc_pixel_NE_second_range = spu_insert( 1778 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 2 )), 1779 vuc_pixel_NE_second_range, 11 ); 1780 vuc_pixel_NE_second_range = spu_insert( 1781 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 3 )), 1782 vuc_pixel_NE_second_range, 15 ); 1783 // third range 1784 vector unsigned char vuc_pixel_NE_third_range = spu_insert( 1785 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 0 )), null_vector, 3 ); 1786 vuc_pixel_NE_third_range = spu_insert( 1787 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 1 )), 1788 vuc_pixel_NE_third_range, 7 ); 1789 vuc_pixel_NE_third_range = spu_insert( 1790 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 2 )), 1791 vuc_pixel_NE_third_range, 11 ); 1792 vuc_pixel_NE_third_range = spu_insert( 1793 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 3 )), 1794 vuc_pixel_NE_third_range, 15 ); 1795 // fourth range 1796 vector unsigned char vuc_pixel_NE_fourth_range = spu_insert( 1797 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 0 )), null_vector, 3 ); 1798 vuc_pixel_NE_fourth_range = spu_insert( 1799 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 1 )), 1800 vuc_pixel_NE_fourth_range, 7 ); 1801 vuc_pixel_NE_fourth_range = spu_insert( 1802 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 2 )), 1803 vuc_pixel_NE_fourth_range, 11 ); 1804 vuc_pixel_NE_fourth_range = spu_insert( 1805 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 3 )), 1806 vuc_pixel_NE_fourth_range, 15 ); 1807 1808 // SOUTH WEST 1809 // first range 1810 vector unsigned char vuc_pixel_SW_first_range = spu_insert( 1811 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 0 )), null_vector, 3 ); 1812 vuc_pixel_SW_first_range = spu_insert( 1813 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 1 )), 1814 vuc_pixel_SW_first_range, 7 ); 1815 vuc_pixel_SW_first_range = spu_insert( 1816 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 2 )), 1817 vuc_pixel_SW_first_range, 11 ); 1818 vuc_pixel_SW_first_range = spu_insert( 1819 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 3 )), 1820 vuc_pixel_SW_first_range, 15 ); 1821 // second range 1822 vector unsigned char vuc_pixel_SW_second_range = spu_insert( 1823 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 0 )), null_vector, 3 ); 1824 vuc_pixel_SW_second_range = spu_insert( 1825 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 1 )), 1826 vuc_pixel_SW_second_range, 7 ); 1827 vuc_pixel_SW_second_range = spu_insert( 1828 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 2 )), 1829 vuc_pixel_SW_second_range, 11 ); 1830 vuc_pixel_SW_second_range = spu_insert( 1831 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 3 )), 1832 vuc_pixel_SW_second_range, 15 ); 1833 // third range 1834 vector unsigned char vuc_pixel_SW_third_range = spu_insert( 1835 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 0 )), null_vector, 3 ); 1836 vuc_pixel_SW_third_range = spu_insert( 1837 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 1 )), 1838 vuc_pixel_SW_third_range, 7 ); 1839 vuc_pixel_SW_third_range = spu_insert( 1840 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 2 )), 1841 vuc_pixel_SW_third_range, 11 ); 1842 vuc_pixel_SW_third_range = spu_insert( 1843 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 3 )), 1844 vuc_pixel_SW_third_range, 15 ); 1845 // fourth range 1846 vector unsigned char vuc_pixel_SW_fourth_range = spu_insert( 1847 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 0 )), null_vector, 3 ); 1848 vuc_pixel_SW_fourth_range = spu_insert( 1849 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 1 )), 1850 vuc_pixel_SW_fourth_range, 7 ); 1851 vuc_pixel_SW_fourth_range = spu_insert( 1852 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 2 )), 1853 vuc_pixel_SW_fourth_range, 11 ); 1854 vuc_pixel_SW_fourth_range = spu_insert( 1855 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 3 )), 1856 vuc_pixel_SW_fourth_range, 15 ); 1857 1858 // NORTH EAST 1859 // first range 1860 vector unsigned char vuc_pixel_SE_first_range = spu_insert( 1861 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 0 )), null_vector, 3 ); 1862 vuc_pixel_SE_first_range = spu_insert( 1863 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 1 )), 1864 vuc_pixel_SE_first_range, 7 ); 1865 vuc_pixel_SE_first_range = spu_insert( 1866 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 2 )), 1867 vuc_pixel_SE_first_range, 11 ); 1868 vuc_pixel_SE_first_range = spu_insert( 1869 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 3 )), 1870 vuc_pixel_SE_first_range, 15 ); 1871 // second range 1872 vector unsigned char vuc_pixel_SE_second_range = spu_insert( 1873 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 0 )), null_vector, 3 ); 1874 vuc_pixel_SE_second_range = spu_insert( 1875 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 1 )), 1876 vuc_pixel_SE_second_range, 7 ); 1877 vuc_pixel_SE_second_range = spu_insert( 1878 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 2 )), 1879 vuc_pixel_SE_second_range, 11 ); 1880 vuc_pixel_SE_second_range = spu_insert( 1881 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 3 )), 1882 vuc_pixel_SE_second_range, 15 ); 1883 // third range 1884 vector unsigned char vuc_pixel_SE_third_range = spu_insert( 1885 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 0 )), null_vector, 3 ); 1886 vuc_pixel_SE_third_range = spu_insert( 1887 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 1 )), 1888 vuc_pixel_SE_third_range, 7 ); 1889 vuc_pixel_SE_third_range = spu_insert( 1890 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 2 )), 1891 vuc_pixel_SE_third_range, 11 ); 1892 vuc_pixel_SE_third_range = spu_insert( 1893 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 3 )), 1894 vuc_pixel_SE_third_range, 15 ); 1895 // fourth range 1896 vector unsigned char vuc_pixel_SE_fourth_range = spu_insert( 1897 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 0 )), null_vector, 3 ); 1898 vuc_pixel_SE_fourth_range = spu_insert( 1899 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 1 )), 1900 vuc_pixel_SE_fourth_range, 7 ); 1901 vuc_pixel_SE_fourth_range = spu_insert( 1902 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 2 )), 1903 vuc_pixel_SE_fourth_range, 11 ); 1904 vuc_pixel_SE_fourth_range = spu_insert( 1905 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 3 )), 1906 vuc_pixel_SE_fourth_range, 15 ); 1907 1908 1909 1910 // convert to float 1911 vector float vf_pixel_NW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_first_range, 0 ); 1912 vector float vf_pixel_NW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_second_range, 0 ); 1913 vector float vf_pixel_NW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_third_range, 0 ); 1914 vector float vf_pixel_NW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_fourth_range, 0 ); 1915 1916 vector float vf_pixel_NE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_first_range, 0 ); 1917 vector float vf_pixel_NE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_second_range, 0 ); 1918 vector float vf_pixel_NE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_third_range, 0 ); 1919 vector float vf_pixel_NE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_fourth_range, 0 ); 1920 1921 vector float vf_pixel_SW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_first_range, 0 ); 1922 vector float vf_pixel_SW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_second_range, 0 ); 1923 vector float vf_pixel_SW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_third_range, 0 ); 1924 vector float vf_pixel_SW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_fourth_range, 0 ); 1925 1926 vector float vf_pixel_SE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_first_range, 0 ); 1927 vector float vf_pixel_SE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_second_range, 0 ); 1928 vector float vf_pixel_SE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_third_range, 0 ); 1929 vector float vf_pixel_SE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_fourth_range, 0 ); 1930 1931 // first linear interpolation: EWtop 1932 // EWtop = NW + EWweight*(NE-NW) 1933 // 1934 // first range 1935 vector float vf_EWtop_first_range_tmp = spu_sub( vf_pixel_NE_first_range, vf_pixel_NW_first_range ); 1936 vector float vf_EWtop_first_range = spu_madd( vf_EWweight_first_range, 1937 vf_EWtop_first_range_tmp, 1938 vf_pixel_NW_first_range ); 1939 1940 // second range 1941 vector float vf_EWtop_second_range_tmp = spu_sub( vf_pixel_NE_second_range, vf_pixel_NW_second_range ); 1942 vector float vf_EWtop_second_range = spu_madd( vf_EWweight_second_range, 1943 vf_EWtop_second_range_tmp, 1944 vf_pixel_NW_second_range ); 1945 1946 // third range 1947 vector float vf_EWtop_third_range_tmp = spu_sub( vf_pixel_NE_third_range, vf_pixel_NW_third_range ); 1948 vector float vf_EWtop_third_range = spu_madd( vf_EWweight_third_range, 1949 vf_EWtop_third_range_tmp, 1950 vf_pixel_NW_third_range ); 1951 1952 // fourth range 1953 vector float vf_EWtop_fourth_range_tmp = spu_sub( vf_pixel_NE_fourth_range, vf_pixel_NW_fourth_range ); 1954 vector float vf_EWtop_fourth_range = spu_madd( vf_EWweight_fourth_range, 1955 vf_EWtop_fourth_range_tmp, 1956 vf_pixel_NW_fourth_range ); 1957 1958 1959 1960 // second linear interpolation: EWbottom 1961 // EWbottom = SW + EWweight*(SE-SW) 1962 // 1963 // first range 1964 vector float vf_EWbottom_first_range_tmp = spu_sub( vf_pixel_SE_first_range, vf_pixel_SW_first_range ); 1965 vector float vf_EWbottom_first_range = spu_madd( vf_EWweight_first_range, 1966 vf_EWbottom_first_range_tmp, 1967 vf_pixel_SW_first_range ); 1968 1969 // second range 1970 vector float vf_EWbottom_second_range_tmp = spu_sub( vf_pixel_SE_second_range, vf_pixel_SW_second_range ); 1971 vector float vf_EWbottom_second_range = spu_madd( vf_EWweight_second_range, 1972 vf_EWbottom_second_range_tmp, 1973 vf_pixel_SW_second_range ); 1974 // first range 1975 vector float vf_EWbottom_third_range_tmp = spu_sub( vf_pixel_SE_third_range, vf_pixel_SW_third_range ); 1976 vector float vf_EWbottom_third_range = spu_madd( vf_EWweight_third_range, 1977 vf_EWbottom_third_range_tmp, 1978 vf_pixel_SW_third_range ); 1979 1980 // first range 1981 vector float vf_EWbottom_fourth_range_tmp = spu_sub( vf_pixel_SE_fourth_range, vf_pixel_SW_fourth_range ); 1982 vector float vf_EWbottom_fourth_range = spu_madd( vf_EWweight_fourth_range, 1983 vf_EWbottom_fourth_range_tmp, 1984 vf_pixel_SW_fourth_range ); 1985 1986 1987 1988 // third linear interpolation: the bilinear interpolated value 1989 // result = EWtop + NSweight*(EWbottom-EWtop); 1990 // 1991 // first range 1992 vector float vf_result_first_range_tmp = spu_sub( vf_EWbottom_first_range, vf_EWtop_first_range ); 1993 vector float vf_result_first_range = spu_madd( vf_NSweight, 1994 vf_result_first_range_tmp, 1995 vf_EWtop_first_range ); 1996 1997 // second range 1998 vector float vf_result_second_range_tmp = spu_sub( vf_EWbottom_second_range, vf_EWtop_second_range ); 1999 vector float vf_result_second_range = spu_madd( vf_NSweight, 2000 vf_result_second_range_tmp, 2001 vf_EWtop_second_range ); 2002 2003 // third range 2004 vector float vf_result_third_range_tmp = spu_sub( vf_EWbottom_third_range, vf_EWtop_third_range ); 2005 vector float vf_result_third_range = spu_madd( vf_NSweight, 2006 vf_result_third_range_tmp, 2007 vf_EWtop_third_range ); 2008 2009 // fourth range 2010 vector float vf_result_fourth_range_tmp = spu_sub( vf_EWbottom_fourth_range, vf_EWtop_fourth_range ); 2011 vector float vf_result_fourth_range = spu_madd( vf_NSweight, 2012 vf_result_fourth_range_tmp, 2013 vf_EWtop_fourth_range ); 2014 2015 2016 2017 // convert back: using saturated arithmetic 2018 vector unsigned int vui_result_first_range = vfloat_to_vuint( vf_result_first_range ); 2019 vector unsigned int vui_result_second_range = vfloat_to_vuint( vf_result_second_range ); 2020 vector unsigned int vui_result_third_range = vfloat_to_vuint( vf_result_third_range ); 2021 vector unsigned int vui_result_fourth_range = vfloat_to_vuint( vf_result_fourth_range ); 2022 2023 // merge results->lower,upper 2024 vector unsigned char vuc_mask_merge_result_first_second = { 0x03, 0x07, 0x0B, 0x0F, 2025 0x13, 0x17, 0x1B, 0x1F, 2026 0x00, 0x00, 0x00, 0x00, 2027 0x00, 0x00, 0x00, 0x00 }; 2028 2029 vector unsigned char vuc_mask_merge_result_third_fourth = { 0x00, 0x00, 0x00, 0x00, 2030 0x00, 0x00, 0x00, 0x00, 2031 0x03, 0x07, 0x0B, 0x0F, 2032 0x13, 0x17, 0x1B, 0x1F }; 2033 2034 vector unsigned char vuc_result_first_second = 2035 spu_shuffle( (vector unsigned char) vui_result_first_range, 2036 (vector unsigned char) vui_result_second_range, 2037 vuc_mask_merge_result_first_second ); 2038 2039 vector unsigned char vuc_result_third_fourth = 2040 spu_shuffle( (vector unsigned char) vui_result_third_range, 2041 (vector unsigned char) vui_result_fourth_range, 2042 vuc_mask_merge_result_third_fourth ); 2043 2044 // store result 2045 *((vector unsigned char*)dst) = spu_or( vuc_result_first_second, 2046 vuc_result_third_fourth ); 2047 dst += 16; 2048 } 2049 } 2050 2051