1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * Copyright (C) 2016 Mopria Alliance, Inc. 4 * Copyright (C) 2013 Hewlett-Packard Development Company, L.P. 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 #include "wprint_scaler.h" 20 #include <assert.h> 21 #include <stdio.h> 22 23 #define ROUND_4_DOWN(x) ((x) & ~3) 24 #define ROUND_4_UP(x) (ROUND_4_DOWN((x) + 3)) 25 #define PSCALER_FRACT_BITS_COUNT 24 26 27 typedef enum { 28 FRACTION_ROUND_UP, 29 FRACTION_TRUNCATE 30 } pscaler_fraction_t; 31 32 static uint32 33 _scaler_fraction_part(uint32 iNum, uint32 iDen, pscaler_fraction_t mode, bool_t *overflow); 34 35 static void _hw_scale_image_plane(scaler_config_t *pscaler_config, scaler_mode_t scaleMode); 36 37 static void _calculate_factors(scaler_config_t *pscaler_config, scaler_mode_t scaleMode); 38 39 void scaler_make_image_scaler_tables(uint16 image_input_width, uint16 image_input_buf_width, 40 uint16 image_output_width, uint16 image_output_buf_width, uint16 image_input_height, 41 uint16 image_output_height, scaler_config_t *pscaler_config) { 42 pscaler_config->iSrcWidth = image_input_width; 43 pscaler_config->iSrcHeight = image_input_height; 44 pscaler_config->iOutWidth = image_output_width; 45 pscaler_config->iOutHeight = image_output_height; 46 47 if ((image_input_width >= image_output_width) && 48 (image_input_height >= image_output_height)) { // scale DOWN 49 pscaler_config->scaleMode = PSCALER_SCALE_DOWN; 50 } else if ((image_input_width <= image_output_width) && 51 (image_input_height <= image_output_height)) { // scale UP 52 pscaler_config->scaleMode = PSCALER_SCALE_UP; 53 } else if (image_input_width > image_output_width) { // mixed scale Y-axis first 54 pscaler_config->scaleMode = PSCALER_SCALE_MIXED_YUP; 55 } else { // mixed scale X-axis first 56 pscaler_config->scaleMode = PSCALER_SCALE_MIXED_XUP; 57 } 58 59 // Setup scale factors 60 _calculate_factors(pscaler_config, pscaler_config->scaleMode); 61 62 // calculates initial buffer sizes for scaling whole image 63 // start rows == 0 64 // end_rows == image height 65 // buffer widths == image widths 66 pscaler_config->fSrcStartRow.decimal = 0; 67 pscaler_config->fSrcStartRow.fraction = 0; 68 pscaler_config->iSrcStartRow = 0; 69 pscaler_config->iSrcEndRow = pscaler_config->iSrcHeight; 70 pscaler_config->iSrcBufWidth = image_input_buf_width; 71 pscaler_config->iOutStartRow = 0; 72 pscaler_config->iOutEndRow = pscaler_config->iOutHeight; 73 pscaler_config->iOutBufWidth = image_output_buf_width; 74 pscaler_config->pSrcBuf = NULL; 75 pscaler_config->pOutBuf = NULL; 76 pscaler_config->pTmpBuf = NULL; 77 } 78 79 void scaler_calculate_scaling_rows(uint16 start_output_row_number, uint16 end_output_row_number, 80 void *tables_ptr, uint16 *start_input_row_number, uint16 *end_input_row_number, 81 uint16 *num_output_rows_generated, uint16 *num_rows_offset_to_start_output_row, 82 uint32 *mixed_axis_temp_buffer_size_needed) { 83 float64_t fSrcEndRow; 84 bool_t overflow; 85 scaler_config_t *pscaler_config; 86 87 pscaler_config = (scaler_config_t *) tables_ptr; 88 assert (start_output_row_number < pscaler_config->iOutHeight); 89 90 // copy the output start and end rows 91 // Don't ever attempt to output a single row from the scaler. 92 if (end_output_row_number == start_output_row_number) { 93 if (start_output_row_number == 0) { 94 pscaler_config->iOutStartRow = start_output_row_number; 95 pscaler_config->iOutEndRow = end_output_row_number + 1; 96 *num_rows_offset_to_start_output_row = 0; 97 } else { 98 pscaler_config->iOutStartRow = start_output_row_number - 1; 99 pscaler_config->iOutEndRow = end_output_row_number; 100 *num_rows_offset_to_start_output_row = 1; 101 } 102 } else { 103 pscaler_config->iOutStartRow = start_output_row_number; 104 pscaler_config->iOutEndRow = end_output_row_number; 105 *num_rows_offset_to_start_output_row = 0; 106 } 107 108 if (pscaler_config->iOutEndRow >= pscaler_config->iOutHeight) { // last stripe 109 pscaler_config->iOutEndRow = pscaler_config->iOutHeight - 1; 110 } 111 112 if (pscaler_config->scaleMode == PSCALER_SCALE_UP || 113 pscaler_config->scaleMode == PSCALER_SCALE_MIXED_YUP) { 114 // scale factors are calculated as dim-1/dim-1 115 pscaler_config->iSrcHeight--; 116 pscaler_config->iOutHeight--; 117 } 118 119 pscaler_config->fSrcStartRow.decimal = (uint32) pscaler_config->iOutStartRow * 120 (uint32) pscaler_config->iSrcHeight / (uint32) pscaler_config->iOutHeight; 121 122 pscaler_config->fSrcStartRow.fraction = _scaler_fraction_part( 123 (uint32) pscaler_config->iOutStartRow * (uint32) pscaler_config->iSrcHeight, 124 (uint32) pscaler_config->iOutHeight, FRACTION_ROUND_UP, &overflow); 125 126 if (overflow) { 127 pscaler_config->fSrcStartRow.decimal++; 128 } 129 130 pscaler_config->iSrcStartRow = pscaler_config->fSrcStartRow.decimal; 131 132 if (pscaler_config->scaleMode == PSCALER_SCALE_UP || 133 pscaler_config->scaleMode == PSCALER_SCALE_MIXED_YUP) { 134 fSrcEndRow.decimal = (uint32) pscaler_config->iOutEndRow * 135 (uint32) pscaler_config->iSrcHeight / (uint32) pscaler_config->iOutHeight; 136 fSrcEndRow.fraction = _scaler_fraction_part( 137 (uint32) pscaler_config->iOutEndRow * (uint32) pscaler_config->iSrcHeight, 138 (uint32) pscaler_config->iOutHeight, FRACTION_TRUNCATE, &overflow); 139 140 pscaler_config->iSrcEndRow = (uint16) fSrcEndRow.decimal; 141 142 if (0 != fSrcEndRow.fraction) { 143 // will cause an extra output row to be created... 144 pscaler_config->iSrcEndRow++; 145 pscaler_config->iOutEndRow++; 146 } 147 148 // restore dimensions 149 pscaler_config->iSrcHeight++; 150 pscaler_config->iOutHeight++; 151 } else { 152 fSrcEndRow.decimal = (uint32) (pscaler_config->iOutEndRow + 1) * 153 (uint32) pscaler_config->iSrcHeight / 154 (uint32) pscaler_config->iOutHeight; 155 156 fSrcEndRow.fraction = _scaler_fraction_part( 157 (uint32) (pscaler_config->iOutEndRow + 1) * (uint32) pscaler_config->iSrcHeight, 158 (uint32) pscaler_config->iOutHeight, FRACTION_TRUNCATE, &overflow); 159 160 pscaler_config->iSrcEndRow = (uint16) fSrcEndRow.decimal; 161 162 if (0 == fSrcEndRow.fraction) { 163 pscaler_config->iSrcEndRow--; 164 } 165 } 166 167 // check to be sure we're not going beyond the source image 168 if (pscaler_config->iSrcEndRow >= pscaler_config->iSrcHeight) { // last stripe 169 pscaler_config->iSrcEndRow = pscaler_config->iSrcHeight - 1; 170 } 171 172 *start_input_row_number = pscaler_config->iSrcStartRow; 173 *end_input_row_number = pscaler_config->iSrcEndRow; 174 *num_output_rows_generated = (pscaler_config->iOutEndRow - pscaler_config->iOutStartRow + 1); 175 176 // Calculate the 2nd pass buffer size if mixed scaling is done 177 if (pscaler_config->scaleMode == PSCALER_SCALE_MIXED_XUP) { 178 *mixed_axis_temp_buffer_size_needed = 179 ROUND_4_UP(pscaler_config->iOutWidth + 1) * 180 (*end_input_row_number - *start_input_row_number + 1); 181 } else if (pscaler_config->scaleMode == PSCALER_SCALE_MIXED_YUP) { 182 *mixed_axis_temp_buffer_size_needed = 183 ROUND_4_UP(pscaler_config->iSrcWidth) * (*num_output_rows_generated + 1); 184 } else { 185 *mixed_axis_temp_buffer_size_needed = 0; 186 } 187 188 (*num_output_rows_generated)++; 189 } 190 191 void scaler_scale_image_data(uint8 *input_plane, void *tables_ptr, uint8 *scaled_output_plane, 192 uint8 *temp_buffer_for_mixed_axis_scaling) { 193 uint16 iOrigWidth, iOrigHeight, iOrigOutBufWidth, iOrigSrcBufWidth; 194 uint16 iOrigOutStartRow, iOrigOutEndRow, iOrigSrcStartRow, iOrigSrcEndRow; 195 float64_t fOrigSrcStartRow; 196 uint8 *pOrigBuf; 197 scaler_config_t *pscaler_config; 198 199 pscaler_config = (scaler_config_t *) tables_ptr; 200 pscaler_config->pSrcBuf = input_plane; 201 pscaler_config->pOutBuf = scaled_output_plane; 202 203 if ((PSCALER_SCALE_MIXED_XUP == pscaler_config->scaleMode) || 204 (PSCALER_SCALE_MIXED_YUP == pscaler_config->scaleMode)) { 205 pscaler_config->pTmpBuf = temp_buffer_for_mixed_axis_scaling; 206 207 // save the output buffer 208 pOrigBuf = pscaler_config->pOutBuf; 209 210 // use the temp buff as the output buff for pass 1 211 pscaler_config->pOutBuf = pscaler_config->pTmpBuf; 212 213 if (PSCALER_SCALE_MIXED_YUP == pscaler_config->scaleMode) { 214 // save the original output widths 215 iOrigWidth = pscaler_config->iOutWidth; 216 iOrigOutBufWidth = pscaler_config->iOutBufWidth; 217 218 // set output widths to input widths (1::1) 219 pscaler_config->iOutWidth = pscaler_config->iSrcWidth; 220 pscaler_config->iOutBufWidth = pscaler_config->iSrcBufWidth; 221 222 // calculate the new scaler factors 223 _calculate_factors(pscaler_config, PSCALER_SCALE_UP); 224 225 // Run the photo scaler hardware 226 _hw_scale_image_plane(pscaler_config, PSCALER_SCALE_UP); 227 228 // reset the output widths 229 pscaler_config->iOutWidth = iOrigWidth; 230 pscaler_config->iOutBufWidth = iOrigOutBufWidth; 231 } else { 232 // save the original output height and row info 233 iOrigHeight = pscaler_config->iOutHeight; 234 iOrigOutStartRow = pscaler_config->iOutStartRow; 235 iOrigOutEndRow = pscaler_config->iOutEndRow; 236 fOrigSrcStartRow.fraction = pscaler_config->fSrcStartRow.fraction; 237 238 // set output height and rows to input height and rows(1::1) 239 pscaler_config->iOutHeight = pscaler_config->iSrcHeight; 240 pscaler_config->iOutStartRow = pscaler_config->iSrcStartRow; 241 pscaler_config->iOutEndRow = pscaler_config->iSrcEndRow; 242 pscaler_config->fSrcStartRow.fraction = 0; 243 244 // calculate the new scaler factors 245 _calculate_factors(pscaler_config, PSCALER_SCALE_UP); 246 247 // Run the photo scaler hardware 248 _hw_scale_image_plane(pscaler_config, PSCALER_SCALE_UP); 249 250 // reset the output height and rows 251 pscaler_config->iOutHeight = iOrigHeight; 252 pscaler_config->iOutStartRow = iOrigOutStartRow; 253 pscaler_config->iOutEndRow = iOrigOutEndRow; 254 pscaler_config->fSrcStartRow.fraction = fOrigSrcStartRow.fraction; 255 } 256 // restore the original output buffer 257 pscaler_config->pOutBuf = pOrigBuf; 258 259 // save the original input buffer 260 pOrigBuf = pscaler_config->pSrcBuf; 261 262 // use the previous output (temp) buffer as the new input buffer 263 pscaler_config->pSrcBuf = pscaler_config->pTmpBuf; 264 265 if (PSCALER_SCALE_MIXED_YUP == pscaler_config->scaleMode) { 266 // save the original input height and rows 267 iOrigHeight = pscaler_config->iSrcHeight; 268 iOrigSrcStartRow = pscaler_config->iSrcStartRow; 269 iOrigSrcEndRow = pscaler_config->iSrcEndRow; 270 fOrigSrcStartRow.decimal = pscaler_config->fSrcStartRow.decimal; 271 fOrigSrcStartRow.fraction = pscaler_config->fSrcStartRow.fraction; 272 273 // set the height and rows to 1::1 for the second pass 274 pscaler_config->iSrcHeight = pscaler_config->iOutHeight; 275 pscaler_config->iSrcStartRow = pscaler_config->iOutStartRow; 276 pscaler_config->iSrcEndRow = pscaler_config->iOutEndRow; 277 pscaler_config->fSrcStartRow.decimal = pscaler_config->iOutStartRow; 278 pscaler_config->fSrcStartRow.fraction = 0; 279 280 // calculate new scale factors 281 _calculate_factors(pscaler_config, PSCALER_SCALE_DOWN); 282 283 // Run the photo scaler hardware 284 _hw_scale_image_plane(pscaler_config, PSCALER_SCALE_DOWN); 285 286 // restore original input height and rows 287 pscaler_config->iSrcHeight = iOrigHeight; 288 pscaler_config->iSrcStartRow = iOrigSrcStartRow; 289 pscaler_config->iSrcEndRow = iOrigSrcEndRow; 290 pscaler_config->fSrcStartRow.decimal = fOrigSrcStartRow.decimal; 291 pscaler_config->fSrcStartRow.fraction = fOrigSrcStartRow.fraction; 292 } else { 293 // save the original input widths 294 iOrigWidth = pscaler_config->iSrcWidth; 295 iOrigSrcBufWidth = pscaler_config->iSrcBufWidth; 296 297 // set the widths to 1::1 for the second pass 298 pscaler_config->iSrcWidth = pscaler_config->iOutWidth; 299 pscaler_config->iSrcBufWidth = pscaler_config->iOutBufWidth; 300 301 // calculate new scale factors 302 _calculate_factors(pscaler_config, PSCALER_SCALE_DOWN); 303 304 // Run the photo scaler hardware 305 _hw_scale_image_plane(pscaler_config, PSCALER_SCALE_DOWN); 306 307 // restore original input widths 308 pscaler_config->iSrcWidth = iOrigWidth; 309 pscaler_config->iSrcBufWidth = iOrigSrcBufWidth; 310 } 311 312 // restore the input buffer 313 pscaler_config->pTmpBuf = pscaler_config->pSrcBuf; 314 pscaler_config->pSrcBuf = pOrigBuf; 315 316 // release the temp buffer 317 pscaler_config->pTmpBuf = NULL; 318 } else { 319 // Run the photo scaler hardware 320 _hw_scale_image_plane(pscaler_config, pscaler_config->scaleMode); 321 } 322 } 323 324 static void _calculate_factors(scaler_config_t *pscaler_config, scaler_mode_t scaleMode) { 325 bool_t overflow; 326 if ((pscaler_config->scaleMode == PSCALER_SCALE_UP) || 327 (pscaler_config->scaleMode == PSCALER_SCALE_MIXED_YUP)) { 328 // scale up factors are computed as (dim-1)/(dim-1) 329 pscaler_config->iSrcHeight--; 330 pscaler_config->iOutHeight--; 331 } 332 if ((pscaler_config->scaleMode == PSCALER_SCALE_UP) || 333 (pscaler_config->scaleMode == PSCALER_SCALE_MIXED_XUP)) { 334 pscaler_config->iSrcWidth--; 335 pscaler_config->iOutWidth--; 336 } 337 338 pscaler_config->fXfactor.decimal = (uint32) pscaler_config->iOutWidth / 339 (uint32) pscaler_config->iSrcWidth; 340 pscaler_config->fXfactor.fraction = _scaler_fraction_part( 341 (uint32) pscaler_config->iOutWidth, 342 (uint32) pscaler_config->iSrcWidth, 343 FRACTION_TRUNCATE, 344 &overflow); 345 346 pscaler_config->fXfactorInv.decimal = (uint32) pscaler_config->iSrcWidth / 347 (uint32) pscaler_config->iOutWidth; 348 pscaler_config->fXfactorInv.fraction = _scaler_fraction_part( 349 (uint32) pscaler_config->iSrcWidth, (uint32) pscaler_config->iOutWidth, 350 FRACTION_ROUND_UP, &overflow); 351 352 if (overflow) { 353 pscaler_config->fXfactorInv.decimal++; 354 } 355 356 pscaler_config->fYfactor.decimal = (uint32) pscaler_config->iOutHeight / 357 (uint32) pscaler_config->iSrcHeight; 358 pscaler_config->fYfactor.fraction = _scaler_fraction_part( 359 (uint32) pscaler_config->iOutHeight, (uint32) pscaler_config->iSrcHeight, 360 FRACTION_TRUNCATE, &overflow); 361 362 pscaler_config->fYfactorInv.decimal = (uint32) pscaler_config->iSrcHeight / 363 (uint32) pscaler_config->iOutHeight; 364 pscaler_config->fYfactorInv.fraction = _scaler_fraction_part( 365 (uint32) pscaler_config->iSrcHeight, (uint32) pscaler_config->iOutHeight, 366 FRACTION_ROUND_UP, &overflow); 367 368 if (overflow) { 369 pscaler_config->fYfactorInv.decimal++; 370 } 371 372 if ((pscaler_config->scaleMode == PSCALER_SCALE_UP) || 373 (pscaler_config->scaleMode == PSCALER_SCALE_MIXED_YUP)) { 374 // restore original dimensions 375 pscaler_config->iSrcHeight++; 376 pscaler_config->iOutHeight++; 377 } 378 if ((pscaler_config->scaleMode == PSCALER_SCALE_UP) || 379 (pscaler_config->scaleMode == PSCALER_SCALE_MIXED_XUP)) { 380 pscaler_config->iSrcWidth++; 381 pscaler_config->iOutWidth++; 382 } 383 } 384 385 static uint32 _scaler_fraction_part(uint32 iNum, uint32 iDen, pscaler_fraction_t mode, 386 bool_t *overflow) { 387 uint32 iFract; // fractional part 388 uint32 iRem; // remainder part 389 int i; // loop counter 390 391 *overflow = 0; 392 iFract = 0; 393 iRem = iNum % iDen; 394 395 if (iRem == 0) { 396 return (0); 397 } 398 399 for (i = PSCALER_FRACT_BITS_COUNT - 1; i >= 0; i--) { 400 iRem <<= 1; 401 402 if (iRem == iDen) { 403 iFract |= (1 << i); 404 break; 405 } else if (iRem > iDen) { 406 iFract |= (1 << i); 407 iRem -= iDen; 408 } 409 } 410 411 if (mode == FRACTION_TRUNCATE) { 412 return (iFract << 8); 413 } else { 414 if (iRem == 0) { 415 return (iFract << 8); 416 } else { 417 if (iFract < 0x00ffffff) { 418 iFract++; 419 return (iFract << 8); 420 } else { 421 *overflow = 1; 422 return (0); 423 } 424 } 425 } 426 } 427 428 #define _RESTRICT_ __restrict__ 429 430 static inline void _scale_row_down_9in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1, 431 uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ in4, uint8 *_RESTRICT_ in5, 432 uint8 *_RESTRICT_ in6, uint8 *_RESTRICT_ in7, uint8 *_RESTRICT_ in8, uint8 *_RESTRICT_ out, 433 uint64 position_x, uint64 x_factor_inv, uint32 top_weight, uint32 bot_weight, 434 uint32 weight_reciprocal, int out_width) { 435 int x; 436 uint32 in_col; 437 sint32 total_weight; 438 439 for (x = 0; x < out_width; x++) { 440 uint32 acc_r = 0; 441 uint32 acc_g = 0; 442 uint32 acc_b = 0; 443 uint32 curr_weight = 256 - ((position_x >> 24) & 0xff); 444 total_weight = x_factor_inv >> 24; 445 446 in_col = position_x >> 32; 447 448 while (total_weight > 0) { 449 acc_r += (uint32) in0[(in_col * 3) + 0] * curr_weight * top_weight; 450 acc_r += (uint32) in1[(in_col * 3) + 0] * curr_weight << 8; 451 acc_r += (uint32) in2[(in_col * 3) + 0] * curr_weight << 8; 452 acc_r += (uint32) in3[(in_col * 3) + 0] * curr_weight << 8; 453 acc_r += (uint32) in4[(in_col * 3) + 0] * curr_weight << 8; 454 acc_r += (uint32) in5[(in_col * 3) + 0] * curr_weight << 8; 455 acc_r += (uint32) in6[(in_col * 3) + 0] * curr_weight << 8; 456 acc_r += (uint32) in7[(in_col * 3) + 0] * curr_weight << 8; 457 acc_r += (uint32) in8[(in_col * 3) + 0] * curr_weight * bot_weight; 458 459 acc_g += (uint32) in0[(in_col * 3) + 1] * curr_weight * top_weight; 460 acc_g += (uint32) in1[(in_col * 3) + 1] * curr_weight << 8; 461 acc_g += (uint32) in2[(in_col * 3) + 1] * curr_weight << 8; 462 acc_g += (uint32) in3[(in_col * 3) + 1] * curr_weight << 8; 463 acc_g += (uint32) in4[(in_col * 3) + 1] * curr_weight << 8; 464 acc_g += (uint32) in5[(in_col * 3) + 1] * curr_weight << 8; 465 acc_g += (uint32) in6[(in_col * 3) + 1] * curr_weight << 8; 466 acc_g += (uint32) in7[(in_col * 3) + 1] * curr_weight << 8; 467 acc_g += (uint32) in8[(in_col * 3) + 1] * curr_weight * bot_weight; 468 469 acc_b += (uint32) in0[(in_col * 3) + 2] * curr_weight * top_weight; 470 acc_b += (uint32) in1[(in_col * 3) + 2] * curr_weight << 8; 471 acc_b += (uint32) in2[(in_col * 3) + 2] * curr_weight << 8; 472 acc_b += (uint32) in3[(in_col * 3) + 2] * curr_weight << 8; 473 acc_b += (uint32) in4[(in_col * 3) + 2] * curr_weight << 8; 474 acc_b += (uint32) in5[(in_col * 3) + 2] * curr_weight << 8; 475 acc_b += (uint32) in6[(in_col * 3) + 2] * curr_weight << 8; 476 acc_b += (uint32) in7[(in_col * 3) + 2] * curr_weight << 8; 477 acc_b += (uint32) in8[(in_col * 3) + 2] * curr_weight * bot_weight; 478 479 in_col++; 480 481 total_weight -= curr_weight; 482 curr_weight = total_weight > 256 ? 256 : total_weight; 483 } 484 485 position_x += x_factor_inv; 486 487 out[(x * 3) + 0] = ((uint64) acc_r * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 488 out[(x * 3) + 0] = ((uint64) acc_g * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 489 out[(x * 3) + 0] = ((uint64) acc_b * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 490 } 491 } 492 493 static inline void _scale_row_down_8in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1, 494 uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ in4, uint8 *_RESTRICT_ in5, 495 uint8 *_RESTRICT_ in6, uint8 *_RESTRICT_ in7, uint8 *_RESTRICT_ out, uint64 position_x, 496 uint64 x_factor_inv, uint32 top_weight, 497 uint32 bot_weight, uint32 weight_reciprocal, 498 int out_width) { 499 int x; 500 uint32 in_col; 501 sint32 total_weight; 502 503 for (x = 0; x < out_width; x++) { 504 uint32 acc_r = 0; 505 uint32 acc_g = 0; 506 uint32 acc_b = 0; 507 uint32 curr_weight = 256 - ((position_x >> 24) & 0xff); 508 total_weight = x_factor_inv >> 24; 509 510 in_col = position_x >> 32; 511 512 while (total_weight > 0) { 513 acc_r += (uint32) in0[(in_col * 3) + 0] * curr_weight * top_weight; 514 acc_r += (uint32) in1[(in_col * 3) + 0] * curr_weight << 8; 515 acc_r += (uint32) in2[(in_col * 3) + 0] * curr_weight << 8; 516 acc_r += (uint32) in3[(in_col * 3) + 0] * curr_weight << 8; 517 acc_r += (uint32) in4[(in_col * 3) + 0] * curr_weight << 8; 518 acc_r += (uint32) in5[(in_col * 3) + 0] * curr_weight << 8; 519 acc_r += (uint32) in6[(in_col * 3) + 0] * curr_weight << 8; 520 acc_r += (uint32) in7[(in_col * 3) + 0] * curr_weight * bot_weight; 521 522 acc_g += (uint32) in0[(in_col * 3) + 1] * curr_weight * top_weight; 523 acc_g += (uint32) in1[(in_col * 3) + 1] * curr_weight << 8; 524 acc_g += (uint32) in2[(in_col * 3) + 1] * curr_weight << 8; 525 acc_g += (uint32) in3[(in_col * 3) + 1] * curr_weight << 8; 526 acc_g += (uint32) in4[(in_col * 3) + 1] * curr_weight << 8; 527 acc_g += (uint32) in5[(in_col * 3) + 1] * curr_weight << 8; 528 acc_g += (uint32) in6[(in_col * 3) + 1] * curr_weight << 8; 529 acc_g += (uint32) in7[(in_col * 3) + 1] * curr_weight * bot_weight; 530 531 acc_b += (uint32) in0[(in_col * 3) + 2] * curr_weight * top_weight; 532 acc_b += (uint32) in1[(in_col * 3) + 2] * curr_weight << 8; 533 acc_b += (uint32) in2[(in_col * 3) + 2] * curr_weight << 8; 534 acc_b += (uint32) in3[(in_col * 3) + 2] * curr_weight << 8; 535 acc_b += (uint32) in4[(in_col * 3) + 2] * curr_weight << 8; 536 acc_b += (uint32) in5[(in_col * 3) + 2] * curr_weight << 8; 537 acc_b += (uint32) in6[(in_col * 3) + 2] * curr_weight << 8; 538 acc_b += (uint32) in7[(in_col * 3) + 2] * curr_weight * bot_weight; 539 540 in_col++; 541 542 total_weight -= curr_weight; 543 curr_weight = total_weight > 256 ? 256 : total_weight; 544 } 545 546 position_x += x_factor_inv; 547 548 out[(x * 3) + 0] = ((uint64) acc_r * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 549 out[(x * 3) + 1] = ((uint64) acc_g * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 550 out[(x * 3) + 2] = ((uint64) acc_b * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 551 } 552 } 553 554 static inline void _scale_row_down_7in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1, 555 uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ in4, uint8 *_RESTRICT_ in5, 556 uint8 *_RESTRICT_ in6, uint8 *_RESTRICT_ out, uint64 position_x, uint64 x_factor_inv, 557 uint32 top_weight, uint32 bot_weight, uint32 weight_reciprocal, int out_width) { 558 int x; 559 uint32 in_col; 560 sint32 total_weight; 561 562 for (x = 0; x < out_width; x++) { 563 uint32 acc_r = 0; 564 uint32 acc_g = 0; 565 uint32 acc_b = 0; 566 uint32 curr_weight = 256 - ((position_x >> 24) & 0xff); 567 total_weight = x_factor_inv >> 24; 568 569 in_col = position_x >> 32; 570 571 while (total_weight > 0) { 572 acc_r += (uint32) in0[(in_col * 3) + 0] * curr_weight * top_weight; 573 acc_r += (uint32) in1[(in_col * 3) + 0] * curr_weight << 8; 574 acc_r += (uint32) in2[(in_col * 3) + 0] * curr_weight << 8; 575 acc_r += (uint32) in3[(in_col * 3) + 0] * curr_weight << 8; 576 acc_r += (uint32) in4[(in_col * 3) + 0] * curr_weight << 8; 577 acc_r += (uint32) in5[(in_col * 3) + 0] * curr_weight << 8; 578 acc_r += (uint32) in6[(in_col * 3) + 0] * curr_weight * bot_weight; 579 580 acc_g += (uint32) in0[(in_col * 3) + 1] * curr_weight * top_weight; 581 acc_g += (uint32) in1[(in_col * 3) + 1] * curr_weight << 8; 582 acc_g += (uint32) in2[(in_col * 3) + 1] * curr_weight << 8; 583 acc_g += (uint32) in3[(in_col * 3) + 1] * curr_weight << 8; 584 acc_g += (uint32) in4[(in_col * 3) + 1] * curr_weight << 8; 585 acc_g += (uint32) in5[(in_col * 3) + 1] * curr_weight << 8; 586 acc_g += (uint32) in6[(in_col * 3) + 1] * curr_weight * bot_weight; 587 588 acc_b += (uint32) in0[(in_col * 3) + 2] * curr_weight * top_weight; 589 acc_b += (uint32) in1[(in_col * 3) + 2] * curr_weight << 8; 590 acc_b += (uint32) in2[(in_col * 3) + 2] * curr_weight << 8; 591 acc_b += (uint32) in3[(in_col * 3) + 2] * curr_weight << 8; 592 acc_b += (uint32) in4[(in_col * 3) + 2] * curr_weight << 8; 593 acc_b += (uint32) in5[(in_col * 3) + 2] * curr_weight << 8; 594 acc_b += (uint32) in6[(in_col * 3) + 2] * curr_weight * bot_weight; 595 596 in_col++; 597 598 total_weight -= curr_weight; 599 curr_weight = total_weight > 256 ? 256 : total_weight; 600 } 601 602 position_x += x_factor_inv; 603 604 out[(x * 3) + 0] = ((uint64) acc_r * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 605 out[(x * 3) + 1] = ((uint64) acc_g * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 606 out[(x * 3) + 2] = ((uint64) acc_b * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 607 } 608 } 609 610 static inline void _scale_row_down_6in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1, 611 uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ in4, uint8 *_RESTRICT_ in5, 612 uint8 *_RESTRICT_ out, uint64 position_x, uint64 x_factor_inv, uint32 top_weight, 613 uint32 bot_weight, uint32 weight_reciprocal, int out_width) { 614 int x; 615 uint32 in_col; 616 sint32 total_weight; 617 618 for (x = 0; x < out_width; x++) { 619 uint32 acc_r = 0; 620 uint32 acc_g = 0; 621 uint32 acc_b = 0; 622 uint32 curr_weight = 256 - ((position_x >> 24) & 0xff); 623 total_weight = x_factor_inv >> 24; 624 625 in_col = position_x >> 32; 626 627 while (total_weight > 0) { 628 acc_r += (uint32) in0[(in_col * 3) + 0] * curr_weight * top_weight; 629 acc_r += (uint32) in1[(in_col * 3) + 0] * curr_weight << 8; 630 acc_r += (uint32) in2[(in_col * 3) + 0] * curr_weight << 8; 631 acc_r += (uint32) in3[(in_col * 3) + 0] * curr_weight << 8; 632 acc_r += (uint32) in4[(in_col * 3) + 0] * curr_weight << 8; 633 acc_r += (uint32) in5[(in_col * 3) + 0] * curr_weight * bot_weight; 634 635 acc_g += (uint32) in0[(in_col * 3) + 1] * curr_weight * top_weight; 636 acc_g += (uint32) in1[(in_col * 3) + 1] * curr_weight << 8; 637 acc_g += (uint32) in2[(in_col * 3) + 1] * curr_weight << 8; 638 acc_g += (uint32) in3[(in_col * 3) + 1] * curr_weight << 8; 639 acc_g += (uint32) in4[(in_col * 3) + 1] * curr_weight << 8; 640 acc_g += (uint32) in5[(in_col * 3) + 1] * curr_weight * bot_weight; 641 642 acc_b += (uint32) in0[(in_col * 3) + 2] * curr_weight * top_weight; 643 acc_b += (uint32) in1[(in_col * 3) + 2] * curr_weight << 8; 644 acc_b += (uint32) in2[(in_col * 3) + 2] * curr_weight << 8; 645 acc_b += (uint32) in3[(in_col * 3) + 2] * curr_weight << 8; 646 acc_b += (uint32) in4[(in_col * 3) + 2] * curr_weight << 8; 647 acc_b += (uint32) in5[(in_col * 3) + 2] * curr_weight * bot_weight; 648 649 in_col++; 650 651 total_weight -= curr_weight; 652 curr_weight = total_weight > 256 ? 256 : total_weight; 653 } 654 655 position_x += x_factor_inv; 656 657 out[(x * 3) + 0] = ((uint64) acc_r * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 658 out[(x * 3) + 1] = ((uint64) acc_g * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 659 out[(x * 3) + 2] = ((uint64) acc_b * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 660 } 661 } 662 663 static inline void _scale_row_down_5in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1, 664 uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ in4, uint8 *_RESTRICT_ out, 665 uint64 position_x, uint64 x_factor_inv, uint32 top_weight, uint32 bot_weight, 666 uint32 weight_reciprocal, int out_width) { 667 int x; 668 uint32 in_col; 669 sint32 total_weight; 670 671 for (x = 0; x < out_width; x++) { 672 uint32 acc_r = 0; 673 uint32 acc_g = 0; 674 uint32 acc_b = 0; 675 uint32 curr_weight = 256 - ((position_x >> 24) & 0xff); 676 total_weight = x_factor_inv >> 24; 677 678 in_col = position_x >> 32; 679 680 while (total_weight > 0) { 681 acc_r += (uint32) in0[(in_col * 3) + 0] * curr_weight * top_weight; 682 acc_r += (uint32) in1[(in_col * 3) + 0] * curr_weight << 8; 683 acc_r += (uint32) in2[(in_col * 3) + 0] * curr_weight << 8; 684 acc_r += (uint32) in3[(in_col * 3) + 0] * curr_weight << 8; 685 acc_r += (uint32) in4[(in_col * 3) + 0] * curr_weight * bot_weight; 686 687 acc_g += (uint32) in0[(in_col * 3) + 1] * curr_weight * top_weight; 688 acc_g += (uint32) in1[(in_col * 3) + 1] * curr_weight << 8; 689 acc_g += (uint32) in2[(in_col * 3) + 1] * curr_weight << 8; 690 acc_g += (uint32) in3[(in_col * 3) + 1] * curr_weight << 8; 691 acc_g += (uint32) in4[(in_col * 3) + 1] * curr_weight * bot_weight; 692 693 acc_b += (uint32) in0[(in_col * 3) + 2] * curr_weight * top_weight; 694 acc_b += (uint32) in1[(in_col * 3) + 2] * curr_weight << 8; 695 acc_b += (uint32) in2[(in_col * 3) + 2] * curr_weight << 8; 696 acc_b += (uint32) in3[(in_col * 3) + 2] * curr_weight << 8; 697 acc_b += (uint32) in4[(in_col * 3) + 2] * curr_weight * bot_weight; 698 699 in_col++; 700 701 total_weight -= curr_weight; 702 curr_weight = total_weight > 256 ? 256 : total_weight; 703 } 704 705 position_x += x_factor_inv; 706 707 out[(x * 3) + 0] = ((uint64) acc_r * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 708 out[(x * 3) + 1] = ((uint64) acc_g * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 709 out[(x * 3) + 2] = ((uint64) acc_b * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 710 } 711 } 712 713 static inline void _scale_row_down_4in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1, 714 uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ in3, uint8 *_RESTRICT_ out, uint64 position_x, 715 uint64 x_factor_inv, uint32 top_weight, uint32 bot_weight, uint32 weight_reciprocal, 716 int out_width) { 717 int x; 718 uint32 in_col; 719 sint32 total_weight; 720 721 for (x = 0; x < out_width; x++) { 722 uint32 acc_r = 0; 723 uint32 acc_g = 0; 724 uint32 acc_b = 0; 725 uint32 curr_weight = 256 - ((position_x >> 24) & 0xff); 726 total_weight = x_factor_inv >> 24; 727 728 in_col = position_x >> 32; 729 730 while (total_weight > 0) { 731 acc_r += (uint32) in0[(in_col * 3) + 0] * curr_weight * top_weight; 732 acc_r += (uint32) in1[(in_col * 3) + 0] * curr_weight << 8; 733 acc_r += (uint32) in2[(in_col * 3) + 0] * curr_weight << 8; 734 acc_r += (uint32) in3[(in_col * 3) + 0] * curr_weight * bot_weight; 735 736 acc_g += (uint32) in0[(in_col * 3) + 1] * curr_weight * top_weight; 737 acc_g += (uint32) in1[(in_col * 3) + 1] * curr_weight << 8; 738 acc_g += (uint32) in2[(in_col * 3) + 1] * curr_weight << 8; 739 acc_g += (uint32) in3[(in_col * 3) + 1] * curr_weight * bot_weight; 740 741 acc_b += (uint32) in0[(in_col * 3) + 2] * curr_weight * top_weight; 742 acc_b += (uint32) in1[(in_col * 3) + 2] * curr_weight << 8; 743 acc_b += (uint32) in2[(in_col * 3) + 2] * curr_weight << 8; 744 acc_b += (uint32) in3[(in_col * 3) + 2] * curr_weight * bot_weight; 745 746 in_col++; 747 748 total_weight -= curr_weight; 749 curr_weight = total_weight > 256 ? 256 : total_weight; 750 } 751 752 position_x += x_factor_inv; 753 754 out[(x * 3) + 0] = ((uint64) acc_r * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 755 out[(x * 3) + 1] = ((uint64) acc_g * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 756 out[(x * 3) + 2] = ((uint64) acc_b * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 757 } 758 } 759 760 static inline void _scale_row_down_3in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1, 761 uint8 *_RESTRICT_ in2, uint8 *_RESTRICT_ out, uint64 position_x, uint64 x_factor_inv, 762 uint32 top_weight, uint32 bot_weight, uint32 weight_reciprocal, int out_width) { 763 int x; 764 uint32 in_col; 765 sint32 total_weight; 766 767 for (x = 0; x < out_width; x++) { 768 uint32 acc_r = 0; 769 uint32 acc_g = 0; 770 uint32 acc_b = 0; 771 uint32 curr_weight = 256 - ((position_x >> 24) & 0xff); 772 total_weight = x_factor_inv >> 24; 773 774 in_col = position_x >> 32; 775 776 while (total_weight > 0) { 777 acc_r += (uint32) in0[(in_col * 3) + 0] * curr_weight * top_weight; 778 acc_r += (uint32) in1[(in_col * 3) + 0] * curr_weight << 8; 779 acc_r += (uint32) in2[(in_col * 3) + 0] * curr_weight * bot_weight; 780 781 acc_g += (uint32) in0[(in_col * 3) + 1] * curr_weight * top_weight; 782 acc_g += (uint32) in1[(in_col * 3) + 1] * curr_weight << 8; 783 acc_g += (uint32) in2[(in_col * 3) + 1] * curr_weight * bot_weight; 784 785 acc_b += (uint32) in0[(in_col * 3) + 2] * curr_weight * top_weight; 786 acc_b += (uint32) in1[(in_col * 3) + 2] * curr_weight << 8; 787 acc_b += (uint32) in2[(in_col * 3) + 2] * curr_weight * bot_weight; 788 789 in_col++; 790 791 total_weight -= curr_weight; 792 curr_weight = total_weight > 256 ? 256 : total_weight; 793 } 794 795 position_x += x_factor_inv; 796 797 out[(x * 3) + 0] = ((uint64) acc_r * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 798 out[(x * 3) + 1] = ((uint64) acc_g * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 799 out[(x * 3) + 2] = ((uint64) acc_b * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 800 } 801 } 802 803 static inline void _scale_row_down_2in(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1, 804 uint8 *_RESTRICT_ out, uint64 position_x, uint64 x_factor_inv, uint32 top_weight, 805 uint32 bot_weight, uint32 weight_reciprocal, int out_width) { 806 int x; 807 uint32 in_col; 808 sint32 total_weight; 809 810 for (x = 0; x < out_width; x++) { 811 uint32 acc_r = 0; 812 uint32 acc_g = 0; 813 uint32 acc_b = 0; 814 uint32 curr_weight = 256 - ((position_x >> 24) & 0xff); 815 total_weight = x_factor_inv >> 24; 816 817 in_col = position_x >> 32; 818 819 while (total_weight > 0) { 820 acc_r += (uint32) in0[(in_col * 3) + 0] * curr_weight * top_weight; 821 acc_r += (uint32) in1[(in_col * 3) + 0] * curr_weight * bot_weight; 822 823 acc_g += (uint32) in0[(in_col * 3) + 1] * curr_weight * top_weight; 824 acc_g += (uint32) in1[(in_col * 3) + 1] * curr_weight * bot_weight; 825 826 acc_b += (uint32) in0[(in_col * 3) + 2] * curr_weight * top_weight; 827 acc_b += (uint32) in1[(in_col * 3) + 2] * curr_weight * bot_weight; 828 829 in_col++; 830 831 total_weight -= curr_weight; 832 curr_weight = total_weight > 256 ? 256 : total_weight; 833 } 834 835 position_x += x_factor_inv; 836 837 out[(x * 3) + 0] = ((uint64) acc_r * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 838 out[(x * 3) + 1] = ((uint64) acc_g * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 839 out[(x * 3) + 2] = ((uint64) acc_b * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 840 } 841 } 842 843 static inline void _scale_row_down(uint8 *in, uint8 *_RESTRICT_ out, uint32 in_row_ofs, 844 uint64 position_x, uint64 position_y, uint64 x_factor_inv, uint64 y_factor_inv, 845 uint32 weight_reciprocal, int out_width) { 846 int x; 847 uint32 y, in_col, in_rows, top_weight, bot_weight; 848 sint32 total_weight; 849 850 total_weight = y_factor_inv >> 24; 851 852 top_weight = (uint32) 256 - ((position_y >> 24) & 0xff); 853 854 if ((sint32) top_weight > total_weight) { 855 top_weight = total_weight; 856 } 857 total_weight -= top_weight; 858 859 if (total_weight & 0xff) { 860 bot_weight = total_weight & 0xff; 861 } else if (total_weight > 255) { 862 bot_weight = 256; 863 } else { 864 bot_weight = 0; 865 } 866 867 total_weight -= bot_weight; 868 869 assert(total_weight >= 0); 870 assert((total_weight & 0xff) == 0); 871 872 in_rows = 2 + (total_weight >> 8); 873 874 if (in_rows == 2) { 875 _scale_row_down_2in(in, in + in_row_ofs, 876 out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal, 877 out_width); 878 } else if (in_rows == 3) { 879 _scale_row_down_3in(in, in + in_row_ofs, in + 2 * in_row_ofs, 880 out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal, 881 out_width); 882 } else if (in_rows == 4) { 883 _scale_row_down_4in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs, 884 out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal, 885 out_width); 886 } else if (in_rows == 5) { 887 _scale_row_down_5in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs, 888 in + 4 * in_row_ofs, 889 out, position_x, x_factor_inv, 890 top_weight, bot_weight, weight_reciprocal, 891 out_width); 892 } else if (in_rows == 6) { 893 _scale_row_down_6in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs, 894 in + 4 * in_row_ofs, in + 5 * in_row_ofs, 895 out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal, 896 out_width); 897 } else if (in_rows == 7) { 898 _scale_row_down_7in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs, 899 in + 4 * in_row_ofs, in + 5 * in_row_ofs, in + 6 * in_row_ofs, 900 out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal, 901 out_width); 902 } else if (in_rows == 8) { 903 _scale_row_down_8in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs, 904 in + 4 * in_row_ofs, in + 5 * in_row_ofs, in + 6 * in_row_ofs, 905 in + 7 * in_row_ofs, 906 out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal, 907 out_width); 908 } else if (in_rows == 9) { 909 _scale_row_down_9in(in, in + in_row_ofs, in + 2 * in_row_ofs, in + 3 * in_row_ofs, 910 in + 4 * in_row_ofs, in + 5 * in_row_ofs, in + 6 * in_row_ofs, 911 in + 7 * in_row_ofs, in + 8 * in_row_ofs, 912 out, position_x, x_factor_inv, top_weight, bot_weight, weight_reciprocal, 913 out_width); 914 } else { 915 for (x = 0; x < out_width; x++) { 916 uint32 acc_r = 0; 917 uint32 acc_g = 0; 918 uint32 acc_b = 0; 919 uint32 curr_weight = 256 - ((position_x >> 24) & 0xff); 920 total_weight = x_factor_inv >> 24; 921 922 in_col = position_x >> 32; 923 924 while (total_weight > 0) { 925 acc_r += (uint32) in[(in_col * 3) + 0] * curr_weight * top_weight; 926 acc_g += (uint32) in[(in_col * 3) + 1] * curr_weight * top_weight; 927 acc_b += (uint32) in[(in_col * 3) + 2] * curr_weight * top_weight; 928 929 for (y = 1; y < in_rows - 1; y++) { 930 acc_r += (uint32) in[y * in_row_ofs + ((in_col * 3) + 0)] * curr_weight * 256; 931 acc_g += (uint32) in[y * in_row_ofs + ((in_col * 3) + 1)] * curr_weight * 256; 932 acc_b += (uint32) in[y * in_row_ofs + ((in_col * 3) + 2)] * curr_weight * 256; 933 } 934 935 acc_r += 936 (uint32) in[y * in_row_ofs + ((in_col * 3) + 0)] * curr_weight * bot_weight; 937 acc_g += 938 (uint32) in[y * in_row_ofs + ((in_col * 3) + 1)] * curr_weight * bot_weight; 939 acc_b += 940 (uint32) in[y * in_row_ofs + ((in_col * 3) + 2)] * curr_weight * bot_weight; 941 942 in_col++; 943 total_weight -= curr_weight; 944 curr_weight = total_weight > 256 ? 256 : total_weight; 945 } 946 947 position_x += x_factor_inv; 948 949 out[(x * 3) + 0] = ((uint64) acc_r * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 950 out[(x * 3) + 1] = ((uint64) acc_g * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 951 out[(x * 3) + 2] = ((uint64) acc_b * weight_reciprocal + ((uint64) 1 << 31)) >> 32; 952 } 953 } 954 } 955 956 static void _scale_row_up(uint8 *_RESTRICT_ in0, uint8 *_RESTRICT_ in1, uint8 *_RESTRICT_ out, 957 sint32 weight_y, uint64 position_x, uint64 increment_x, int out_width) { 958 int x; 959 for (x = 0; x < out_width; x++) { 960 sint32 top_val_r, bot_val_r; 961 sint32 top_val_g, bot_val_g; 962 sint32 top_val_b, bot_val_b; 963 964 // Position is tracked with 32 bits of precision, but interpolation is 965 // only guided by 10. REVISIT - Check ASM and make sure the compiler 966 // handled the second part here optimally. 967 uint32 pix_x = position_x >> 32; 968 969 sint32 weight_x = (position_x & 0xffffffff) >> 22; 970 971 // top_val and bot_val become 18-bit values here 972 top_val_r = (in0[(pix_x * 3) + 0] << 10) + 973 weight_x * ((sint32) in0[((pix_x + 1) * 3) + 0] - in0[(pix_x * 3) + 0]); 974 bot_val_r = (in1[(pix_x * 3) + 0] << 10) + 975 weight_x * ((sint32) in1[((pix_x + 1) * 3) + 0] - in1[(pix_x * 3) + 0]); 976 977 top_val_g = (in0[(pix_x * 3) + 1] << 10) + 978 weight_x * ((sint32) in0[((pix_x + 1) * 3) + 1] - in0[(pix_x * 3) + 1]); 979 bot_val_g = (in1[(pix_x * 3) + 1] << 10) + 980 weight_x * ((sint32) in1[((pix_x + 1) * 3) + 1] - in1[(pix_x * 3) + 1]); 981 982 top_val_b = (in0[(pix_x * 3) + 2] << 10) + 983 weight_x * ((sint32) in0[((pix_x + 1) * 3) + 2] - in0[(pix_x * 3) + 2]); 984 bot_val_b = (in1[(pix_x * 3) + 2] << 10) + 985 weight_x * ((sint32) in1[((pix_x + 1) * 3) + 2] - in1[(pix_x * 3) + 2]); 986 987 // out is an 8-bit value. We do not need to range-check, as overflow 988 // is mathematically impossible. 989 out[(x * 3) + 0] = ((top_val_r << 10) + weight_y * (bot_val_r - top_val_r)) >> 20; 990 out[(x * 3) + 1] = ((top_val_g << 10) + weight_y * (bot_val_g - top_val_g)) >> 20; 991 out[(x * 3) + 2] = ((top_val_b << 10) + weight_y * (bot_val_b - top_val_b)) >> 20; 992 993 position_x += increment_x; 994 } 995 } 996 997 static void _hw_scale_image_plane(scaler_config_t *pscaler_config, scaler_mode_t scaleMode) { 998 // These pointers duplicate h/w regs 999 uint64 x_factor, y_factor, x_factor_inv, y_factor_inv; 1000 uint32 x_output_width, y_output_width; 1001 uint32 input_pixel_ptr_offset, output_pixel_ptr_offset; 1002 uint32 first_xi; 1003 uint64 first_y_src, first_x_src, weight_reciprocal; 1004 1005 // These are internal state 1006 uint32 r; 1007 uint8 *outp; 1008 1009 x_output_width = pscaler_config->iOutWidth; 1010 y_output_width = pscaler_config->iOutEndRow - 1011 pscaler_config->iOutStartRow + 1; 1012 1013 input_pixel_ptr_offset = pscaler_config->iSrcBufWidth; 1014 output_pixel_ptr_offset = pscaler_config->iOutBufWidth; 1015 1016 x_factor = (uint64) pscaler_config->fXfactor.decimal << 32; 1017 x_factor |= pscaler_config->fXfactor.fraction; 1018 1019 y_factor = (uint64) pscaler_config->fYfactor.decimal << 32; 1020 y_factor |= pscaler_config->fYfactor.fraction; 1021 1022 x_factor_inv = (uint64) pscaler_config->fXfactorInv.decimal << 32; 1023 x_factor_inv |= pscaler_config->fXfactorInv.fraction; 1024 1025 y_factor_inv = (uint64) pscaler_config->fYfactorInv.decimal << 32; 1026 y_factor_inv |= pscaler_config->fYfactorInv.fraction; 1027 1028 first_y_src = (uint64) pscaler_config->fSrcStartRow.decimal << 32; 1029 first_y_src |= pscaler_config->fSrcStartRow.fraction; 1030 1031 // PC REVISIT - The HW has config registers for these, but they aren't being 1032 // used by lib_photo_scaler do I don't want to use them, either. For now 1033 // just print them so I can figure out what's going on and then clear the 1034 // associated variables. Maybe we're always running the scaler from the 1035 // left edge of the source so they're implicitly zero? 1036 first_xi = pscaler_config->iOutStartColumn; 1037 1038 first_x_src = (uint64) pscaler_config->fSrcStartColumn.decimal << 32; 1039 first_x_src |= pscaler_config->fSrcStartColumn.fraction; 1040 1041 first_xi = first_x_src = 0; 1042 1043 weight_reciprocal = ((uint64) 1 << 32); 1044 weight_reciprocal /= (x_factor_inv >> 24) * (y_factor_inv >> 24); 1045 1046 outp = (pscaler_config->pOutBuf) + (first_xi * 3); 1047 1048 // PC - Assume pSrcBuf is already aligned to "true" base of input, 1049 // so ignore whole-number part of first_y_src. 1050 first_y_src = first_y_src & 0xffffffff; 1051 1052 for (r = 0; r < y_output_width; r++) { 1053 uint8 *inp = (pscaler_config->pSrcBuf) + 1054 (first_y_src >> 32) * input_pixel_ptr_offset; 1055 { 1056 if (scaleMode == PSCALER_SCALE_UP) { 1057 _scale_row_up(inp, inp + input_pixel_ptr_offset, outp, 1058 (first_y_src & 0xffffffff) >> 22, first_x_src, 1059 x_factor_inv, x_output_width); 1060 } else { 1061 _scale_row_down(inp, outp, input_pixel_ptr_offset, 1062 first_x_src, first_y_src, x_factor_inv, y_factor_inv, 1063 weight_reciprocal, x_output_width); 1064 } 1065 } 1066 first_y_src += y_factor_inv; 1067 outp += output_pixel_ptr_offset; 1068 } 1069 }