1 /* 2 * jsimd_i386.c 3 * 4 * Copyright 2009 Pierre Ossman <ossman (at) cendio.se> for Cendio AB 5 * Copyright 2009-2011 D. R. Commander 6 * 7 * Based on the x86 SIMD extension for IJG JPEG library, 8 * Copyright (C) 1999-2006, MIYASAKA Masaru. 9 * For conditions of distribution and use, see copyright notice in jsimdext.inc 10 * 11 * This file contains the interface between the "normal" portions 12 * of the library and the SIMD implementations when running on a 13 * 32-bit x86 architecture. 14 */ 15 16 #define JPEG_INTERNALS 17 #include "../jinclude.h" 18 #include "../jpeglib.h" 19 #include "../jsimd.h" 20 #include "../jdct.h" 21 #include "../jsimddct.h" 22 #include "jsimd.h" 23 24 /* 25 * In the PIC cases, we have no guarantee that constants will keep 26 * their alignment. This macro allows us to verify it at runtime. 27 */ 28 #define IS_ALIGNED(ptr, order) (((unsigned)ptr & ((1 << order) - 1)) == 0) 29 30 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ 31 32 static unsigned int simd_support = ~0; 33 34 /* 35 * Check what SIMD accelerations are supported. 36 * 37 * FIXME: This code is racy under a multi-threaded environment. 38 */ 39 LOCAL(void) 40 init_simd (void) 41 { 42 char *env = NULL; 43 44 if (simd_support != ~0U) 45 return; 46 47 simd_support = jpeg_simd_cpu_support(); 48 49 /* Force different settings through environment variables */ 50 env = getenv("JSIMD_FORCEMMX"); 51 if ((env != NULL) && (strcmp(env, "1") == 0)) 52 simd_support &= JSIMD_MMX; 53 env = getenv("JSIMD_FORCE3DNOW"); 54 if ((env != NULL) && (strcmp(env, "1") == 0)) 55 simd_support &= JSIMD_3DNOW|JSIMD_MMX; 56 env = getenv("JSIMD_FORCESSE"); 57 if ((env != NULL) && (strcmp(env, "1") == 0)) 58 simd_support &= JSIMD_SSE|JSIMD_MMX; 59 env = getenv("JSIMD_FORCESSE2"); 60 if ((env != NULL) && (strcmp(env, "1") == 0)) 61 simd_support &= JSIMD_SSE2; 62 } 63 64 #ifndef JPEG_DECODE_ONLY 65 GLOBAL(int) 66 jsimd_can_rgb_ycc (void) 67 { 68 init_simd(); 69 70 /* The code is optimised for these values only */ 71 if (BITS_IN_JSAMPLE != 8) 72 return 0; 73 if (sizeof(JDIMENSION) != 4) 74 return 0; 75 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 76 return 0; 77 78 if ((simd_support & JSIMD_SSE2) && 79 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) 80 return 1; 81 if (simd_support & JSIMD_MMX) 82 return 1; 83 84 return 0; 85 } 86 #endif 87 88 GLOBAL(int) 89 jsimd_can_rgb_gray (void) 90 { 91 init_simd(); 92 93 /* The code is optimised for these values only */ 94 if (BITS_IN_JSAMPLE != 8) 95 return 0; 96 if (sizeof(JDIMENSION) != 4) 97 return 0; 98 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 99 return 0; 100 101 if ((simd_support & JSIMD_SSE2) && 102 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) 103 return 1; 104 if (simd_support & JSIMD_MMX) 105 return 1; 106 107 return 0; 108 } 109 110 GLOBAL(int) 111 jsimd_can_ycc_rgb (void) 112 { 113 init_simd(); 114 115 /* The code is optimised for these values only */ 116 if (BITS_IN_JSAMPLE != 8) 117 return 0; 118 if (sizeof(JDIMENSION) != 4) 119 return 0; 120 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 121 return 0; 122 123 if ((simd_support & JSIMD_SSE2) && 124 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) 125 return 1; 126 if (simd_support & JSIMD_MMX) 127 return 1; 128 129 return 0; 130 } 131 132 #ifndef JPEG_DECODE_ONLY 133 GLOBAL(void) 134 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, 135 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 136 JDIMENSION output_row, int num_rows) 137 { 138 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 139 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 140 141 switch(cinfo->in_color_space) 142 { 143 case JCS_EXT_RGB: 144 sse2fct=jsimd_extrgb_ycc_convert_sse2; 145 mmxfct=jsimd_extrgb_ycc_convert_mmx; 146 break; 147 case JCS_EXT_RGBX: 148 case JCS_EXT_RGBA: 149 sse2fct=jsimd_extrgbx_ycc_convert_sse2; 150 mmxfct=jsimd_extrgbx_ycc_convert_mmx; 151 break; 152 case JCS_EXT_BGR: 153 sse2fct=jsimd_extbgr_ycc_convert_sse2; 154 mmxfct=jsimd_extbgr_ycc_convert_mmx; 155 break; 156 case JCS_EXT_BGRX: 157 case JCS_EXT_BGRA: 158 sse2fct=jsimd_extbgrx_ycc_convert_sse2; 159 mmxfct=jsimd_extbgrx_ycc_convert_mmx; 160 break; 161 case JCS_EXT_XBGR: 162 case JCS_EXT_ABGR: 163 sse2fct=jsimd_extxbgr_ycc_convert_sse2; 164 mmxfct=jsimd_extxbgr_ycc_convert_mmx; 165 break; 166 case JCS_EXT_XRGB: 167 case JCS_EXT_ARGB: 168 sse2fct=jsimd_extxrgb_ycc_convert_sse2; 169 mmxfct=jsimd_extxrgb_ycc_convert_mmx; 170 break; 171 default: 172 sse2fct=jsimd_rgb_ycc_convert_sse2; 173 mmxfct=jsimd_rgb_ycc_convert_mmx; 174 break; 175 } 176 177 if ((simd_support & JSIMD_SSE2) && 178 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) 179 sse2fct(cinfo->image_width, input_buf, 180 output_buf, output_row, num_rows); 181 else if (simd_support & JSIMD_MMX) 182 mmxfct(cinfo->image_width, input_buf, 183 output_buf, output_row, num_rows); 184 } 185 #endif 186 187 GLOBAL(void) 188 jsimd_rgb_gray_convert (j_compress_ptr cinfo, 189 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 190 JDIMENSION output_row, int num_rows) 191 { 192 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 193 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 194 195 switch(cinfo->in_color_space) 196 { 197 case JCS_EXT_RGB: 198 sse2fct=jsimd_extrgb_gray_convert_sse2; 199 mmxfct=jsimd_extrgb_gray_convert_mmx; 200 break; 201 case JCS_EXT_RGBX: 202 case JCS_EXT_RGBA: 203 sse2fct=jsimd_extrgbx_gray_convert_sse2; 204 mmxfct=jsimd_extrgbx_gray_convert_mmx; 205 break; 206 case JCS_EXT_BGR: 207 sse2fct=jsimd_extbgr_gray_convert_sse2; 208 mmxfct=jsimd_extbgr_gray_convert_mmx; 209 break; 210 case JCS_EXT_BGRX: 211 case JCS_EXT_BGRA: 212 sse2fct=jsimd_extbgrx_gray_convert_sse2; 213 mmxfct=jsimd_extbgrx_gray_convert_mmx; 214 break; 215 case JCS_EXT_XBGR: 216 case JCS_EXT_ABGR: 217 sse2fct=jsimd_extxbgr_gray_convert_sse2; 218 mmxfct=jsimd_extxbgr_gray_convert_mmx; 219 break; 220 case JCS_EXT_XRGB: 221 case JCS_EXT_ARGB: 222 sse2fct=jsimd_extxrgb_gray_convert_sse2; 223 mmxfct=jsimd_extxrgb_gray_convert_mmx; 224 break; 225 default: 226 sse2fct=jsimd_rgb_gray_convert_sse2; 227 mmxfct=jsimd_rgb_gray_convert_mmx; 228 break; 229 } 230 231 if ((simd_support & JSIMD_SSE2) && 232 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) 233 sse2fct(cinfo->image_width, input_buf, 234 output_buf, output_row, num_rows); 235 else if (simd_support & JSIMD_MMX) 236 mmxfct(cinfo->image_width, input_buf, 237 output_buf, output_row, num_rows); 238 } 239 240 GLOBAL(void) 241 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, 242 JSAMPIMAGE input_buf, JDIMENSION input_row, 243 JSAMPARRAY output_buf, int num_rows) 244 { 245 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 246 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 247 248 switch(cinfo->out_color_space) 249 { 250 case JCS_EXT_RGB: 251 sse2fct=jsimd_ycc_extrgb_convert_sse2; 252 mmxfct=jsimd_ycc_extrgb_convert_mmx; 253 break; 254 case JCS_EXT_RGBX: 255 case JCS_EXT_RGBA: 256 sse2fct=jsimd_ycc_extrgbx_convert_sse2; 257 mmxfct=jsimd_ycc_extrgbx_convert_mmx; 258 break; 259 case JCS_EXT_BGR: 260 sse2fct=jsimd_ycc_extbgr_convert_sse2; 261 mmxfct=jsimd_ycc_extbgr_convert_mmx; 262 break; 263 case JCS_EXT_BGRX: 264 case JCS_EXT_BGRA: 265 sse2fct=jsimd_ycc_extbgrx_convert_sse2; 266 mmxfct=jsimd_ycc_extbgrx_convert_mmx; 267 break; 268 case JCS_EXT_XBGR: 269 case JCS_EXT_ABGR: 270 sse2fct=jsimd_ycc_extxbgr_convert_sse2; 271 mmxfct=jsimd_ycc_extxbgr_convert_mmx; 272 break; 273 case JCS_EXT_XRGB: 274 case JCS_EXT_ARGB: 275 sse2fct=jsimd_ycc_extxrgb_convert_sse2; 276 mmxfct=jsimd_ycc_extxrgb_convert_mmx; 277 break; 278 default: 279 sse2fct=jsimd_ycc_rgb_convert_sse2; 280 mmxfct=jsimd_ycc_rgb_convert_mmx; 281 break; 282 } 283 284 if ((simd_support & JSIMD_SSE2) && 285 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) 286 sse2fct(cinfo->output_width, input_buf, 287 input_row, output_buf, num_rows); 288 else if (simd_support & JSIMD_MMX) 289 mmxfct(cinfo->output_width, input_buf, 290 input_row, output_buf, num_rows); 291 } 292 293 #ifndef JPEG_DECODE_ONLY 294 GLOBAL(int) 295 jsimd_can_h2v2_downsample (void) 296 { 297 init_simd(); 298 299 /* The code is optimised for these values only */ 300 if (BITS_IN_JSAMPLE != 8) 301 return 0; 302 if (sizeof(JDIMENSION) != 4) 303 return 0; 304 305 if (simd_support & JSIMD_SSE2) 306 return 1; 307 if (simd_support & JSIMD_MMX) 308 return 1; 309 310 return 0; 311 } 312 313 GLOBAL(int) 314 jsimd_can_h2v1_downsample (void) 315 { 316 init_simd(); 317 318 /* The code is optimised for these values only */ 319 if (BITS_IN_JSAMPLE != 8) 320 return 0; 321 if (sizeof(JDIMENSION) != 4) 322 return 0; 323 324 if (simd_support & JSIMD_SSE2) 325 return 1; 326 if (simd_support & JSIMD_MMX) 327 return 1; 328 329 return 0; 330 } 331 332 GLOBAL(void) 333 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, 334 JSAMPARRAY input_data, JSAMPARRAY output_data) 335 { 336 if (simd_support & JSIMD_SSE2) 337 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, 338 compptr->v_samp_factor, compptr->width_in_blocks, 339 input_data, output_data); 340 else if (simd_support & JSIMD_MMX) 341 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, 342 compptr->v_samp_factor, compptr->width_in_blocks, 343 input_data, output_data); 344 } 345 346 GLOBAL(void) 347 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, 348 JSAMPARRAY input_data, JSAMPARRAY output_data) 349 { 350 if (simd_support & JSIMD_SSE2) 351 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, 352 compptr->v_samp_factor, compptr->width_in_blocks, 353 input_data, output_data); 354 else if (simd_support & JSIMD_MMX) 355 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, 356 compptr->v_samp_factor, compptr->width_in_blocks, 357 input_data, output_data); 358 } 359 #endif 360 361 GLOBAL(int) 362 jsimd_can_h2v2_upsample (void) 363 { 364 init_simd(); 365 366 /* The code is optimised for these values only */ 367 if (BITS_IN_JSAMPLE != 8) 368 return 0; 369 if (sizeof(JDIMENSION) != 4) 370 return 0; 371 372 if (simd_support & JSIMD_SSE2) 373 return 1; 374 if (simd_support & JSIMD_MMX) 375 return 1; 376 377 return 0; 378 } 379 380 GLOBAL(int) 381 jsimd_can_h2v1_upsample (void) 382 { 383 init_simd(); 384 385 /* The code is optimised for these values only */ 386 if (BITS_IN_JSAMPLE != 8) 387 return 0; 388 if (sizeof(JDIMENSION) != 4) 389 return 0; 390 391 if (simd_support & JSIMD_SSE2) 392 return 1; 393 if (simd_support & JSIMD_MMX) 394 return 1; 395 396 return 0; 397 } 398 399 GLOBAL(void) 400 jsimd_h2v2_upsample (j_decompress_ptr cinfo, 401 jpeg_component_info * compptr, 402 JSAMPARRAY input_data, 403 JSAMPARRAY * output_data_ptr) 404 { 405 if (simd_support & JSIMD_SSE2) 406 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, 407 cinfo->output_width, input_data, output_data_ptr); 408 else if (simd_support & JSIMD_MMX) 409 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, 410 cinfo->output_width, input_data, output_data_ptr); 411 } 412 413 GLOBAL(void) 414 jsimd_h2v1_upsample (j_decompress_ptr cinfo, 415 jpeg_component_info * compptr, 416 JSAMPARRAY input_data, 417 JSAMPARRAY * output_data_ptr) 418 { 419 if (simd_support & JSIMD_SSE2) 420 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, 421 cinfo->output_width, input_data, output_data_ptr); 422 else if (simd_support & JSIMD_MMX) 423 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, 424 cinfo->output_width, input_data, output_data_ptr); 425 } 426 427 GLOBAL(int) 428 jsimd_can_h2v2_fancy_upsample (void) 429 { 430 init_simd(); 431 432 /* The code is optimised for these values only */ 433 if (BITS_IN_JSAMPLE != 8) 434 return 0; 435 if (sizeof(JDIMENSION) != 4) 436 return 0; 437 438 if ((simd_support & JSIMD_SSE2) && 439 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 440 return 1; 441 if (simd_support & JSIMD_MMX) 442 return 1; 443 444 return 0; 445 } 446 447 GLOBAL(int) 448 jsimd_can_h2v1_fancy_upsample (void) 449 { 450 init_simd(); 451 452 /* The code is optimised for these values only */ 453 if (BITS_IN_JSAMPLE != 8) 454 return 0; 455 if (sizeof(JDIMENSION) != 4) 456 return 0; 457 458 if ((simd_support & JSIMD_SSE2) && 459 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 460 return 1; 461 if (simd_support & JSIMD_MMX) 462 return 1; 463 464 return 0; 465 } 466 467 GLOBAL(void) 468 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, 469 jpeg_component_info * compptr, 470 JSAMPARRAY input_data, 471 JSAMPARRAY * output_data_ptr) 472 { 473 if ((simd_support & JSIMD_SSE2) && 474 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 475 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, 476 compptr->downsampled_width, input_data, output_data_ptr); 477 else if (simd_support & JSIMD_MMX) 478 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, 479 compptr->downsampled_width, input_data, output_data_ptr); 480 } 481 482 GLOBAL(void) 483 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, 484 jpeg_component_info * compptr, 485 JSAMPARRAY input_data, 486 JSAMPARRAY * output_data_ptr) 487 { 488 if ((simd_support & JSIMD_SSE2) && 489 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 490 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, 491 compptr->downsampled_width, input_data, output_data_ptr); 492 else if (simd_support & JSIMD_MMX) 493 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor, 494 compptr->downsampled_width, input_data, output_data_ptr); 495 } 496 497 GLOBAL(int) 498 jsimd_can_h2v2_merged_upsample (void) 499 { 500 init_simd(); 501 502 /* The code is optimised for these values only */ 503 if (BITS_IN_JSAMPLE != 8) 504 return 0; 505 if (sizeof(JDIMENSION) != 4) 506 return 0; 507 508 if ((simd_support & JSIMD_SSE2) && 509 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 510 return 1; 511 if (simd_support & JSIMD_MMX) 512 return 1; 513 514 return 0; 515 } 516 517 GLOBAL(int) 518 jsimd_can_h2v1_merged_upsample (void) 519 { 520 init_simd(); 521 522 /* The code is optimised for these values only */ 523 if (BITS_IN_JSAMPLE != 8) 524 return 0; 525 if (sizeof(JDIMENSION) != 4) 526 return 0; 527 528 if ((simd_support & JSIMD_SSE2) && 529 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 530 return 1; 531 if (simd_support & JSIMD_MMX) 532 return 1; 533 534 return 0; 535 } 536 537 GLOBAL(void) 538 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, 539 JSAMPIMAGE input_buf, 540 JDIMENSION in_row_group_ctr, 541 JSAMPARRAY output_buf) 542 { 543 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 544 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 545 546 switch(cinfo->out_color_space) 547 { 548 case JCS_EXT_RGB: 549 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; 550 mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx; 551 break; 552 case JCS_EXT_RGBX: 553 case JCS_EXT_RGBA: 554 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; 555 mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx; 556 break; 557 case JCS_EXT_BGR: 558 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; 559 mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx; 560 break; 561 case JCS_EXT_BGRX: 562 case JCS_EXT_BGRA: 563 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; 564 mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx; 565 break; 566 case JCS_EXT_XBGR: 567 case JCS_EXT_ABGR: 568 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; 569 mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx; 570 break; 571 case JCS_EXT_XRGB: 572 case JCS_EXT_ARGB: 573 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; 574 mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx; 575 break; 576 default: 577 sse2fct=jsimd_h2v2_merged_upsample_sse2; 578 mmxfct=jsimd_h2v2_merged_upsample_mmx; 579 break; 580 } 581 582 if ((simd_support & JSIMD_SSE2) && 583 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 584 sse2fct(cinfo->output_width, input_buf, 585 in_row_group_ctr, output_buf); 586 else if (simd_support & JSIMD_MMX) 587 mmxfct(cinfo->output_width, input_buf, 588 in_row_group_ctr, output_buf); 589 } 590 591 GLOBAL(void) 592 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, 593 JSAMPIMAGE input_buf, 594 JDIMENSION in_row_group_ctr, 595 JSAMPARRAY output_buf) 596 { 597 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 598 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 599 600 switch(cinfo->out_color_space) 601 { 602 case JCS_EXT_RGB: 603 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; 604 mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx; 605 break; 606 case JCS_EXT_RGBX: 607 case JCS_EXT_RGBA: 608 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; 609 mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx; 610 break; 611 case JCS_EXT_BGR: 612 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; 613 mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx; 614 break; 615 case JCS_EXT_BGRX: 616 case JCS_EXT_BGRA: 617 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; 618 mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx; 619 break; 620 case JCS_EXT_XBGR: 621 case JCS_EXT_ABGR: 622 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; 623 mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx; 624 break; 625 case JCS_EXT_XRGB: 626 case JCS_EXT_ARGB: 627 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; 628 mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx; 629 break; 630 default: 631 sse2fct=jsimd_h2v1_merged_upsample_sse2; 632 mmxfct=jsimd_h2v1_merged_upsample_mmx; 633 break; 634 } 635 636 if ((simd_support & JSIMD_SSE2) && 637 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 638 sse2fct(cinfo->output_width, input_buf, 639 in_row_group_ctr, output_buf); 640 else if (simd_support & JSIMD_MMX) 641 mmxfct(cinfo->output_width, input_buf, 642 in_row_group_ctr, output_buf); 643 } 644 645 #ifndef JPEG_DECODE_ONLY 646 GLOBAL(int) 647 jsimd_can_convsamp (void) 648 { 649 init_simd(); 650 651 /* The code is optimised for these values only */ 652 if (DCTSIZE != 8) 653 return 0; 654 if (BITS_IN_JSAMPLE != 8) 655 return 0; 656 if (sizeof(JDIMENSION) != 4) 657 return 0; 658 if (sizeof(DCTELEM) != 2) 659 return 0; 660 661 if (simd_support & JSIMD_SSE2) 662 return 1; 663 if (simd_support & JSIMD_MMX) 664 return 1; 665 666 return 0; 667 } 668 669 GLOBAL(int) 670 jsimd_can_convsamp_float (void) 671 { 672 init_simd(); 673 674 /* The code is optimised for these values only */ 675 if (DCTSIZE != 8) 676 return 0; 677 if (BITS_IN_JSAMPLE != 8) 678 return 0; 679 if (sizeof(JDIMENSION) != 4) 680 return 0; 681 if (sizeof(FAST_FLOAT) != 4) 682 return 0; 683 684 if (simd_support & JSIMD_SSE2) 685 return 1; 686 if (simd_support & JSIMD_SSE) 687 return 1; 688 if (simd_support & JSIMD_3DNOW) 689 return 1; 690 691 return 0; 692 } 693 694 GLOBAL(void) 695 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, 696 DCTELEM * workspace) 697 { 698 if (simd_support & JSIMD_SSE2) 699 jsimd_convsamp_sse2(sample_data, start_col, workspace); 700 else if (simd_support & JSIMD_MMX) 701 jsimd_convsamp_mmx(sample_data, start_col, workspace); 702 } 703 704 GLOBAL(void) 705 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, 706 FAST_FLOAT * workspace) 707 { 708 if (simd_support & JSIMD_SSE2) 709 jsimd_convsamp_float_sse2(sample_data, start_col, workspace); 710 else if (simd_support & JSIMD_SSE) 711 jsimd_convsamp_float_sse(sample_data, start_col, workspace); 712 else if (simd_support & JSIMD_3DNOW) 713 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace); 714 } 715 716 GLOBAL(int) 717 jsimd_can_fdct_islow (void) 718 { 719 init_simd(); 720 721 /* The code is optimised for these values only */ 722 if (DCTSIZE != 8) 723 return 0; 724 if (sizeof(DCTELEM) != 2) 725 return 0; 726 727 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) 728 return 1; 729 if (simd_support & JSIMD_MMX) 730 return 1; 731 732 return 0; 733 } 734 735 GLOBAL(int) 736 jsimd_can_fdct_ifast (void) 737 { 738 init_simd(); 739 740 /* The code is optimised for these values only */ 741 if (DCTSIZE != 8) 742 return 0; 743 if (sizeof(DCTELEM) != 2) 744 return 0; 745 746 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) 747 return 1; 748 if (simd_support & JSIMD_MMX) 749 return 1; 750 751 return 0; 752 } 753 754 GLOBAL(int) 755 jsimd_can_fdct_float (void) 756 { 757 init_simd(); 758 759 /* The code is optimised for these values only */ 760 if (DCTSIZE != 8) 761 return 0; 762 if (sizeof(FAST_FLOAT) != 4) 763 return 0; 764 765 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) 766 return 1; 767 if (simd_support & JSIMD_3DNOW) 768 return 1; 769 770 return 0; 771 } 772 773 GLOBAL(void) 774 jsimd_fdct_islow (DCTELEM * data) 775 { 776 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) 777 jsimd_fdct_islow_sse2(data); 778 else if (simd_support & JSIMD_MMX) 779 jsimd_fdct_islow_mmx(data); 780 } 781 782 GLOBAL(void) 783 jsimd_fdct_ifast (DCTELEM * data) 784 { 785 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) 786 jsimd_fdct_ifast_sse2(data); 787 else if (simd_support & JSIMD_MMX) 788 jsimd_fdct_ifast_mmx(data); 789 } 790 791 GLOBAL(void) 792 jsimd_fdct_float (FAST_FLOAT * data) 793 { 794 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) 795 jsimd_fdct_float_sse(data); 796 else if (simd_support & JSIMD_3DNOW) 797 jsimd_fdct_float_3dnow(data); 798 } 799 800 GLOBAL(int) 801 jsimd_can_quantize (void) 802 { 803 init_simd(); 804 805 /* The code is optimised for these values only */ 806 if (DCTSIZE != 8) 807 return 0; 808 if (sizeof(JCOEF) != 2) 809 return 0; 810 if (sizeof(DCTELEM) != 2) 811 return 0; 812 813 if (simd_support & JSIMD_SSE2) 814 return 1; 815 if (simd_support & JSIMD_MMX) 816 return 1; 817 818 return 0; 819 } 820 821 GLOBAL(int) 822 jsimd_can_quantize_float (void) 823 { 824 init_simd(); 825 826 /* The code is optimised for these values only */ 827 if (DCTSIZE != 8) 828 return 0; 829 if (sizeof(JCOEF) != 2) 830 return 0; 831 if (sizeof(FAST_FLOAT) != 4) 832 return 0; 833 834 if (simd_support & JSIMD_SSE2) 835 return 1; 836 if (simd_support & JSIMD_SSE) 837 return 1; 838 if (simd_support & JSIMD_3DNOW) 839 return 1; 840 841 return 0; 842 } 843 844 GLOBAL(void) 845 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, 846 DCTELEM * workspace) 847 { 848 if (simd_support & JSIMD_SSE2) 849 jsimd_quantize_sse2(coef_block, divisors, workspace); 850 else if (simd_support & JSIMD_MMX) 851 jsimd_quantize_mmx(coef_block, divisors, workspace); 852 } 853 854 GLOBAL(void) 855 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, 856 FAST_FLOAT * workspace) 857 { 858 if (simd_support & JSIMD_SSE2) 859 jsimd_quantize_float_sse2(coef_block, divisors, workspace); 860 else if (simd_support & JSIMD_SSE) 861 jsimd_quantize_float_sse(coef_block, divisors, workspace); 862 else if (simd_support & JSIMD_3DNOW) 863 jsimd_quantize_float_3dnow(coef_block, divisors, workspace); 864 } 865 #endif 866 867 GLOBAL(int) 868 jsimd_can_idct_2x2 (void) 869 { 870 init_simd(); 871 872 /* The code is optimised for these values only */ 873 if (DCTSIZE != 8) 874 return 0; 875 if (sizeof(JCOEF) != 2) 876 return 0; 877 if (BITS_IN_JSAMPLE != 8) 878 return 0; 879 if (sizeof(JDIMENSION) != 4) 880 return 0; 881 if (sizeof(ISLOW_MULT_TYPE) != 2) 882 return 0; 883 884 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 885 return 1; 886 if (simd_support & JSIMD_MMX) 887 return 1; 888 889 return 0; 890 } 891 892 GLOBAL(int) 893 jsimd_can_idct_4x4 (void) 894 { 895 init_simd(); 896 897 /* The code is optimised for these values only */ 898 if (DCTSIZE != 8) 899 return 0; 900 if (sizeof(JCOEF) != 2) 901 return 0; 902 if (BITS_IN_JSAMPLE != 8) 903 return 0; 904 if (sizeof(JDIMENSION) != 4) 905 return 0; 906 if (sizeof(ISLOW_MULT_TYPE) != 2) 907 return 0; 908 909 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 910 return 1; 911 if (simd_support & JSIMD_MMX) 912 return 1; 913 914 return 0; 915 } 916 917 GLOBAL(void) 918 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 919 JCOEFPTR coef_block, JSAMPARRAY output_buf, 920 JDIMENSION output_col) 921 { 922 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 923 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); 924 else if (simd_support & JSIMD_MMX) 925 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col); 926 } 927 928 GLOBAL(void) 929 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 930 JCOEFPTR coef_block, JSAMPARRAY output_buf, 931 JDIMENSION output_col) 932 { 933 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 934 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); 935 else if (simd_support & JSIMD_MMX) 936 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col); 937 } 938 939 GLOBAL(int) 940 jsimd_can_idct_islow (void) 941 { 942 init_simd(); 943 944 /* The code is optimised for these values only */ 945 if (DCTSIZE != 8) 946 return 0; 947 if (sizeof(JCOEF) != 2) 948 return 0; 949 if (BITS_IN_JSAMPLE != 8) 950 return 0; 951 if (sizeof(JDIMENSION) != 4) 952 return 0; 953 if (sizeof(ISLOW_MULT_TYPE) != 2) 954 return 0; 955 956 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) 957 return 1; 958 if (simd_support & JSIMD_MMX) 959 return 1; 960 961 return 0; 962 } 963 964 GLOBAL(int) 965 jsimd_can_idct_ifast (void) 966 { 967 init_simd(); 968 969 /* The code is optimised for these values only */ 970 if (DCTSIZE != 8) 971 return 0; 972 if (sizeof(JCOEF) != 2) 973 return 0; 974 if (BITS_IN_JSAMPLE != 8) 975 return 0; 976 if (sizeof(JDIMENSION) != 4) 977 return 0; 978 if (sizeof(IFAST_MULT_TYPE) != 2) 979 return 0; 980 if (IFAST_SCALE_BITS != 2) 981 return 0; 982 983 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) 984 return 1; 985 if (simd_support & JSIMD_MMX) 986 return 1; 987 988 return 0; 989 } 990 991 GLOBAL(int) 992 jsimd_can_idct_float (void) 993 { 994 init_simd(); 995 996 if (DCTSIZE != 8) 997 return 0; 998 if (sizeof(JCOEF) != 2) 999 return 0; 1000 if (BITS_IN_JSAMPLE != 8) 1001 return 0; 1002 if (sizeof(JDIMENSION) != 4) 1003 return 0; 1004 if (sizeof(FAST_FLOAT) != 4) 1005 return 0; 1006 if (sizeof(FLOAT_MULT_TYPE) != 4) 1007 return 0; 1008 1009 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) 1010 return 1; 1011 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) 1012 return 1; 1013 if (simd_support & JSIMD_3DNOW) 1014 return 1; 1015 1016 return 0; 1017 } 1018 1019 GLOBAL(void) 1020 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1021 JCOEFPTR coef_block, JSAMPARRAY output_buf, 1022 JDIMENSION output_col) 1023 { 1024 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) 1025 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col); 1026 else if (simd_support & JSIMD_MMX) 1027 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col); 1028 } 1029 1030 GLOBAL(void) 1031 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1032 JCOEFPTR coef_block, JSAMPARRAY output_buf, 1033 JDIMENSION output_col) 1034 { 1035 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) 1036 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col); 1037 else if (simd_support & JSIMD_MMX) 1038 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col); 1039 } 1040 1041 GLOBAL(void) 1042 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1043 JCOEFPTR coef_block, JSAMPARRAY output_buf, 1044 JDIMENSION output_col) 1045 { 1046 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) 1047 jsimd_idct_float_sse2(compptr->dct_table, coef_block, 1048 output_buf, output_col); 1049 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) 1050 jsimd_idct_float_sse(compptr->dct_table, coef_block, 1051 output_buf, output_col); 1052 else if (simd_support & JSIMD_3DNOW) 1053 jsimd_idct_float_3dnow(compptr->dct_table, coef_block, 1054 output_buf, output_col); 1055 } 1056