1 /* 2 * jsimd_i386.c 3 * 4 * Copyright 2009 Pierre Ossman <ossman (at) cendio.se> for Cendio AB 5 * Copyright (C) 2009-2011, 2013-2014, 2016, D. R. Commander. 6 * Copyright (C) 2015, Matthieu Darbois. 7 * 8 * Based on the x86 SIMD extension for IJG JPEG library, 9 * Copyright (C) 1999-2006, MIYASAKA Masaru. 10 * For conditions of distribution and use, see copyright notice in jsimdext.inc 11 * 12 * This file contains the interface between the "normal" portions 13 * of the library and the SIMD implementations when running on a 14 * 32-bit x86 architecture. 15 */ 16 17 #define JPEG_INTERNALS 18 #include "../jinclude.h" 19 #include "../jpeglib.h" 20 #include "../jsimd.h" 21 #include "../jdct.h" 22 #include "../jsimddct.h" 23 #include "jsimd.h" 24 25 /* 26 * In the PIC cases, we have no guarantee that constants will keep 27 * their alignment. This macro allows us to verify it at runtime. 28 */ 29 #define IS_ALIGNED(ptr, order) (((unsigned)ptr & ((1 << order) - 1)) == 0) 30 31 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ 32 33 static unsigned int simd_support = ~0; 34 static unsigned int simd_huffman = 1; 35 36 /* 37 * Check what SIMD accelerations are supported. 38 * 39 * FIXME: This code is racy under a multi-threaded environment. 40 */ 41 LOCAL(void) 42 init_simd (void) 43 { 44 char *env = NULL; 45 46 if (simd_support != ~0U) 47 return; 48 49 simd_support = jpeg_simd_cpu_support(); 50 51 /* Force different settings through environment variables */ 52 env = getenv("JSIMD_FORCEMMX"); 53 if ((env != NULL) && (strcmp(env, "1") == 0)) 54 simd_support &= JSIMD_MMX; 55 env = getenv("JSIMD_FORCE3DNOW"); 56 if ((env != NULL) && (strcmp(env, "1") == 0)) 57 simd_support &= JSIMD_3DNOW|JSIMD_MMX; 58 env = getenv("JSIMD_FORCESSE"); 59 if ((env != NULL) && (strcmp(env, "1") == 0)) 60 simd_support &= JSIMD_SSE|JSIMD_MMX; 61 env = getenv("JSIMD_FORCESSE2"); 62 if ((env != NULL) && (strcmp(env, "1") == 0)) 63 simd_support &= JSIMD_SSE2; 64 env = getenv("JSIMD_FORCENONE"); 65 if ((env != NULL) && (strcmp(env, "1") == 0)) 66 simd_support = 0; 67 env = getenv("JSIMD_NOHUFFENC"); 68 if ((env != NULL) && (strcmp(env, "1") == 0)) 69 simd_huffman = 0; 70 } 71 72 GLOBAL(int) 73 jsimd_can_rgb_ycc (void) 74 { 75 init_simd(); 76 77 /* The code is optimised for these values only */ 78 if (BITS_IN_JSAMPLE != 8) 79 return 0; 80 if (sizeof(JDIMENSION) != 4) 81 return 0; 82 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 83 return 0; 84 85 if ((simd_support & JSIMD_SSE2) && 86 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) 87 return 1; 88 if (simd_support & JSIMD_MMX) 89 return 1; 90 91 return 0; 92 } 93 94 GLOBAL(int) 95 jsimd_can_rgb_gray (void) 96 { 97 init_simd(); 98 99 /* The code is optimised for these values only */ 100 if (BITS_IN_JSAMPLE != 8) 101 return 0; 102 if (sizeof(JDIMENSION) != 4) 103 return 0; 104 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 105 return 0; 106 107 if ((simd_support & JSIMD_SSE2) && 108 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) 109 return 1; 110 if (simd_support & JSIMD_MMX) 111 return 1; 112 113 return 0; 114 } 115 116 GLOBAL(int) 117 jsimd_can_ycc_rgb (void) 118 { 119 init_simd(); 120 121 /* The code is optimised for these values only */ 122 if (BITS_IN_JSAMPLE != 8) 123 return 0; 124 if (sizeof(JDIMENSION) != 4) 125 return 0; 126 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 127 return 0; 128 129 if ((simd_support & JSIMD_SSE2) && 130 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) 131 return 1; 132 if (simd_support & JSIMD_MMX) 133 return 1; 134 135 return 0; 136 } 137 138 GLOBAL(int) 139 jsimd_can_ycc_rgb565 (void) 140 { 141 return 0; 142 } 143 144 GLOBAL(void) 145 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, 146 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 147 JDIMENSION output_row, int num_rows) 148 { 149 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 150 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 151 152 switch(cinfo->in_color_space) { 153 case JCS_EXT_RGB: 154 sse2fct=jsimd_extrgb_ycc_convert_sse2; 155 mmxfct=jsimd_extrgb_ycc_convert_mmx; 156 break; 157 case JCS_EXT_RGBX: 158 case JCS_EXT_RGBA: 159 sse2fct=jsimd_extrgbx_ycc_convert_sse2; 160 mmxfct=jsimd_extrgbx_ycc_convert_mmx; 161 break; 162 case JCS_EXT_BGR: 163 sse2fct=jsimd_extbgr_ycc_convert_sse2; 164 mmxfct=jsimd_extbgr_ycc_convert_mmx; 165 break; 166 case JCS_EXT_BGRX: 167 case JCS_EXT_BGRA: 168 sse2fct=jsimd_extbgrx_ycc_convert_sse2; 169 mmxfct=jsimd_extbgrx_ycc_convert_mmx; 170 break; 171 case JCS_EXT_XBGR: 172 case JCS_EXT_ABGR: 173 sse2fct=jsimd_extxbgr_ycc_convert_sse2; 174 mmxfct=jsimd_extxbgr_ycc_convert_mmx; 175 break; 176 case JCS_EXT_XRGB: 177 case JCS_EXT_ARGB: 178 sse2fct=jsimd_extxrgb_ycc_convert_sse2; 179 mmxfct=jsimd_extxrgb_ycc_convert_mmx; 180 break; 181 default: 182 sse2fct=jsimd_rgb_ycc_convert_sse2; 183 mmxfct=jsimd_rgb_ycc_convert_mmx; 184 break; 185 } 186 187 if ((simd_support & JSIMD_SSE2) && 188 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) 189 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 190 else if (simd_support & JSIMD_MMX) 191 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 192 } 193 194 GLOBAL(void) 195 jsimd_rgb_gray_convert (j_compress_ptr cinfo, 196 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 197 JDIMENSION output_row, int num_rows) 198 { 199 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 200 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 201 202 switch(cinfo->in_color_space) { 203 case JCS_EXT_RGB: 204 sse2fct=jsimd_extrgb_gray_convert_sse2; 205 mmxfct=jsimd_extrgb_gray_convert_mmx; 206 break; 207 case JCS_EXT_RGBX: 208 case JCS_EXT_RGBA: 209 sse2fct=jsimd_extrgbx_gray_convert_sse2; 210 mmxfct=jsimd_extrgbx_gray_convert_mmx; 211 break; 212 case JCS_EXT_BGR: 213 sse2fct=jsimd_extbgr_gray_convert_sse2; 214 mmxfct=jsimd_extbgr_gray_convert_mmx; 215 break; 216 case JCS_EXT_BGRX: 217 case JCS_EXT_BGRA: 218 sse2fct=jsimd_extbgrx_gray_convert_sse2; 219 mmxfct=jsimd_extbgrx_gray_convert_mmx; 220 break; 221 case JCS_EXT_XBGR: 222 case JCS_EXT_ABGR: 223 sse2fct=jsimd_extxbgr_gray_convert_sse2; 224 mmxfct=jsimd_extxbgr_gray_convert_mmx; 225 break; 226 case JCS_EXT_XRGB: 227 case JCS_EXT_ARGB: 228 sse2fct=jsimd_extxrgb_gray_convert_sse2; 229 mmxfct=jsimd_extxrgb_gray_convert_mmx; 230 break; 231 default: 232 sse2fct=jsimd_rgb_gray_convert_sse2; 233 mmxfct=jsimd_rgb_gray_convert_mmx; 234 break; 235 } 236 237 if ((simd_support & JSIMD_SSE2) && 238 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) 239 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 240 else if (simd_support & JSIMD_MMX) 241 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 242 } 243 244 GLOBAL(void) 245 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, 246 JSAMPIMAGE input_buf, JDIMENSION input_row, 247 JSAMPARRAY output_buf, int num_rows) 248 { 249 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 250 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 251 252 switch(cinfo->out_color_space) { 253 case JCS_EXT_RGB: 254 sse2fct=jsimd_ycc_extrgb_convert_sse2; 255 mmxfct=jsimd_ycc_extrgb_convert_mmx; 256 break; 257 case JCS_EXT_RGBX: 258 case JCS_EXT_RGBA: 259 sse2fct=jsimd_ycc_extrgbx_convert_sse2; 260 mmxfct=jsimd_ycc_extrgbx_convert_mmx; 261 break; 262 case JCS_EXT_BGR: 263 sse2fct=jsimd_ycc_extbgr_convert_sse2; 264 mmxfct=jsimd_ycc_extbgr_convert_mmx; 265 break; 266 case JCS_EXT_BGRX: 267 case JCS_EXT_BGRA: 268 sse2fct=jsimd_ycc_extbgrx_convert_sse2; 269 mmxfct=jsimd_ycc_extbgrx_convert_mmx; 270 break; 271 case JCS_EXT_XBGR: 272 case JCS_EXT_ABGR: 273 sse2fct=jsimd_ycc_extxbgr_convert_sse2; 274 mmxfct=jsimd_ycc_extxbgr_convert_mmx; 275 break; 276 case JCS_EXT_XRGB: 277 case JCS_EXT_ARGB: 278 sse2fct=jsimd_ycc_extxrgb_convert_sse2; 279 mmxfct=jsimd_ycc_extxrgb_convert_mmx; 280 break; 281 default: 282 sse2fct=jsimd_ycc_rgb_convert_sse2; 283 mmxfct=jsimd_ycc_rgb_convert_mmx; 284 break; 285 } 286 287 if ((simd_support & JSIMD_SSE2) && 288 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) 289 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); 290 else if (simd_support & JSIMD_MMX) 291 mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); 292 } 293 294 GLOBAL(void) 295 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, 296 JSAMPIMAGE input_buf, JDIMENSION input_row, 297 JSAMPARRAY output_buf, int num_rows) 298 { 299 } 300 301 GLOBAL(int) 302 jsimd_can_h2v2_downsample (void) 303 { 304 init_simd(); 305 306 /* The code is optimised for these values only */ 307 if (BITS_IN_JSAMPLE != 8) 308 return 0; 309 if (sizeof(JDIMENSION) != 4) 310 return 0; 311 312 if (simd_support & JSIMD_SSE2) 313 return 1; 314 if (simd_support & JSIMD_MMX) 315 return 1; 316 317 return 0; 318 } 319 320 GLOBAL(int) 321 jsimd_can_h2v1_downsample (void) 322 { 323 init_simd(); 324 325 /* The code is optimised for these values only */ 326 if (BITS_IN_JSAMPLE != 8) 327 return 0; 328 if (sizeof(JDIMENSION) != 4) 329 return 0; 330 331 if (simd_support & JSIMD_SSE2) 332 return 1; 333 if (simd_support & JSIMD_MMX) 334 return 1; 335 336 return 0; 337 } 338 339 GLOBAL(void) 340 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, 341 JSAMPARRAY input_data, JSAMPARRAY output_data) 342 { 343 if (simd_support & JSIMD_SSE2) 344 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, 345 compptr->v_samp_factor, 346 compptr->width_in_blocks, input_data, 347 output_data); 348 else if (simd_support & JSIMD_MMX) 349 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, 350 compptr->v_samp_factor, compptr->width_in_blocks, 351 input_data, output_data); 352 } 353 354 GLOBAL(void) 355 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, 356 JSAMPARRAY input_data, JSAMPARRAY output_data) 357 { 358 if (simd_support & JSIMD_SSE2) 359 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, 360 compptr->v_samp_factor, 361 compptr->width_in_blocks, input_data, 362 output_data); 363 else if (simd_support & JSIMD_MMX) 364 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, 365 compptr->v_samp_factor, compptr->width_in_blocks, 366 input_data, output_data); 367 } 368 369 GLOBAL(int) 370 jsimd_can_h2v2_upsample (void) 371 { 372 init_simd(); 373 374 /* The code is optimised for these values only */ 375 if (BITS_IN_JSAMPLE != 8) 376 return 0; 377 if (sizeof(JDIMENSION) != 4) 378 return 0; 379 380 if (simd_support & JSIMD_SSE2) 381 return 1; 382 if (simd_support & JSIMD_MMX) 383 return 1; 384 385 return 0; 386 } 387 388 GLOBAL(int) 389 jsimd_can_h2v1_upsample (void) 390 { 391 init_simd(); 392 393 /* The code is optimised for these values only */ 394 if (BITS_IN_JSAMPLE != 8) 395 return 0; 396 if (sizeof(JDIMENSION) != 4) 397 return 0; 398 399 if (simd_support & JSIMD_SSE2) 400 return 1; 401 if (simd_support & JSIMD_MMX) 402 return 1; 403 404 return 0; 405 } 406 407 GLOBAL(void) 408 jsimd_h2v2_upsample (j_decompress_ptr cinfo, 409 jpeg_component_info *compptr, 410 JSAMPARRAY input_data, 411 JSAMPARRAY *output_data_ptr) 412 { 413 if (simd_support & JSIMD_SSE2) 414 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, 415 input_data, output_data_ptr); 416 else if (simd_support & JSIMD_MMX) 417 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width, 418 input_data, output_data_ptr); 419 } 420 421 GLOBAL(void) 422 jsimd_h2v1_upsample (j_decompress_ptr cinfo, 423 jpeg_component_info *compptr, 424 JSAMPARRAY input_data, 425 JSAMPARRAY *output_data_ptr) 426 { 427 if (simd_support & JSIMD_SSE2) 428 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, 429 input_data, output_data_ptr); 430 else if (simd_support & JSIMD_MMX) 431 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width, 432 input_data, output_data_ptr); 433 } 434 435 GLOBAL(int) 436 jsimd_can_h2v2_fancy_upsample (void) 437 { 438 init_simd(); 439 440 /* The code is optimised for these values only */ 441 if (BITS_IN_JSAMPLE != 8) 442 return 0; 443 if (sizeof(JDIMENSION) != 4) 444 return 0; 445 446 if ((simd_support & JSIMD_SSE2) && 447 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 448 return 1; 449 if (simd_support & JSIMD_MMX) 450 return 1; 451 452 return 0; 453 } 454 455 GLOBAL(int) 456 jsimd_can_h2v1_fancy_upsample (void) 457 { 458 init_simd(); 459 460 /* The code is optimised for these values only */ 461 if (BITS_IN_JSAMPLE != 8) 462 return 0; 463 if (sizeof(JDIMENSION) != 4) 464 return 0; 465 466 if ((simd_support & JSIMD_SSE2) && 467 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 468 return 1; 469 if (simd_support & JSIMD_MMX) 470 return 1; 471 472 return 0; 473 } 474 475 GLOBAL(void) 476 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, 477 jpeg_component_info *compptr, 478 JSAMPARRAY input_data, 479 JSAMPARRAY *output_data_ptr) 480 { 481 if ((simd_support & JSIMD_SSE2) && 482 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 483 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, 484 compptr->downsampled_width, input_data, 485 output_data_ptr); 486 else if (simd_support & JSIMD_MMX) 487 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, 488 compptr->downsampled_width, input_data, 489 output_data_ptr); 490 } 491 492 GLOBAL(void) 493 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, 494 jpeg_component_info *compptr, 495 JSAMPARRAY input_data, 496 JSAMPARRAY *output_data_ptr) 497 { 498 if ((simd_support & JSIMD_SSE2) && 499 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 500 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, 501 compptr->downsampled_width, input_data, 502 output_data_ptr); 503 else if (simd_support & JSIMD_MMX) 504 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor, 505 compptr->downsampled_width, input_data, 506 output_data_ptr); 507 } 508 509 GLOBAL(int) 510 jsimd_can_h2v2_merged_upsample (void) 511 { 512 init_simd(); 513 514 /* The code is optimised for these values only */ 515 if (BITS_IN_JSAMPLE != 8) 516 return 0; 517 if (sizeof(JDIMENSION) != 4) 518 return 0; 519 520 if ((simd_support & JSIMD_SSE2) && 521 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 522 return 1; 523 if (simd_support & JSIMD_MMX) 524 return 1; 525 526 return 0; 527 } 528 529 GLOBAL(int) 530 jsimd_can_h2v1_merged_upsample (void) 531 { 532 init_simd(); 533 534 /* The code is optimised for these values only */ 535 if (BITS_IN_JSAMPLE != 8) 536 return 0; 537 if (sizeof(JDIMENSION) != 4) 538 return 0; 539 540 if ((simd_support & JSIMD_SSE2) && 541 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 542 return 1; 543 if (simd_support & JSIMD_MMX) 544 return 1; 545 546 return 0; 547 } 548 549 GLOBAL(void) 550 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, 551 JSAMPIMAGE input_buf, 552 JDIMENSION in_row_group_ctr, 553 JSAMPARRAY output_buf) 554 { 555 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 556 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 557 558 switch(cinfo->out_color_space) { 559 case JCS_EXT_RGB: 560 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; 561 mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx; 562 break; 563 case JCS_EXT_RGBX: 564 case JCS_EXT_RGBA: 565 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; 566 mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx; 567 break; 568 case JCS_EXT_BGR: 569 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; 570 mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx; 571 break; 572 case JCS_EXT_BGRX: 573 case JCS_EXT_BGRA: 574 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; 575 mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx; 576 break; 577 case JCS_EXT_XBGR: 578 case JCS_EXT_ABGR: 579 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; 580 mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx; 581 break; 582 case JCS_EXT_XRGB: 583 case JCS_EXT_ARGB: 584 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; 585 mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx; 586 break; 587 default: 588 sse2fct=jsimd_h2v2_merged_upsample_sse2; 589 mmxfct=jsimd_h2v2_merged_upsample_mmx; 590 break; 591 } 592 593 if ((simd_support & JSIMD_SSE2) && 594 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 595 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 596 else if (simd_support & JSIMD_MMX) 597 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 598 } 599 600 GLOBAL(void) 601 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, 602 JSAMPIMAGE input_buf, 603 JDIMENSION in_row_group_ctr, 604 JSAMPARRAY output_buf) 605 { 606 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 607 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 608 609 switch(cinfo->out_color_space) { 610 case JCS_EXT_RGB: 611 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; 612 mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx; 613 break; 614 case JCS_EXT_RGBX: 615 case JCS_EXT_RGBA: 616 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; 617 mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx; 618 break; 619 case JCS_EXT_BGR: 620 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; 621 mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx; 622 break; 623 case JCS_EXT_BGRX: 624 case JCS_EXT_BGRA: 625 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; 626 mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx; 627 break; 628 case JCS_EXT_XBGR: 629 case JCS_EXT_ABGR: 630 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; 631 mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx; 632 break; 633 case JCS_EXT_XRGB: 634 case JCS_EXT_ARGB: 635 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; 636 mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx; 637 break; 638 default: 639 sse2fct=jsimd_h2v1_merged_upsample_sse2; 640 mmxfct=jsimd_h2v1_merged_upsample_mmx; 641 break; 642 } 643 644 if ((simd_support & JSIMD_SSE2) && 645 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 646 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 647 else if (simd_support & JSIMD_MMX) 648 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 649 } 650 651 GLOBAL(int) 652 jsimd_can_convsamp (void) 653 { 654 init_simd(); 655 656 /* The code is optimised for these values only */ 657 if (DCTSIZE != 8) 658 return 0; 659 if (BITS_IN_JSAMPLE != 8) 660 return 0; 661 if (sizeof(JDIMENSION) != 4) 662 return 0; 663 if (sizeof(DCTELEM) != 2) 664 return 0; 665 666 if (simd_support & JSIMD_SSE2) 667 return 1; 668 if (simd_support & JSIMD_MMX) 669 return 1; 670 671 return 0; 672 } 673 674 GLOBAL(int) 675 jsimd_can_convsamp_float (void) 676 { 677 init_simd(); 678 679 /* The code is optimised for these values only */ 680 if (DCTSIZE != 8) 681 return 0; 682 if (BITS_IN_JSAMPLE != 8) 683 return 0; 684 if (sizeof(JDIMENSION) != 4) 685 return 0; 686 if (sizeof(FAST_FLOAT) != 4) 687 return 0; 688 689 if (simd_support & JSIMD_SSE2) 690 return 1; 691 if (simd_support & JSIMD_SSE) 692 return 1; 693 if (simd_support & JSIMD_3DNOW) 694 return 1; 695 696 return 0; 697 } 698 699 GLOBAL(void) 700 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, 701 DCTELEM *workspace) 702 { 703 if (simd_support & JSIMD_SSE2) 704 jsimd_convsamp_sse2(sample_data, start_col, workspace); 705 else if (simd_support & JSIMD_MMX) 706 jsimd_convsamp_mmx(sample_data, start_col, workspace); 707 } 708 709 GLOBAL(void) 710 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, 711 FAST_FLOAT *workspace) 712 { 713 if (simd_support & JSIMD_SSE2) 714 jsimd_convsamp_float_sse2(sample_data, start_col, workspace); 715 else if (simd_support & JSIMD_SSE) 716 jsimd_convsamp_float_sse(sample_data, start_col, workspace); 717 else if (simd_support & JSIMD_3DNOW) 718 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace); 719 } 720 721 GLOBAL(int) 722 jsimd_can_fdct_islow (void) 723 { 724 init_simd(); 725 726 /* The code is optimised for these values only */ 727 if (DCTSIZE != 8) 728 return 0; 729 if (sizeof(DCTELEM) != 2) 730 return 0; 731 732 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) 733 return 1; 734 if (simd_support & JSIMD_MMX) 735 return 1; 736 737 return 0; 738 } 739 740 GLOBAL(int) 741 jsimd_can_fdct_ifast (void) 742 { 743 init_simd(); 744 745 /* The code is optimised for these values only */ 746 if (DCTSIZE != 8) 747 return 0; 748 if (sizeof(DCTELEM) != 2) 749 return 0; 750 751 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) 752 return 1; 753 if (simd_support & JSIMD_MMX) 754 return 1; 755 756 return 0; 757 } 758 759 GLOBAL(int) 760 jsimd_can_fdct_float (void) 761 { 762 init_simd(); 763 764 /* The code is optimised for these values only */ 765 if (DCTSIZE != 8) 766 return 0; 767 if (sizeof(FAST_FLOAT) != 4) 768 return 0; 769 770 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) 771 return 1; 772 if (simd_support & JSIMD_3DNOW) 773 return 1; 774 775 return 0; 776 } 777 778 GLOBAL(void) 779 jsimd_fdct_islow (DCTELEM *data) 780 { 781 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) 782 jsimd_fdct_islow_sse2(data); 783 else if (simd_support & JSIMD_MMX) 784 jsimd_fdct_islow_mmx(data); 785 } 786 787 GLOBAL(void) 788 jsimd_fdct_ifast (DCTELEM *data) 789 { 790 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) 791 jsimd_fdct_ifast_sse2(data); 792 else if (simd_support & JSIMD_MMX) 793 jsimd_fdct_ifast_mmx(data); 794 } 795 796 GLOBAL(void) 797 jsimd_fdct_float (FAST_FLOAT *data) 798 { 799 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) 800 jsimd_fdct_float_sse(data); 801 else if (simd_support & JSIMD_3DNOW) 802 jsimd_fdct_float_3dnow(data); 803 } 804 805 GLOBAL(int) 806 jsimd_can_quantize (void) 807 { 808 init_simd(); 809 810 /* The code is optimised for these values only */ 811 if (DCTSIZE != 8) 812 return 0; 813 if (sizeof(JCOEF) != 2) 814 return 0; 815 if (sizeof(DCTELEM) != 2) 816 return 0; 817 818 if (simd_support & JSIMD_SSE2) 819 return 1; 820 if (simd_support & JSIMD_MMX) 821 return 1; 822 823 return 0; 824 } 825 826 GLOBAL(int) 827 jsimd_can_quantize_float (void) 828 { 829 init_simd(); 830 831 /* The code is optimised for these values only */ 832 if (DCTSIZE != 8) 833 return 0; 834 if (sizeof(JCOEF) != 2) 835 return 0; 836 if (sizeof(FAST_FLOAT) != 4) 837 return 0; 838 839 if (simd_support & JSIMD_SSE2) 840 return 1; 841 if (simd_support & JSIMD_SSE) 842 return 1; 843 if (simd_support & JSIMD_3DNOW) 844 return 1; 845 846 return 0; 847 } 848 849 GLOBAL(void) 850 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, 851 DCTELEM *workspace) 852 { 853 if (simd_support & JSIMD_SSE2) 854 jsimd_quantize_sse2(coef_block, divisors, workspace); 855 else if (simd_support & JSIMD_MMX) 856 jsimd_quantize_mmx(coef_block, divisors, workspace); 857 } 858 859 GLOBAL(void) 860 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, 861 FAST_FLOAT *workspace) 862 { 863 if (simd_support & JSIMD_SSE2) 864 jsimd_quantize_float_sse2(coef_block, divisors, workspace); 865 else if (simd_support & JSIMD_SSE) 866 jsimd_quantize_float_sse(coef_block, divisors, workspace); 867 else if (simd_support & JSIMD_3DNOW) 868 jsimd_quantize_float_3dnow(coef_block, divisors, workspace); 869 } 870 871 GLOBAL(int) 872 jsimd_can_idct_2x2 (void) 873 { 874 init_simd(); 875 876 /* The code is optimised for these values only */ 877 if (DCTSIZE != 8) 878 return 0; 879 if (sizeof(JCOEF) != 2) 880 return 0; 881 if (BITS_IN_JSAMPLE != 8) 882 return 0; 883 if (sizeof(JDIMENSION) != 4) 884 return 0; 885 if (sizeof(ISLOW_MULT_TYPE) != 2) 886 return 0; 887 888 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 889 return 1; 890 if (simd_support & JSIMD_MMX) 891 return 1; 892 893 return 0; 894 } 895 896 GLOBAL(int) 897 jsimd_can_idct_4x4 (void) 898 { 899 init_simd(); 900 901 /* The code is optimised for these values only */ 902 if (DCTSIZE != 8) 903 return 0; 904 if (sizeof(JCOEF) != 2) 905 return 0; 906 if (BITS_IN_JSAMPLE != 8) 907 return 0; 908 if (sizeof(JDIMENSION) != 4) 909 return 0; 910 if (sizeof(ISLOW_MULT_TYPE) != 2) 911 return 0; 912 913 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 914 return 1; 915 if (simd_support & JSIMD_MMX) 916 return 1; 917 918 return 0; 919 } 920 921 GLOBAL(void) 922 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, 923 JCOEFPTR coef_block, JSAMPARRAY output_buf, 924 JDIMENSION output_col) 925 { 926 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 927 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, 928 output_col); 929 else if (simd_support & JSIMD_MMX) 930 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col); 931 } 932 933 GLOBAL(void) 934 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, 935 JCOEFPTR coef_block, JSAMPARRAY output_buf, 936 JDIMENSION output_col) 937 { 938 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 939 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, 940 output_col); 941 else if (simd_support & JSIMD_MMX) 942 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col); 943 } 944 945 GLOBAL(int) 946 jsimd_can_idct_islow (void) 947 { 948 init_simd(); 949 950 /* The code is optimised for these values only */ 951 if (DCTSIZE != 8) 952 return 0; 953 if (sizeof(JCOEF) != 2) 954 return 0; 955 if (BITS_IN_JSAMPLE != 8) 956 return 0; 957 if (sizeof(JDIMENSION) != 4) 958 return 0; 959 if (sizeof(ISLOW_MULT_TYPE) != 2) 960 return 0; 961 962 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) 963 return 1; 964 if (simd_support & JSIMD_MMX) 965 return 1; 966 967 return 0; 968 } 969 970 GLOBAL(int) 971 jsimd_can_idct_ifast (void) 972 { 973 init_simd(); 974 975 /* The code is optimised for these values only */ 976 if (DCTSIZE != 8) 977 return 0; 978 if (sizeof(JCOEF) != 2) 979 return 0; 980 if (BITS_IN_JSAMPLE != 8) 981 return 0; 982 if (sizeof(JDIMENSION) != 4) 983 return 0; 984 if (sizeof(IFAST_MULT_TYPE) != 2) 985 return 0; 986 if (IFAST_SCALE_BITS != 2) 987 return 0; 988 989 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) 990 return 1; 991 if (simd_support & JSIMD_MMX) 992 return 1; 993 994 return 0; 995 } 996 997 GLOBAL(int) 998 jsimd_can_idct_float (void) 999 { 1000 init_simd(); 1001 1002 if (DCTSIZE != 8) 1003 return 0; 1004 if (sizeof(JCOEF) != 2) 1005 return 0; 1006 if (BITS_IN_JSAMPLE != 8) 1007 return 0; 1008 if (sizeof(JDIMENSION) != 4) 1009 return 0; 1010 if (sizeof(FAST_FLOAT) != 4) 1011 return 0; 1012 if (sizeof(FLOAT_MULT_TYPE) != 4) 1013 return 0; 1014 1015 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) 1016 return 1; 1017 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) 1018 return 1; 1019 if (simd_support & JSIMD_3DNOW) 1020 return 1; 1021 1022 return 0; 1023 } 1024 1025 GLOBAL(void) 1026 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, 1027 JCOEFPTR coef_block, JSAMPARRAY output_buf, 1028 JDIMENSION output_col) 1029 { 1030 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) 1031 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, 1032 output_col); 1033 else if (simd_support & JSIMD_MMX) 1034 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, 1035 output_col); 1036 } 1037 1038 GLOBAL(void) 1039 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, 1040 JCOEFPTR coef_block, JSAMPARRAY output_buf, 1041 JDIMENSION output_col) 1042 { 1043 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) 1044 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, 1045 output_col); 1046 else if (simd_support & JSIMD_MMX) 1047 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, 1048 output_col); 1049 } 1050 1051 GLOBAL(void) 1052 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, 1053 JCOEFPTR coef_block, JSAMPARRAY output_buf, 1054 JDIMENSION output_col) 1055 { 1056 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) 1057 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, 1058 output_col); 1059 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) 1060 jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf, 1061 output_col); 1062 else if (simd_support & JSIMD_3DNOW) 1063 jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf, 1064 output_col); 1065 } 1066 1067 GLOBAL(int) 1068 jsimd_can_huff_encode_one_block (void) 1069 { 1070 init_simd(); 1071 1072 if (DCTSIZE != 8) 1073 return 0; 1074 if (sizeof(JCOEF) != 2) 1075 return 0; 1076 1077 if ((simd_support & JSIMD_SSE2) && simd_huffman && 1078 IS_ALIGNED_SSE(jconst_huff_encode_one_block)) 1079 return 1; 1080 1081 return 0; 1082 } 1083 1084 GLOBAL(JOCTET*) 1085 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, 1086 int last_dc_val, c_derived_tbl *dctbl, 1087 c_derived_tbl *actbl) 1088 { 1089 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val, 1090 dctbl, actbl); 1091 } 1092