1 /* 2 * jsimd_x86_64.c 3 * 4 * Copyright 2009 Pierre Ossman <ossman (at) cendio.se> for Cendio AB 5 * Copyright (C) 2009-2011, 2014, 2016, D. R. Commander. 6 * Copyright (C) 2015, Matthieu Darbois. 7 * 8 * Based on the x86 SIMD extension for IJG JPEG library, 9 * Copyright (C) 1999-2006, MIYASAKA Masaru. 10 * For conditions of distribution and use, see copyright notice in jsimdext.inc 11 * 12 * This file contains the interface between the "normal" portions 13 * of the library and the SIMD implementations when running on a 14 * 64-bit x86 architecture. 15 */ 16 17 #define JPEG_INTERNALS 18 #include "../jinclude.h" 19 #include "../jpeglib.h" 20 #include "../jsimd.h" 21 #include "../jdct.h" 22 #include "../jsimddct.h" 23 #include "jsimd.h" 24 25 /* 26 * In the PIC cases, we have no guarantee that constants will keep 27 * their alignment. This macro allows us to verify it at runtime. 28 */ 29 #define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0) 30 31 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ 32 33 static unsigned int simd_support = ~0; 34 static unsigned int simd_huffman = 1; 35 36 /* 37 * Check what SIMD accelerations are supported. 38 * 39 * FIXME: This code is racy under a multi-threaded environment. 40 */ 41 LOCAL(void) 42 init_simd (void) 43 { 44 char *env = NULL; 45 46 if (simd_support != ~0U) 47 return; 48 49 simd_support = JSIMD_SSE2 | JSIMD_SSE; 50 51 /* Force different settings through environment variables */ 52 env = getenv("JSIMD_FORCENONE"); 53 if ((env != NULL) && (strcmp(env, "1") == 0)) 54 simd_support = 0; 55 env = getenv("JSIMD_NOHUFFENC"); 56 if ((env != NULL) && (strcmp(env, "1") == 0)) 57 simd_huffman = 0; 58 } 59 60 GLOBAL(int) 61 jsimd_can_rgb_ycc (void) 62 { 63 init_simd(); 64 65 /* The code is optimised for these values only */ 66 if (BITS_IN_JSAMPLE != 8) 67 return 0; 68 if (sizeof(JDIMENSION) != 4) 69 return 0; 70 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 71 return 0; 72 73 if ((simd_support & JSIMD_SSE2) && 74 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) 75 return 1; 76 77 return 0; 78 } 79 80 GLOBAL(int) 81 jsimd_can_rgb_gray (void) 82 { 83 init_simd(); 84 85 /* The code is optimised for these values only */ 86 if (BITS_IN_JSAMPLE != 8) 87 return 0; 88 if (sizeof(JDIMENSION) != 4) 89 return 0; 90 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 91 return 0; 92 93 if ((simd_support & JSIMD_SSE2) && 94 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) 95 return 1; 96 97 return 0; 98 } 99 100 GLOBAL(int) 101 jsimd_can_ycc_rgb (void) 102 { 103 init_simd(); 104 105 /* The code is optimised for these values only */ 106 if (BITS_IN_JSAMPLE != 8) 107 return 0; 108 if (sizeof(JDIMENSION) != 4) 109 return 0; 110 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 111 return 0; 112 113 if ((simd_support & JSIMD_SSE2) && 114 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) 115 return 1; 116 117 return 0; 118 } 119 120 GLOBAL(int) 121 jsimd_can_ycc_rgb565 (void) 122 { 123 return 0; 124 } 125 126 GLOBAL(void) 127 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, 128 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 129 JDIMENSION output_row, int num_rows) 130 { 131 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 132 133 switch(cinfo->in_color_space) { 134 case JCS_EXT_RGB: 135 sse2fct=jsimd_extrgb_ycc_convert_sse2; 136 break; 137 case JCS_EXT_RGBX: 138 case JCS_EXT_RGBA: 139 sse2fct=jsimd_extrgbx_ycc_convert_sse2; 140 break; 141 case JCS_EXT_BGR: 142 sse2fct=jsimd_extbgr_ycc_convert_sse2; 143 break; 144 case JCS_EXT_BGRX: 145 case JCS_EXT_BGRA: 146 sse2fct=jsimd_extbgrx_ycc_convert_sse2; 147 break; 148 case JCS_EXT_XBGR: 149 case JCS_EXT_ABGR: 150 sse2fct=jsimd_extxbgr_ycc_convert_sse2; 151 break; 152 case JCS_EXT_XRGB: 153 case JCS_EXT_ARGB: 154 sse2fct=jsimd_extxrgb_ycc_convert_sse2; 155 break; 156 default: 157 sse2fct=jsimd_rgb_ycc_convert_sse2; 158 break; 159 } 160 161 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 162 } 163 164 GLOBAL(void) 165 jsimd_rgb_gray_convert (j_compress_ptr cinfo, 166 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 167 JDIMENSION output_row, int num_rows) 168 { 169 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 170 171 switch(cinfo->in_color_space) { 172 case JCS_EXT_RGB: 173 sse2fct=jsimd_extrgb_gray_convert_sse2; 174 break; 175 case JCS_EXT_RGBX: 176 case JCS_EXT_RGBA: 177 sse2fct=jsimd_extrgbx_gray_convert_sse2; 178 break; 179 case JCS_EXT_BGR: 180 sse2fct=jsimd_extbgr_gray_convert_sse2; 181 break; 182 case JCS_EXT_BGRX: 183 case JCS_EXT_BGRA: 184 sse2fct=jsimd_extbgrx_gray_convert_sse2; 185 break; 186 case JCS_EXT_XBGR: 187 case JCS_EXT_ABGR: 188 sse2fct=jsimd_extxbgr_gray_convert_sse2; 189 break; 190 case JCS_EXT_XRGB: 191 case JCS_EXT_ARGB: 192 sse2fct=jsimd_extxrgb_gray_convert_sse2; 193 break; 194 default: 195 sse2fct=jsimd_rgb_gray_convert_sse2; 196 break; 197 } 198 199 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 200 } 201 202 GLOBAL(void) 203 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, 204 JSAMPIMAGE input_buf, JDIMENSION input_row, 205 JSAMPARRAY output_buf, int num_rows) 206 { 207 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 208 209 switch(cinfo->out_color_space) { 210 case JCS_EXT_RGB: 211 sse2fct=jsimd_ycc_extrgb_convert_sse2; 212 break; 213 case JCS_EXT_RGBX: 214 case JCS_EXT_RGBA: 215 sse2fct=jsimd_ycc_extrgbx_convert_sse2; 216 break; 217 case JCS_EXT_BGR: 218 sse2fct=jsimd_ycc_extbgr_convert_sse2; 219 break; 220 case JCS_EXT_BGRX: 221 case JCS_EXT_BGRA: 222 sse2fct=jsimd_ycc_extbgrx_convert_sse2; 223 break; 224 case JCS_EXT_XBGR: 225 case JCS_EXT_ABGR: 226 sse2fct=jsimd_ycc_extxbgr_convert_sse2; 227 break; 228 case JCS_EXT_XRGB: 229 case JCS_EXT_ARGB: 230 sse2fct=jsimd_ycc_extxrgb_convert_sse2; 231 break; 232 default: 233 sse2fct=jsimd_ycc_rgb_convert_sse2; 234 break; 235 } 236 237 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); 238 } 239 240 GLOBAL(void) 241 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, 242 JSAMPIMAGE input_buf, JDIMENSION input_row, 243 JSAMPARRAY output_buf, int num_rows) 244 { 245 } 246 247 GLOBAL(int) 248 jsimd_can_h2v2_downsample (void) 249 { 250 init_simd(); 251 252 /* The code is optimised for these values only */ 253 if (BITS_IN_JSAMPLE != 8) 254 return 0; 255 if (sizeof(JDIMENSION) != 4) 256 return 0; 257 258 if (simd_support & JSIMD_SSE2) 259 return 1; 260 261 return 0; 262 } 263 264 GLOBAL(int) 265 jsimd_can_h2v1_downsample (void) 266 { 267 init_simd(); 268 269 /* The code is optimised for these values only */ 270 if (BITS_IN_JSAMPLE != 8) 271 return 0; 272 if (sizeof(JDIMENSION) != 4) 273 return 0; 274 275 if (simd_support & JSIMD_SSE2) 276 return 1; 277 278 return 0; 279 } 280 281 GLOBAL(void) 282 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, 283 JSAMPARRAY input_data, JSAMPARRAY output_data) 284 { 285 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, 286 compptr->v_samp_factor, compptr->width_in_blocks, 287 input_data, output_data); 288 } 289 290 GLOBAL(void) 291 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, 292 JSAMPARRAY input_data, JSAMPARRAY output_data) 293 { 294 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, 295 compptr->v_samp_factor, compptr->width_in_blocks, 296 input_data, output_data); 297 } 298 299 GLOBAL(int) 300 jsimd_can_h2v2_upsample (void) 301 { 302 init_simd(); 303 304 /* The code is optimised for these values only */ 305 if (BITS_IN_JSAMPLE != 8) 306 return 0; 307 if (sizeof(JDIMENSION) != 4) 308 return 0; 309 310 if (simd_support & JSIMD_SSE2) 311 return 1; 312 313 return 0; 314 } 315 316 GLOBAL(int) 317 jsimd_can_h2v1_upsample (void) 318 { 319 init_simd(); 320 321 /* The code is optimised for these values only */ 322 if (BITS_IN_JSAMPLE != 8) 323 return 0; 324 if (sizeof(JDIMENSION) != 4) 325 return 0; 326 327 if (simd_support & JSIMD_SSE2) 328 return 1; 329 330 return 0; 331 } 332 333 GLOBAL(void) 334 jsimd_h2v2_upsample (j_decompress_ptr cinfo, 335 jpeg_component_info *compptr, 336 JSAMPARRAY input_data, 337 JSAMPARRAY *output_data_ptr) 338 { 339 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, 340 input_data, output_data_ptr); 341 } 342 343 GLOBAL(void) 344 jsimd_h2v1_upsample (j_decompress_ptr cinfo, 345 jpeg_component_info *compptr, 346 JSAMPARRAY input_data, 347 JSAMPARRAY *output_data_ptr) 348 { 349 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, 350 input_data, output_data_ptr); 351 } 352 353 GLOBAL(int) 354 jsimd_can_h2v2_fancy_upsample (void) 355 { 356 init_simd(); 357 358 /* The code is optimised for these values only */ 359 if (BITS_IN_JSAMPLE != 8) 360 return 0; 361 if (sizeof(JDIMENSION) != 4) 362 return 0; 363 364 if ((simd_support & JSIMD_SSE2) && 365 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 366 return 1; 367 368 return 0; 369 } 370 371 GLOBAL(int) 372 jsimd_can_h2v1_fancy_upsample (void) 373 { 374 init_simd(); 375 376 /* The code is optimised for these values only */ 377 if (BITS_IN_JSAMPLE != 8) 378 return 0; 379 if (sizeof(JDIMENSION) != 4) 380 return 0; 381 382 if ((simd_support & JSIMD_SSE2) && 383 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 384 return 1; 385 386 return 0; 387 } 388 389 GLOBAL(void) 390 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, 391 jpeg_component_info *compptr, 392 JSAMPARRAY input_data, 393 JSAMPARRAY *output_data_ptr) 394 { 395 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, 396 compptr->downsampled_width, input_data, 397 output_data_ptr); 398 } 399 400 GLOBAL(void) 401 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, 402 jpeg_component_info *compptr, 403 JSAMPARRAY input_data, 404 JSAMPARRAY *output_data_ptr) 405 { 406 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, 407 compptr->downsampled_width, input_data, 408 output_data_ptr); 409 } 410 411 GLOBAL(int) 412 jsimd_can_h2v2_merged_upsample (void) 413 { 414 init_simd(); 415 416 /* The code is optimised for these values only */ 417 if (BITS_IN_JSAMPLE != 8) 418 return 0; 419 if (sizeof(JDIMENSION) != 4) 420 return 0; 421 422 if ((simd_support & JSIMD_SSE2) && 423 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 424 return 1; 425 426 return 0; 427 } 428 429 GLOBAL(int) 430 jsimd_can_h2v1_merged_upsample (void) 431 { 432 init_simd(); 433 434 /* The code is optimised for these values only */ 435 if (BITS_IN_JSAMPLE != 8) 436 return 0; 437 if (sizeof(JDIMENSION) != 4) 438 return 0; 439 440 if ((simd_support & JSIMD_SSE2) && 441 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 442 return 1; 443 444 return 0; 445 } 446 447 GLOBAL(void) 448 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, 449 JSAMPIMAGE input_buf, 450 JDIMENSION in_row_group_ctr, 451 JSAMPARRAY output_buf) 452 { 453 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 454 455 switch(cinfo->out_color_space) { 456 case JCS_EXT_RGB: 457 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; 458 break; 459 case JCS_EXT_RGBX: 460 case JCS_EXT_RGBA: 461 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; 462 break; 463 case JCS_EXT_BGR: 464 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; 465 break; 466 case JCS_EXT_BGRX: 467 case JCS_EXT_BGRA: 468 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; 469 break; 470 case JCS_EXT_XBGR: 471 case JCS_EXT_ABGR: 472 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; 473 break; 474 case JCS_EXT_XRGB: 475 case JCS_EXT_ARGB: 476 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; 477 break; 478 default: 479 sse2fct=jsimd_h2v2_merged_upsample_sse2; 480 break; 481 } 482 483 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 484 } 485 486 GLOBAL(void) 487 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, 488 JSAMPIMAGE input_buf, 489 JDIMENSION in_row_group_ctr, 490 JSAMPARRAY output_buf) 491 { 492 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 493 494 switch(cinfo->out_color_space) { 495 case JCS_EXT_RGB: 496 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; 497 break; 498 case JCS_EXT_RGBX: 499 case JCS_EXT_RGBA: 500 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; 501 break; 502 case JCS_EXT_BGR: 503 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; 504 break; 505 case JCS_EXT_BGRX: 506 case JCS_EXT_BGRA: 507 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; 508 break; 509 case JCS_EXT_XBGR: 510 case JCS_EXT_ABGR: 511 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; 512 break; 513 case JCS_EXT_XRGB: 514 case JCS_EXT_ARGB: 515 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; 516 break; 517 default: 518 sse2fct=jsimd_h2v1_merged_upsample_sse2; 519 break; 520 } 521 522 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 523 } 524 525 GLOBAL(int) 526 jsimd_can_convsamp (void) 527 { 528 init_simd(); 529 530 /* The code is optimised for these values only */ 531 if (DCTSIZE != 8) 532 return 0; 533 if (BITS_IN_JSAMPLE != 8) 534 return 0; 535 if (sizeof(JDIMENSION) != 4) 536 return 0; 537 if (sizeof(DCTELEM) != 2) 538 return 0; 539 540 if (simd_support & JSIMD_SSE2) 541 return 1; 542 543 return 0; 544 } 545 546 GLOBAL(int) 547 jsimd_can_convsamp_float (void) 548 { 549 init_simd(); 550 551 /* The code is optimised for these values only */ 552 if (DCTSIZE != 8) 553 return 0; 554 if (BITS_IN_JSAMPLE != 8) 555 return 0; 556 if (sizeof(JDIMENSION) != 4) 557 return 0; 558 if (sizeof(FAST_FLOAT) != 4) 559 return 0; 560 561 if (simd_support & JSIMD_SSE2) 562 return 1; 563 564 return 0; 565 } 566 567 GLOBAL(void) 568 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, 569 DCTELEM *workspace) 570 { 571 jsimd_convsamp_sse2(sample_data, start_col, workspace); 572 } 573 574 GLOBAL(void) 575 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, 576 FAST_FLOAT *workspace) 577 { 578 jsimd_convsamp_float_sse2(sample_data, start_col, workspace); 579 } 580 581 GLOBAL(int) 582 jsimd_can_fdct_islow (void) 583 { 584 init_simd(); 585 586 /* The code is optimised for these values only */ 587 if (DCTSIZE != 8) 588 return 0; 589 if (sizeof(DCTELEM) != 2) 590 return 0; 591 592 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) 593 return 1; 594 595 return 0; 596 } 597 598 GLOBAL(int) 599 jsimd_can_fdct_ifast (void) 600 { 601 init_simd(); 602 603 /* The code is optimised for these values only */ 604 if (DCTSIZE != 8) 605 return 0; 606 if (sizeof(DCTELEM) != 2) 607 return 0; 608 609 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) 610 return 1; 611 612 return 0; 613 } 614 615 GLOBAL(int) 616 jsimd_can_fdct_float (void) 617 { 618 init_simd(); 619 620 /* The code is optimised for these values only */ 621 if (DCTSIZE != 8) 622 return 0; 623 if (sizeof(FAST_FLOAT) != 4) 624 return 0; 625 626 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) 627 return 1; 628 629 return 0; 630 } 631 632 GLOBAL(void) 633 jsimd_fdct_islow (DCTELEM *data) 634 { 635 jsimd_fdct_islow_sse2(data); 636 } 637 638 GLOBAL(void) 639 jsimd_fdct_ifast (DCTELEM *data) 640 { 641 jsimd_fdct_ifast_sse2(data); 642 } 643 644 GLOBAL(void) 645 jsimd_fdct_float (FAST_FLOAT *data) 646 { 647 jsimd_fdct_float_sse(data); 648 } 649 650 GLOBAL(int) 651 jsimd_can_quantize (void) 652 { 653 init_simd(); 654 655 /* The code is optimised for these values only */ 656 if (DCTSIZE != 8) 657 return 0; 658 if (sizeof(JCOEF) != 2) 659 return 0; 660 if (sizeof(DCTELEM) != 2) 661 return 0; 662 663 if (simd_support & JSIMD_SSE2) 664 return 1; 665 666 return 0; 667 } 668 669 GLOBAL(int) 670 jsimd_can_quantize_float (void) 671 { 672 init_simd(); 673 674 /* The code is optimised for these values only */ 675 if (DCTSIZE != 8) 676 return 0; 677 if (sizeof(JCOEF) != 2) 678 return 0; 679 if (sizeof(FAST_FLOAT) != 4) 680 return 0; 681 682 if (simd_support & JSIMD_SSE2) 683 return 1; 684 685 return 0; 686 } 687 688 GLOBAL(void) 689 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, 690 DCTELEM *workspace) 691 { 692 jsimd_quantize_sse2(coef_block, divisors, workspace); 693 } 694 695 GLOBAL(void) 696 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, 697 FAST_FLOAT *workspace) 698 { 699 jsimd_quantize_float_sse2(coef_block, divisors, workspace); 700 } 701 702 GLOBAL(int) 703 jsimd_can_idct_2x2 (void) 704 { 705 init_simd(); 706 707 /* The code is optimised for these values only */ 708 if (DCTSIZE != 8) 709 return 0; 710 if (sizeof(JCOEF) != 2) 711 return 0; 712 if (BITS_IN_JSAMPLE != 8) 713 return 0; 714 if (sizeof(JDIMENSION) != 4) 715 return 0; 716 if (sizeof(ISLOW_MULT_TYPE) != 2) 717 return 0; 718 719 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 720 return 1; 721 722 return 0; 723 } 724 725 GLOBAL(int) 726 jsimd_can_idct_4x4 (void) 727 { 728 init_simd(); 729 730 /* The code is optimised for these values only */ 731 if (DCTSIZE != 8) 732 return 0; 733 if (sizeof(JCOEF) != 2) 734 return 0; 735 if (BITS_IN_JSAMPLE != 8) 736 return 0; 737 if (sizeof(JDIMENSION) != 4) 738 return 0; 739 if (sizeof(ISLOW_MULT_TYPE) != 2) 740 return 0; 741 742 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 743 return 1; 744 745 return 0; 746 } 747 748 GLOBAL(void) 749 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, 750 JCOEFPTR coef_block, JSAMPARRAY output_buf, 751 JDIMENSION output_col) 752 { 753 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); 754 } 755 756 GLOBAL(void) 757 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, 758 JCOEFPTR coef_block, JSAMPARRAY output_buf, 759 JDIMENSION output_col) 760 { 761 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); 762 } 763 764 GLOBAL(int) 765 jsimd_can_idct_islow (void) 766 { 767 init_simd(); 768 769 /* The code is optimised for these values only */ 770 if (DCTSIZE != 8) 771 return 0; 772 if (sizeof(JCOEF) != 2) 773 return 0; 774 if (BITS_IN_JSAMPLE != 8) 775 return 0; 776 if (sizeof(JDIMENSION) != 4) 777 return 0; 778 if (sizeof(ISLOW_MULT_TYPE) != 2) 779 return 0; 780 781 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) 782 return 1; 783 784 return 0; 785 } 786 787 GLOBAL(int) 788 jsimd_can_idct_ifast (void) 789 { 790 init_simd(); 791 792 /* The code is optimised for these values only */ 793 if (DCTSIZE != 8) 794 return 0; 795 if (sizeof(JCOEF) != 2) 796 return 0; 797 if (BITS_IN_JSAMPLE != 8) 798 return 0; 799 if (sizeof(JDIMENSION) != 4) 800 return 0; 801 if (sizeof(IFAST_MULT_TYPE) != 2) 802 return 0; 803 if (IFAST_SCALE_BITS != 2) 804 return 0; 805 806 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) 807 return 1; 808 809 return 0; 810 } 811 812 GLOBAL(int) 813 jsimd_can_idct_float (void) 814 { 815 init_simd(); 816 817 if (DCTSIZE != 8) 818 return 0; 819 if (sizeof(JCOEF) != 2) 820 return 0; 821 if (BITS_IN_JSAMPLE != 8) 822 return 0; 823 if (sizeof(JDIMENSION) != 4) 824 return 0; 825 if (sizeof(FAST_FLOAT) != 4) 826 return 0; 827 if (sizeof(FLOAT_MULT_TYPE) != 4) 828 return 0; 829 830 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) 831 return 1; 832 833 return 0; 834 } 835 836 GLOBAL(void) 837 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, 838 JCOEFPTR coef_block, JSAMPARRAY output_buf, 839 JDIMENSION output_col) 840 { 841 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, 842 output_col); 843 } 844 845 GLOBAL(void) 846 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, 847 JCOEFPTR coef_block, JSAMPARRAY output_buf, 848 JDIMENSION output_col) 849 { 850 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, 851 output_col); 852 } 853 854 GLOBAL(void) 855 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, 856 JCOEFPTR coef_block, JSAMPARRAY output_buf, 857 JDIMENSION output_col) 858 { 859 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, 860 output_col); 861 } 862 863 GLOBAL(int) 864 jsimd_can_huff_encode_one_block (void) 865 { 866 init_simd(); 867 868 if (DCTSIZE != 8) 869 return 0; 870 if (sizeof(JCOEF) != 2) 871 return 0; 872 873 if ((simd_support & JSIMD_SSE2) && simd_huffman && 874 IS_ALIGNED_SSE(jconst_huff_encode_one_block)) 875 return 1; 876 877 return 0; 878 } 879 880 GLOBAL(JOCTET*) 881 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, 882 int last_dc_val, c_derived_tbl *dctbl, 883 c_derived_tbl *actbl) 884 { 885 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val, 886 dctbl, actbl); 887 } 888