Home | History | Annotate | Download | only in simd
      1 /*
      2  * jsimd_x86_64.c
      3  *
      4  * Copyright 2009 Pierre Ossman <ossman (at) cendio.se> for Cendio AB
      5  * Copyright (C) 2009-2011, 2014, 2016, D. R. Commander.
      6  * Copyright (C) 2015, Matthieu Darbois.
      7  *
      8  * Based on the x86 SIMD extension for IJG JPEG library,
      9  * Copyright (C) 1999-2006, MIYASAKA Masaru.
     10  * For conditions of distribution and use, see copyright notice in jsimdext.inc
     11  *
     12  * This file contains the interface between the "normal" portions
     13  * of the library and the SIMD implementations when running on a
     14  * 64-bit x86 architecture.
     15  */
     16 
     17 #define JPEG_INTERNALS
     18 #include "../jinclude.h"
     19 #include "../jpeglib.h"
     20 #include "../jsimd.h"
     21 #include "../jdct.h"
     22 #include "../jsimddct.h"
     23 #include "jsimd.h"
     24 
     25 /*
     26  * In the PIC cases, we have no guarantee that constants will keep
     27  * their alignment. This macro allows us to verify it at runtime.
     28  */
     29 #define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0)
     30 
     31 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
     32 
     33 static unsigned int simd_support = ~0;
     34 static unsigned int simd_huffman = 1;
     35 
     36 /*
     37  * Check what SIMD accelerations are supported.
     38  *
     39  * FIXME: This code is racy under a multi-threaded environment.
     40  */
     41 LOCAL(void)
     42 init_simd (void)
     43 {
     44   char *env = NULL;
     45 
     46   if (simd_support != ~0U)
     47     return;
     48 
     49   simd_support = JSIMD_SSE2 | JSIMD_SSE;
     50 
     51   /* Force different settings through environment variables */
     52   env = getenv("JSIMD_FORCENONE");
     53   if ((env != NULL) && (strcmp(env, "1") == 0))
     54     simd_support = 0;
     55   env = getenv("JSIMD_NOHUFFENC");
     56   if ((env != NULL) && (strcmp(env, "1") == 0))
     57     simd_huffman = 0;
     58 }
     59 
     60 GLOBAL(int)
     61 jsimd_can_rgb_ycc (void)
     62 {
     63   init_simd();
     64 
     65   /* The code is optimised for these values only */
     66   if (BITS_IN_JSAMPLE != 8)
     67     return 0;
     68   if (sizeof(JDIMENSION) != 4)
     69     return 0;
     70   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
     71     return 0;
     72 
     73   if ((simd_support & JSIMD_SSE2) &&
     74       IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
     75     return 1;
     76 
     77   return 0;
     78 }
     79 
     80 GLOBAL(int)
     81 jsimd_can_rgb_gray (void)
     82 {
     83   init_simd();
     84 
     85   /* The code is optimised for these values only */
     86   if (BITS_IN_JSAMPLE != 8)
     87     return 0;
     88   if (sizeof(JDIMENSION) != 4)
     89     return 0;
     90   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
     91     return 0;
     92 
     93   if ((simd_support & JSIMD_SSE2) &&
     94       IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
     95     return 1;
     96 
     97   return 0;
     98 }
     99 
    100 GLOBAL(int)
    101 jsimd_can_ycc_rgb (void)
    102 {
    103   init_simd();
    104 
    105   /* The code is optimised for these values only */
    106   if (BITS_IN_JSAMPLE != 8)
    107     return 0;
    108   if (sizeof(JDIMENSION) != 4)
    109     return 0;
    110   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    111     return 0;
    112 
    113   if ((simd_support & JSIMD_SSE2) &&
    114       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
    115     return 1;
    116 
    117   return 0;
    118 }
    119 
    120 GLOBAL(int)
    121 jsimd_can_ycc_rgb565 (void)
    122 {
    123   return 0;
    124 }
    125 
    126 GLOBAL(void)
    127 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
    128                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
    129                        JDIMENSION output_row, int num_rows)
    130 {
    131   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    132 
    133   switch(cinfo->in_color_space) {
    134     case JCS_EXT_RGB:
    135       sse2fct=jsimd_extrgb_ycc_convert_sse2;
    136       break;
    137     case JCS_EXT_RGBX:
    138     case JCS_EXT_RGBA:
    139       sse2fct=jsimd_extrgbx_ycc_convert_sse2;
    140       break;
    141     case JCS_EXT_BGR:
    142       sse2fct=jsimd_extbgr_ycc_convert_sse2;
    143       break;
    144     case JCS_EXT_BGRX:
    145     case JCS_EXT_BGRA:
    146       sse2fct=jsimd_extbgrx_ycc_convert_sse2;
    147       break;
    148     case JCS_EXT_XBGR:
    149     case JCS_EXT_ABGR:
    150       sse2fct=jsimd_extxbgr_ycc_convert_sse2;
    151       break;
    152     case JCS_EXT_XRGB:
    153     case JCS_EXT_ARGB:
    154       sse2fct=jsimd_extxrgb_ycc_convert_sse2;
    155       break;
    156     default:
    157       sse2fct=jsimd_rgb_ycc_convert_sse2;
    158       break;
    159   }
    160 
    161   sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    162 }
    163 
    164 GLOBAL(void)
    165 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
    166                         JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
    167                         JDIMENSION output_row, int num_rows)
    168 {
    169   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    170 
    171   switch(cinfo->in_color_space) {
    172     case JCS_EXT_RGB:
    173       sse2fct=jsimd_extrgb_gray_convert_sse2;
    174       break;
    175     case JCS_EXT_RGBX:
    176     case JCS_EXT_RGBA:
    177       sse2fct=jsimd_extrgbx_gray_convert_sse2;
    178       break;
    179     case JCS_EXT_BGR:
    180       sse2fct=jsimd_extbgr_gray_convert_sse2;
    181       break;
    182     case JCS_EXT_BGRX:
    183     case JCS_EXT_BGRA:
    184       sse2fct=jsimd_extbgrx_gray_convert_sse2;
    185       break;
    186     case JCS_EXT_XBGR:
    187     case JCS_EXT_ABGR:
    188       sse2fct=jsimd_extxbgr_gray_convert_sse2;
    189       break;
    190     case JCS_EXT_XRGB:
    191     case JCS_EXT_ARGB:
    192       sse2fct=jsimd_extxrgb_gray_convert_sse2;
    193       break;
    194     default:
    195       sse2fct=jsimd_rgb_gray_convert_sse2;
    196       break;
    197   }
    198 
    199   sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    200 }
    201 
    202 GLOBAL(void)
    203 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
    204                        JSAMPIMAGE input_buf, JDIMENSION input_row,
    205                        JSAMPARRAY output_buf, int num_rows)
    206 {
    207   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
    208 
    209   switch(cinfo->out_color_space) {
    210     case JCS_EXT_RGB:
    211       sse2fct=jsimd_ycc_extrgb_convert_sse2;
    212       break;
    213     case JCS_EXT_RGBX:
    214     case JCS_EXT_RGBA:
    215       sse2fct=jsimd_ycc_extrgbx_convert_sse2;
    216       break;
    217     case JCS_EXT_BGR:
    218       sse2fct=jsimd_ycc_extbgr_convert_sse2;
    219       break;
    220     case JCS_EXT_BGRX:
    221     case JCS_EXT_BGRA:
    222       sse2fct=jsimd_ycc_extbgrx_convert_sse2;
    223       break;
    224     case JCS_EXT_XBGR:
    225     case JCS_EXT_ABGR:
    226       sse2fct=jsimd_ycc_extxbgr_convert_sse2;
    227       break;
    228     case JCS_EXT_XRGB:
    229     case JCS_EXT_ARGB:
    230       sse2fct=jsimd_ycc_extxrgb_convert_sse2;
    231       break;
    232     default:
    233       sse2fct=jsimd_ycc_rgb_convert_sse2;
    234       break;
    235   }
    236 
    237   sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
    238 }
    239 
    240 GLOBAL(void)
    241 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
    242                           JSAMPIMAGE input_buf, JDIMENSION input_row,
    243                           JSAMPARRAY output_buf, int num_rows)
    244 {
    245 }
    246 
    247 GLOBAL(int)
    248 jsimd_can_h2v2_downsample (void)
    249 {
    250   init_simd();
    251 
    252   /* The code is optimised for these values only */
    253   if (BITS_IN_JSAMPLE != 8)
    254     return 0;
    255   if (sizeof(JDIMENSION) != 4)
    256     return 0;
    257 
    258   if (simd_support & JSIMD_SSE2)
    259     return 1;
    260 
    261   return 0;
    262 }
    263 
    264 GLOBAL(int)
    265 jsimd_can_h2v1_downsample (void)
    266 {
    267   init_simd();
    268 
    269   /* The code is optimised for these values only */
    270   if (BITS_IN_JSAMPLE != 8)
    271     return 0;
    272   if (sizeof(JDIMENSION) != 4)
    273     return 0;
    274 
    275   if (simd_support & JSIMD_SSE2)
    276     return 1;
    277 
    278   return 0;
    279 }
    280 
    281 GLOBAL(void)
    282 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
    283                        JSAMPARRAY input_data, JSAMPARRAY output_data)
    284 {
    285   jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
    286                              compptr->v_samp_factor, compptr->width_in_blocks,
    287                              input_data, output_data);
    288 }
    289 
    290 GLOBAL(void)
    291 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
    292                        JSAMPARRAY input_data, JSAMPARRAY output_data)
    293 {
    294   jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
    295                              compptr->v_samp_factor, compptr->width_in_blocks,
    296                              input_data, output_data);
    297 }
    298 
    299 GLOBAL(int)
    300 jsimd_can_h2v2_upsample (void)
    301 {
    302   init_simd();
    303 
    304   /* The code is optimised for these values only */
    305   if (BITS_IN_JSAMPLE != 8)
    306     return 0;
    307   if (sizeof(JDIMENSION) != 4)
    308     return 0;
    309 
    310   if (simd_support & JSIMD_SSE2)
    311     return 1;
    312 
    313   return 0;
    314 }
    315 
    316 GLOBAL(int)
    317 jsimd_can_h2v1_upsample (void)
    318 {
    319   init_simd();
    320 
    321   /* The code is optimised for these values only */
    322   if (BITS_IN_JSAMPLE != 8)
    323     return 0;
    324   if (sizeof(JDIMENSION) != 4)
    325     return 0;
    326 
    327   if (simd_support & JSIMD_SSE2)
    328     return 1;
    329 
    330   return 0;
    331 }
    332 
    333 GLOBAL(void)
    334 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
    335                      jpeg_component_info *compptr,
    336                      JSAMPARRAY input_data,
    337                      JSAMPARRAY *output_data_ptr)
    338 {
    339   jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
    340                            input_data, output_data_ptr);
    341 }
    342 
    343 GLOBAL(void)
    344 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
    345                      jpeg_component_info *compptr,
    346                      JSAMPARRAY input_data,
    347                      JSAMPARRAY *output_data_ptr)
    348 {
    349   jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
    350                            input_data, output_data_ptr);
    351 }
    352 
    353 GLOBAL(int)
    354 jsimd_can_h2v2_fancy_upsample (void)
    355 {
    356   init_simd();
    357 
    358   /* The code is optimised for these values only */
    359   if (BITS_IN_JSAMPLE != 8)
    360     return 0;
    361   if (sizeof(JDIMENSION) != 4)
    362     return 0;
    363 
    364   if ((simd_support & JSIMD_SSE2) &&
    365       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
    366     return 1;
    367 
    368   return 0;
    369 }
    370 
    371 GLOBAL(int)
    372 jsimd_can_h2v1_fancy_upsample (void)
    373 {
    374   init_simd();
    375 
    376   /* The code is optimised for these values only */
    377   if (BITS_IN_JSAMPLE != 8)
    378     return 0;
    379   if (sizeof(JDIMENSION) != 4)
    380     return 0;
    381 
    382   if ((simd_support & JSIMD_SSE2) &&
    383       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
    384     return 1;
    385 
    386   return 0;
    387 }
    388 
    389 GLOBAL(void)
    390 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
    391                            jpeg_component_info *compptr,
    392                            JSAMPARRAY input_data,
    393                            JSAMPARRAY *output_data_ptr)
    394 {
    395   jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
    396                                  compptr->downsampled_width, input_data,
    397                                  output_data_ptr);
    398 }
    399 
    400 GLOBAL(void)
    401 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
    402                            jpeg_component_info *compptr,
    403                            JSAMPARRAY input_data,
    404                            JSAMPARRAY *output_data_ptr)
    405 {
    406   jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
    407                                  compptr->downsampled_width, input_data,
    408                                  output_data_ptr);
    409 }
    410 
    411 GLOBAL(int)
    412 jsimd_can_h2v2_merged_upsample (void)
    413 {
    414   init_simd();
    415 
    416   /* The code is optimised for these values only */
    417   if (BITS_IN_JSAMPLE != 8)
    418     return 0;
    419   if (sizeof(JDIMENSION) != 4)
    420     return 0;
    421 
    422   if ((simd_support & JSIMD_SSE2) &&
    423       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
    424     return 1;
    425 
    426   return 0;
    427 }
    428 
    429 GLOBAL(int)
    430 jsimd_can_h2v1_merged_upsample (void)
    431 {
    432   init_simd();
    433 
    434   /* The code is optimised for these values only */
    435   if (BITS_IN_JSAMPLE != 8)
    436     return 0;
    437   if (sizeof(JDIMENSION) != 4)
    438     return 0;
    439 
    440   if ((simd_support & JSIMD_SSE2) &&
    441       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
    442     return 1;
    443 
    444   return 0;
    445 }
    446 
    447 GLOBAL(void)
    448 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
    449                             JSAMPIMAGE input_buf,
    450                             JDIMENSION in_row_group_ctr,
    451                             JSAMPARRAY output_buf)
    452 {
    453   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    454 
    455   switch(cinfo->out_color_space) {
    456     case JCS_EXT_RGB:
    457       sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
    458       break;
    459     case JCS_EXT_RGBX:
    460     case JCS_EXT_RGBA:
    461       sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
    462       break;
    463     case JCS_EXT_BGR:
    464       sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
    465       break;
    466     case JCS_EXT_BGRX:
    467     case JCS_EXT_BGRA:
    468       sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
    469       break;
    470     case JCS_EXT_XBGR:
    471     case JCS_EXT_ABGR:
    472       sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
    473       break;
    474     case JCS_EXT_XRGB:
    475     case JCS_EXT_ARGB:
    476       sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
    477       break;
    478     default:
    479       sse2fct=jsimd_h2v2_merged_upsample_sse2;
    480       break;
    481   }
    482 
    483   sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    484 }
    485 
    486 GLOBAL(void)
    487 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
    488                             JSAMPIMAGE input_buf,
    489                             JDIMENSION in_row_group_ctr,
    490                             JSAMPARRAY output_buf)
    491 {
    492   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    493 
    494   switch(cinfo->out_color_space) {
    495     case JCS_EXT_RGB:
    496       sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
    497       break;
    498     case JCS_EXT_RGBX:
    499     case JCS_EXT_RGBA:
    500       sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
    501       break;
    502     case JCS_EXT_BGR:
    503       sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
    504       break;
    505     case JCS_EXT_BGRX:
    506     case JCS_EXT_BGRA:
    507       sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
    508       break;
    509     case JCS_EXT_XBGR:
    510     case JCS_EXT_ABGR:
    511       sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
    512       break;
    513     case JCS_EXT_XRGB:
    514     case JCS_EXT_ARGB:
    515       sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
    516       break;
    517     default:
    518       sse2fct=jsimd_h2v1_merged_upsample_sse2;
    519       break;
    520   }
    521 
    522   sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    523 }
    524 
    525 GLOBAL(int)
    526 jsimd_can_convsamp (void)
    527 {
    528   init_simd();
    529 
    530   /* The code is optimised for these values only */
    531   if (DCTSIZE != 8)
    532     return 0;
    533   if (BITS_IN_JSAMPLE != 8)
    534     return 0;
    535   if (sizeof(JDIMENSION) != 4)
    536     return 0;
    537   if (sizeof(DCTELEM) != 2)
    538     return 0;
    539 
    540   if (simd_support & JSIMD_SSE2)
    541     return 1;
    542 
    543   return 0;
    544 }
    545 
    546 GLOBAL(int)
    547 jsimd_can_convsamp_float (void)
    548 {
    549   init_simd();
    550 
    551   /* The code is optimised for these values only */
    552   if (DCTSIZE != 8)
    553     return 0;
    554   if (BITS_IN_JSAMPLE != 8)
    555     return 0;
    556   if (sizeof(JDIMENSION) != 4)
    557     return 0;
    558   if (sizeof(FAST_FLOAT) != 4)
    559     return 0;
    560 
    561   if (simd_support & JSIMD_SSE2)
    562     return 1;
    563 
    564   return 0;
    565 }
    566 
    567 GLOBAL(void)
    568 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
    569                 DCTELEM *workspace)
    570 {
    571   jsimd_convsamp_sse2(sample_data, start_col, workspace);
    572 }
    573 
    574 GLOBAL(void)
    575 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
    576                       FAST_FLOAT *workspace)
    577 {
    578   jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
    579 }
    580 
    581 GLOBAL(int)
    582 jsimd_can_fdct_islow (void)
    583 {
    584   init_simd();
    585 
    586   /* The code is optimised for these values only */
    587   if (DCTSIZE != 8)
    588     return 0;
    589   if (sizeof(DCTELEM) != 2)
    590     return 0;
    591 
    592   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
    593     return 1;
    594 
    595   return 0;
    596 }
    597 
    598 GLOBAL(int)
    599 jsimd_can_fdct_ifast (void)
    600 {
    601   init_simd();
    602 
    603   /* The code is optimised for these values only */
    604   if (DCTSIZE != 8)
    605     return 0;
    606   if (sizeof(DCTELEM) != 2)
    607     return 0;
    608 
    609   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
    610     return 1;
    611 
    612   return 0;
    613 }
    614 
    615 GLOBAL(int)
    616 jsimd_can_fdct_float (void)
    617 {
    618   init_simd();
    619 
    620   /* The code is optimised for these values only */
    621   if (DCTSIZE != 8)
    622     return 0;
    623   if (sizeof(FAST_FLOAT) != 4)
    624     return 0;
    625 
    626   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
    627     return 1;
    628 
    629   return 0;
    630 }
    631 
    632 GLOBAL(void)
    633 jsimd_fdct_islow (DCTELEM *data)
    634 {
    635   jsimd_fdct_islow_sse2(data);
    636 }
    637 
    638 GLOBAL(void)
    639 jsimd_fdct_ifast (DCTELEM *data)
    640 {
    641   jsimd_fdct_ifast_sse2(data);
    642 }
    643 
    644 GLOBAL(void)
    645 jsimd_fdct_float (FAST_FLOAT *data)
    646 {
    647   jsimd_fdct_float_sse(data);
    648 }
    649 
    650 GLOBAL(int)
    651 jsimd_can_quantize (void)
    652 {
    653   init_simd();
    654 
    655   /* The code is optimised for these values only */
    656   if (DCTSIZE != 8)
    657     return 0;
    658   if (sizeof(JCOEF) != 2)
    659     return 0;
    660   if (sizeof(DCTELEM) != 2)
    661     return 0;
    662 
    663   if (simd_support & JSIMD_SSE2)
    664     return 1;
    665 
    666   return 0;
    667 }
    668 
    669 GLOBAL(int)
    670 jsimd_can_quantize_float (void)
    671 {
    672   init_simd();
    673 
    674   /* The code is optimised for these values only */
    675   if (DCTSIZE != 8)
    676     return 0;
    677   if (sizeof(JCOEF) != 2)
    678     return 0;
    679   if (sizeof(FAST_FLOAT) != 4)
    680     return 0;
    681 
    682   if (simd_support & JSIMD_SSE2)
    683     return 1;
    684 
    685   return 0;
    686 }
    687 
    688 GLOBAL(void)
    689 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors,
    690                 DCTELEM *workspace)
    691 {
    692   jsimd_quantize_sse2(coef_block, divisors, workspace);
    693 }
    694 
    695 GLOBAL(void)
    696 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors,
    697                       FAST_FLOAT *workspace)
    698 {
    699   jsimd_quantize_float_sse2(coef_block, divisors, workspace);
    700 }
    701 
    702 GLOBAL(int)
    703 jsimd_can_idct_2x2 (void)
    704 {
    705   init_simd();
    706 
    707   /* The code is optimised for these values only */
    708   if (DCTSIZE != 8)
    709     return 0;
    710   if (sizeof(JCOEF) != 2)
    711     return 0;
    712   if (BITS_IN_JSAMPLE != 8)
    713     return 0;
    714   if (sizeof(JDIMENSION) != 4)
    715     return 0;
    716   if (sizeof(ISLOW_MULT_TYPE) != 2)
    717     return 0;
    718 
    719   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
    720     return 1;
    721 
    722   return 0;
    723 }
    724 
    725 GLOBAL(int)
    726 jsimd_can_idct_4x4 (void)
    727 {
    728   init_simd();
    729 
    730   /* The code is optimised for these values only */
    731   if (DCTSIZE != 8)
    732     return 0;
    733   if (sizeof(JCOEF) != 2)
    734     return 0;
    735   if (BITS_IN_JSAMPLE != 8)
    736     return 0;
    737   if (sizeof(JDIMENSION) != 4)
    738     return 0;
    739   if (sizeof(ISLOW_MULT_TYPE) != 2)
    740     return 0;
    741 
    742   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
    743     return 1;
    744 
    745   return 0;
    746 }
    747 
    748 GLOBAL(void)
    749 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    750                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    751                 JDIMENSION output_col)
    752 {
    753   jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
    754 }
    755 
    756 GLOBAL(void)
    757 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    758                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    759                 JDIMENSION output_col)
    760 {
    761   jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
    762 }
    763 
    764 GLOBAL(int)
    765 jsimd_can_idct_islow (void)
    766 {
    767   init_simd();
    768 
    769   /* The code is optimised for these values only */
    770   if (DCTSIZE != 8)
    771     return 0;
    772   if (sizeof(JCOEF) != 2)
    773     return 0;
    774   if (BITS_IN_JSAMPLE != 8)
    775     return 0;
    776   if (sizeof(JDIMENSION) != 4)
    777     return 0;
    778   if (sizeof(ISLOW_MULT_TYPE) != 2)
    779     return 0;
    780 
    781   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
    782     return 1;
    783 
    784   return 0;
    785 }
    786 
    787 GLOBAL(int)
    788 jsimd_can_idct_ifast (void)
    789 {
    790   init_simd();
    791 
    792   /* The code is optimised for these values only */
    793   if (DCTSIZE != 8)
    794     return 0;
    795   if (sizeof(JCOEF) != 2)
    796     return 0;
    797   if (BITS_IN_JSAMPLE != 8)
    798     return 0;
    799   if (sizeof(JDIMENSION) != 4)
    800     return 0;
    801   if (sizeof(IFAST_MULT_TYPE) != 2)
    802     return 0;
    803   if (IFAST_SCALE_BITS != 2)
    804     return 0;
    805 
    806   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
    807     return 1;
    808 
    809   return 0;
    810 }
    811 
    812 GLOBAL(int)
    813 jsimd_can_idct_float (void)
    814 {
    815   init_simd();
    816 
    817   if (DCTSIZE != 8)
    818     return 0;
    819   if (sizeof(JCOEF) != 2)
    820     return 0;
    821   if (BITS_IN_JSAMPLE != 8)
    822     return 0;
    823   if (sizeof(JDIMENSION) != 4)
    824     return 0;
    825   if (sizeof(FAST_FLOAT) != 4)
    826     return 0;
    827   if (sizeof(FLOAT_MULT_TYPE) != 4)
    828     return 0;
    829 
    830   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
    831     return 1;
    832 
    833   return 0;
    834 }
    835 
    836 GLOBAL(void)
    837 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    838                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
    839                   JDIMENSION output_col)
    840 {
    841   jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
    842                         output_col);
    843 }
    844 
    845 GLOBAL(void)
    846 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    847                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
    848                   JDIMENSION output_col)
    849 {
    850   jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
    851                         output_col);
    852 }
    853 
    854 GLOBAL(void)
    855 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    856                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
    857                   JDIMENSION output_col)
    858 {
    859   jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
    860                         output_col);
    861 }
    862 
    863 GLOBAL(int)
    864 jsimd_can_huff_encode_one_block (void)
    865 {
    866   init_simd();
    867 
    868   if (DCTSIZE != 8)
    869     return 0;
    870   if (sizeof(JCOEF) != 2)
    871     return 0;
    872 
    873   if ((simd_support & JSIMD_SSE2) && simd_huffman &&
    874       IS_ALIGNED_SSE(jconst_huff_encode_one_block))
    875     return 1;
    876 
    877   return 0;
    878 }
    879 
    880 GLOBAL(JOCTET*)
    881 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block,
    882                              int last_dc_val, c_derived_tbl *dctbl,
    883                              c_derived_tbl *actbl)
    884 {
    885   return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
    886                                           dctbl, actbl);
    887 }
    888