Home | History | Annotate | Download | only in simd
      1 /*
      2  * jsimd_i386.c
      3  *
      4  * Copyright 2009 Pierre Ossman <ossman (at) cendio.se> for Cendio AB
      5  * Copyright (C) 2009-2011, 2013-2014, 2016, D. R. Commander.
      6  * Copyright (C) 2015, Matthieu Darbois.
      7  *
      8  * Based on the x86 SIMD extension for IJG JPEG library,
      9  * Copyright (C) 1999-2006, MIYASAKA Masaru.
     10  * For conditions of distribution and use, see copyright notice in jsimdext.inc
     11  *
     12  * This file contains the interface between the "normal" portions
     13  * of the library and the SIMD implementations when running on a
     14  * 32-bit x86 architecture.
     15  */
     16 
     17 #define JPEG_INTERNALS
     18 #include "../jinclude.h"
     19 #include "../jpeglib.h"
     20 #include "../jsimd.h"
     21 #include "../jdct.h"
     22 #include "../jsimddct.h"
     23 #include "jsimd.h"
     24 
     25 /*
     26  * In the PIC cases, we have no guarantee that constants will keep
     27  * their alignment. This macro allows us to verify it at runtime.
     28  */
     29 #define IS_ALIGNED(ptr, order) (((unsigned)ptr & ((1 << order) - 1)) == 0)
     30 
     31 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
     32 
     33 static unsigned int simd_support = ~0;
     34 static unsigned int simd_huffman = 1;
     35 
     36 /*
     37  * Check what SIMD accelerations are supported.
     38  *
     39  * FIXME: This code is racy under a multi-threaded environment.
     40  */
     41 LOCAL(void)
     42 init_simd (void)
     43 {
     44   char *env = NULL;
     45 
     46   if (simd_support != ~0U)
     47     return;
     48 
     49   simd_support = jpeg_simd_cpu_support();
     50 
     51   /* Force different settings through environment variables */
     52   env = getenv("JSIMD_FORCEMMX");
     53   if ((env != NULL) && (strcmp(env, "1") == 0))
     54     simd_support &= JSIMD_MMX;
     55   env = getenv("JSIMD_FORCE3DNOW");
     56   if ((env != NULL) && (strcmp(env, "1") == 0))
     57     simd_support &= JSIMD_3DNOW|JSIMD_MMX;
     58   env = getenv("JSIMD_FORCESSE");
     59   if ((env != NULL) && (strcmp(env, "1") == 0))
     60     simd_support &= JSIMD_SSE|JSIMD_MMX;
     61   env = getenv("JSIMD_FORCESSE2");
     62   if ((env != NULL) && (strcmp(env, "1") == 0))
     63     simd_support &= JSIMD_SSE2;
     64   env = getenv("JSIMD_FORCENONE");
     65   if ((env != NULL) && (strcmp(env, "1") == 0))
     66     simd_support = 0;
     67   env = getenv("JSIMD_NOHUFFENC");
     68   if ((env != NULL) && (strcmp(env, "1") == 0))
     69     simd_huffman = 0;
     70 }
     71 
     72 GLOBAL(int)
     73 jsimd_can_rgb_ycc (void)
     74 {
     75   init_simd();
     76 
     77   /* The code is optimised for these values only */
     78   if (BITS_IN_JSAMPLE != 8)
     79     return 0;
     80   if (sizeof(JDIMENSION) != 4)
     81     return 0;
     82   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
     83     return 0;
     84 
     85   if ((simd_support & JSIMD_SSE2) &&
     86       IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
     87     return 1;
     88   if (simd_support & JSIMD_MMX)
     89     return 1;
     90 
     91   return 0;
     92 }
     93 
     94 GLOBAL(int)
     95 jsimd_can_rgb_gray (void)
     96 {
     97   init_simd();
     98 
     99   /* The code is optimised for these values only */
    100   if (BITS_IN_JSAMPLE != 8)
    101     return 0;
    102   if (sizeof(JDIMENSION) != 4)
    103     return 0;
    104   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    105     return 0;
    106 
    107   if ((simd_support & JSIMD_SSE2) &&
    108       IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
    109     return 1;
    110   if (simd_support & JSIMD_MMX)
    111     return 1;
    112 
    113   return 0;
    114 }
    115 
    116 GLOBAL(int)
    117 jsimd_can_ycc_rgb (void)
    118 {
    119   init_simd();
    120 
    121   /* The code is optimised for these values only */
    122   if (BITS_IN_JSAMPLE != 8)
    123     return 0;
    124   if (sizeof(JDIMENSION) != 4)
    125     return 0;
    126   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    127     return 0;
    128 
    129   if ((simd_support & JSIMD_SSE2) &&
    130       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
    131     return 1;
    132   if (simd_support & JSIMD_MMX)
    133     return 1;
    134 
    135   return 0;
    136 }
    137 
/*
 * Always returns 0: this file provides no SIMD ycc_rgb565 conversion
 * (see the empty jsimd_ycc_rgb565_convert() below).
 */
GLOBAL(int)
jsimd_can_ycc_rgb565 (void)
{
  return 0;
}
    143 
    144 GLOBAL(void)
    145 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
    146                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
    147                        JDIMENSION output_row, int num_rows)
    148 {
    149   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    150   void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    151 
    152   switch(cinfo->in_color_space) {
    153     case JCS_EXT_RGB:
    154       sse2fct=jsimd_extrgb_ycc_convert_sse2;
    155       mmxfct=jsimd_extrgb_ycc_convert_mmx;
    156       break;
    157     case JCS_EXT_RGBX:
    158     case JCS_EXT_RGBA:
    159       sse2fct=jsimd_extrgbx_ycc_convert_sse2;
    160       mmxfct=jsimd_extrgbx_ycc_convert_mmx;
    161       break;
    162     case JCS_EXT_BGR:
    163       sse2fct=jsimd_extbgr_ycc_convert_sse2;
    164       mmxfct=jsimd_extbgr_ycc_convert_mmx;
    165       break;
    166     case JCS_EXT_BGRX:
    167     case JCS_EXT_BGRA:
    168       sse2fct=jsimd_extbgrx_ycc_convert_sse2;
    169       mmxfct=jsimd_extbgrx_ycc_convert_mmx;
    170       break;
    171     case JCS_EXT_XBGR:
    172     case JCS_EXT_ABGR:
    173       sse2fct=jsimd_extxbgr_ycc_convert_sse2;
    174       mmxfct=jsimd_extxbgr_ycc_convert_mmx;
    175       break;
    176     case JCS_EXT_XRGB:
    177     case JCS_EXT_ARGB:
    178       sse2fct=jsimd_extxrgb_ycc_convert_sse2;
    179       mmxfct=jsimd_extxrgb_ycc_convert_mmx;
    180       break;
    181     default:
    182       sse2fct=jsimd_rgb_ycc_convert_sse2;
    183       mmxfct=jsimd_rgb_ycc_convert_mmx;
    184       break;
    185   }
    186 
    187   if ((simd_support & JSIMD_SSE2) &&
    188       IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
    189     sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    190   else if (simd_support & JSIMD_MMX)
    191     mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    192 }
    193 
    194 GLOBAL(void)
    195 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
    196                         JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
    197                         JDIMENSION output_row, int num_rows)
    198 {
    199   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    200   void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    201 
    202   switch(cinfo->in_color_space) {
    203     case JCS_EXT_RGB:
    204       sse2fct=jsimd_extrgb_gray_convert_sse2;
    205       mmxfct=jsimd_extrgb_gray_convert_mmx;
    206       break;
    207     case JCS_EXT_RGBX:
    208     case JCS_EXT_RGBA:
    209       sse2fct=jsimd_extrgbx_gray_convert_sse2;
    210       mmxfct=jsimd_extrgbx_gray_convert_mmx;
    211       break;
    212     case JCS_EXT_BGR:
    213       sse2fct=jsimd_extbgr_gray_convert_sse2;
    214       mmxfct=jsimd_extbgr_gray_convert_mmx;
    215       break;
    216     case JCS_EXT_BGRX:
    217     case JCS_EXT_BGRA:
    218       sse2fct=jsimd_extbgrx_gray_convert_sse2;
    219       mmxfct=jsimd_extbgrx_gray_convert_mmx;
    220       break;
    221     case JCS_EXT_XBGR:
    222     case JCS_EXT_ABGR:
    223       sse2fct=jsimd_extxbgr_gray_convert_sse2;
    224       mmxfct=jsimd_extxbgr_gray_convert_mmx;
    225       break;
    226     case JCS_EXT_XRGB:
    227     case JCS_EXT_ARGB:
    228       sse2fct=jsimd_extxrgb_gray_convert_sse2;
    229       mmxfct=jsimd_extxrgb_gray_convert_mmx;
    230       break;
    231     default:
    232       sse2fct=jsimd_rgb_gray_convert_sse2;
    233       mmxfct=jsimd_rgb_gray_convert_mmx;
    234       break;
    235   }
    236 
    237   if ((simd_support & JSIMD_SSE2) &&
    238       IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
    239     sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    240   else if (simd_support & JSIMD_MMX)
    241     mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    242 }
    243 
    244 GLOBAL(void)
    245 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
    246                        JSAMPIMAGE input_buf, JDIMENSION input_row,
    247                        JSAMPARRAY output_buf, int num_rows)
    248 {
    249   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
    250   void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
    251 
    252   switch(cinfo->out_color_space) {
    253     case JCS_EXT_RGB:
    254       sse2fct=jsimd_ycc_extrgb_convert_sse2;
    255       mmxfct=jsimd_ycc_extrgb_convert_mmx;
    256       break;
    257     case JCS_EXT_RGBX:
    258     case JCS_EXT_RGBA:
    259       sse2fct=jsimd_ycc_extrgbx_convert_sse2;
    260       mmxfct=jsimd_ycc_extrgbx_convert_mmx;
    261       break;
    262     case JCS_EXT_BGR:
    263       sse2fct=jsimd_ycc_extbgr_convert_sse2;
    264       mmxfct=jsimd_ycc_extbgr_convert_mmx;
    265       break;
    266     case JCS_EXT_BGRX:
    267     case JCS_EXT_BGRA:
    268       sse2fct=jsimd_ycc_extbgrx_convert_sse2;
    269       mmxfct=jsimd_ycc_extbgrx_convert_mmx;
    270       break;
    271     case JCS_EXT_XBGR:
    272     case JCS_EXT_ABGR:
    273       sse2fct=jsimd_ycc_extxbgr_convert_sse2;
    274       mmxfct=jsimd_ycc_extxbgr_convert_mmx;
    275       break;
    276     case JCS_EXT_XRGB:
    277     case JCS_EXT_ARGB:
    278       sse2fct=jsimd_ycc_extxrgb_convert_sse2;
    279       mmxfct=jsimd_ycc_extxrgb_convert_mmx;
    280       break;
    281     default:
    282       sse2fct=jsimd_ycc_rgb_convert_sse2;
    283       mmxfct=jsimd_ycc_rgb_convert_mmx;
    284       break;
    285   }
    286 
    287   if ((simd_support & JSIMD_SSE2) &&
    288       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
    289     sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
    290   else if (simd_support & JSIMD_MMX)
    291     mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
    292 }
    293 
/*
 * Intentionally empty: no SIMD ycc_rgb565 routine is dispatched from this
 * file, and jsimd_can_ycc_rgb565() above always returns 0.  All parameters
 * are unused.
 */
GLOBAL(void)
jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
                          JSAMPIMAGE input_buf, JDIMENSION input_row,
                          JSAMPARRAY output_buf, int num_rows)
{
}
    300 
    301 GLOBAL(int)
    302 jsimd_can_h2v2_downsample (void)
    303 {
    304   init_simd();
    305 
    306   /* The code is optimised for these values only */
    307   if (BITS_IN_JSAMPLE != 8)
    308     return 0;
    309   if (sizeof(JDIMENSION) != 4)
    310     return 0;
    311 
    312   if (simd_support & JSIMD_SSE2)
    313     return 1;
    314   if (simd_support & JSIMD_MMX)
    315     return 1;
    316 
    317   return 0;
    318 }
    319 
    320 GLOBAL(int)
    321 jsimd_can_h2v1_downsample (void)
    322 {
    323   init_simd();
    324 
    325   /* The code is optimised for these values only */
    326   if (BITS_IN_JSAMPLE != 8)
    327     return 0;
    328   if (sizeof(JDIMENSION) != 4)
    329     return 0;
    330 
    331   if (simd_support & JSIMD_SSE2)
    332     return 1;
    333   if (simd_support & JSIMD_MMX)
    334     return 1;
    335 
    336   return 0;
    337 }
    338 
    339 GLOBAL(void)
    340 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
    341                        JSAMPARRAY input_data, JSAMPARRAY output_data)
    342 {
    343   if (simd_support & JSIMD_SSE2)
    344     jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
    345                                compptr->v_samp_factor,
    346                                compptr->width_in_blocks, input_data,
    347                                output_data);
    348   else if (simd_support & JSIMD_MMX)
    349     jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
    350                               compptr->v_samp_factor, compptr->width_in_blocks,
    351                               input_data, output_data);
    352 }
    353 
    354 GLOBAL(void)
    355 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
    356                        JSAMPARRAY input_data, JSAMPARRAY output_data)
    357 {
    358   if (simd_support & JSIMD_SSE2)
    359     jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
    360                                compptr->v_samp_factor,
    361                                compptr->width_in_blocks, input_data,
    362                                output_data);
    363   else if (simd_support & JSIMD_MMX)
    364     jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
    365                               compptr->v_samp_factor, compptr->width_in_blocks,
    366                               input_data, output_data);
    367 }
    368 
    369 GLOBAL(int)
    370 jsimd_can_h2v2_upsample (void)
    371 {
    372   init_simd();
    373 
    374   /* The code is optimised for these values only */
    375   if (BITS_IN_JSAMPLE != 8)
    376     return 0;
    377   if (sizeof(JDIMENSION) != 4)
    378     return 0;
    379 
    380   if (simd_support & JSIMD_SSE2)
    381     return 1;
    382   if (simd_support & JSIMD_MMX)
    383     return 1;
    384 
    385   return 0;
    386 }
    387 
    388 GLOBAL(int)
    389 jsimd_can_h2v1_upsample (void)
    390 {
    391   init_simd();
    392 
    393   /* The code is optimised for these values only */
    394   if (BITS_IN_JSAMPLE != 8)
    395     return 0;
    396   if (sizeof(JDIMENSION) != 4)
    397     return 0;
    398 
    399   if (simd_support & JSIMD_SSE2)
    400     return 1;
    401   if (simd_support & JSIMD_MMX)
    402     return 1;
    403 
    404   return 0;
    405 }
    406 
    407 GLOBAL(void)
    408 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
    409                      jpeg_component_info *compptr,
    410                      JSAMPARRAY input_data,
    411                      JSAMPARRAY *output_data_ptr)
    412 {
    413   if (simd_support & JSIMD_SSE2)
    414     jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
    415                              input_data, output_data_ptr);
    416   else if (simd_support & JSIMD_MMX)
    417     jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
    418                             input_data, output_data_ptr);
    419 }
    420 
    421 GLOBAL(void)
    422 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
    423                      jpeg_component_info *compptr,
    424                      JSAMPARRAY input_data,
    425                      JSAMPARRAY *output_data_ptr)
    426 {
    427   if (simd_support & JSIMD_SSE2)
    428     jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
    429                              input_data, output_data_ptr);
    430   else if (simd_support & JSIMD_MMX)
    431     jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
    432                             input_data, output_data_ptr);
    433 }
    434 
    435 GLOBAL(int)
    436 jsimd_can_h2v2_fancy_upsample (void)
    437 {
    438   init_simd();
    439 
    440   /* The code is optimised for these values only */
    441   if (BITS_IN_JSAMPLE != 8)
    442     return 0;
    443   if (sizeof(JDIMENSION) != 4)
    444     return 0;
    445 
    446   if ((simd_support & JSIMD_SSE2) &&
    447       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
    448     return 1;
    449   if (simd_support & JSIMD_MMX)
    450     return 1;
    451 
    452   return 0;
    453 }
    454 
    455 GLOBAL(int)
    456 jsimd_can_h2v1_fancy_upsample (void)
    457 {
    458   init_simd();
    459 
    460   /* The code is optimised for these values only */
    461   if (BITS_IN_JSAMPLE != 8)
    462     return 0;
    463   if (sizeof(JDIMENSION) != 4)
    464     return 0;
    465 
    466   if ((simd_support & JSIMD_SSE2) &&
    467       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
    468     return 1;
    469   if (simd_support & JSIMD_MMX)
    470     return 1;
    471 
    472   return 0;
    473 }
    474 
    475 GLOBAL(void)
    476 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
    477                            jpeg_component_info *compptr,
    478                            JSAMPARRAY input_data,
    479                            JSAMPARRAY *output_data_ptr)
    480 {
    481   if ((simd_support & JSIMD_SSE2) &&
    482       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
    483     jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
    484                                    compptr->downsampled_width, input_data,
    485                                    output_data_ptr);
    486   else if (simd_support & JSIMD_MMX)
    487     jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
    488                                   compptr->downsampled_width, input_data,
    489                                   output_data_ptr);
    490 }
    491 
    492 GLOBAL(void)
    493 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
    494                            jpeg_component_info *compptr,
    495                            JSAMPARRAY input_data,
    496                            JSAMPARRAY *output_data_ptr)
    497 {
    498   if ((simd_support & JSIMD_SSE2) &&
    499       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
    500     jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
    501                                    compptr->downsampled_width, input_data,
    502                                    output_data_ptr);
    503   else if (simd_support & JSIMD_MMX)
    504     jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
    505                                   compptr->downsampled_width, input_data,
    506                                   output_data_ptr);
    507 }
    508 
    509 GLOBAL(int)
    510 jsimd_can_h2v2_merged_upsample (void)
    511 {
    512   init_simd();
    513 
    514   /* The code is optimised for these values only */
    515   if (BITS_IN_JSAMPLE != 8)
    516     return 0;
    517   if (sizeof(JDIMENSION) != 4)
    518     return 0;
    519 
    520   if ((simd_support & JSIMD_SSE2) &&
    521       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
    522     return 1;
    523   if (simd_support & JSIMD_MMX)
    524     return 1;
    525 
    526   return 0;
    527 }
    528 
    529 GLOBAL(int)
    530 jsimd_can_h2v1_merged_upsample (void)
    531 {
    532   init_simd();
    533 
    534   /* The code is optimised for these values only */
    535   if (BITS_IN_JSAMPLE != 8)
    536     return 0;
    537   if (sizeof(JDIMENSION) != 4)
    538     return 0;
    539 
    540   if ((simd_support & JSIMD_SSE2) &&
    541       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
    542     return 1;
    543   if (simd_support & JSIMD_MMX)
    544     return 1;
    545 
    546   return 0;
    547 }
    548 
    549 GLOBAL(void)
    550 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
    551                             JSAMPIMAGE input_buf,
    552                             JDIMENSION in_row_group_ctr,
    553                             JSAMPARRAY output_buf)
    554 {
    555   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    556   void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    557 
    558   switch(cinfo->out_color_space) {
    559     case JCS_EXT_RGB:
    560       sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
    561       mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx;
    562       break;
    563     case JCS_EXT_RGBX:
    564     case JCS_EXT_RGBA:
    565       sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
    566       mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx;
    567       break;
    568     case JCS_EXT_BGR:
    569       sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
    570       mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx;
    571       break;
    572     case JCS_EXT_BGRX:
    573     case JCS_EXT_BGRA:
    574       sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
    575       mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx;
    576       break;
    577     case JCS_EXT_XBGR:
    578     case JCS_EXT_ABGR:
    579       sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
    580       mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx;
    581       break;
    582     case JCS_EXT_XRGB:
    583     case JCS_EXT_ARGB:
    584       sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
    585       mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx;
    586       break;
    587     default:
    588       sse2fct=jsimd_h2v2_merged_upsample_sse2;
    589       mmxfct=jsimd_h2v2_merged_upsample_mmx;
    590       break;
    591   }
    592 
    593   if ((simd_support & JSIMD_SSE2) &&
    594       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
    595     sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    596   else if (simd_support & JSIMD_MMX)
    597     mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    598 }
    599 
    600 GLOBAL(void)
    601 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
    602                             JSAMPIMAGE input_buf,
    603                             JDIMENSION in_row_group_ctr,
    604                             JSAMPARRAY output_buf)
    605 {
    606   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    607   void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    608 
    609   switch(cinfo->out_color_space) {
    610     case JCS_EXT_RGB:
    611       sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
    612       mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx;
    613       break;
    614     case JCS_EXT_RGBX:
    615     case JCS_EXT_RGBA:
    616       sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
    617       mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx;
    618       break;
    619     case JCS_EXT_BGR:
    620       sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
    621       mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx;
    622       break;
    623     case JCS_EXT_BGRX:
    624     case JCS_EXT_BGRA:
    625       sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
    626       mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx;
    627       break;
    628     case JCS_EXT_XBGR:
    629     case JCS_EXT_ABGR:
    630       sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
    631       mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx;
    632       break;
    633     case JCS_EXT_XRGB:
    634     case JCS_EXT_ARGB:
    635       sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
    636       mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx;
    637       break;
    638     default:
    639       sse2fct=jsimd_h2v1_merged_upsample_sse2;
    640       mmxfct=jsimd_h2v1_merged_upsample_mmx;
    641       break;
    642   }
    643 
    644   if ((simd_support & JSIMD_SSE2) &&
    645       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
    646     sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    647   else if (simd_support & JSIMD_MMX)
    648     mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    649 }
    650 
    651 GLOBAL(int)
    652 jsimd_can_convsamp (void)
    653 {
    654   init_simd();
    655 
    656   /* The code is optimised for these values only */
    657   if (DCTSIZE != 8)
    658     return 0;
    659   if (BITS_IN_JSAMPLE != 8)
    660     return 0;
    661   if (sizeof(JDIMENSION) != 4)
    662     return 0;
    663   if (sizeof(DCTELEM) != 2)
    664     return 0;
    665 
    666   if (simd_support & JSIMD_SSE2)
    667     return 1;
    668   if (simd_support & JSIMD_MMX)
    669     return 1;
    670 
    671   return 0;
    672 }
    673 
    674 GLOBAL(int)
    675 jsimd_can_convsamp_float (void)
    676 {
    677   init_simd();
    678 
    679   /* The code is optimised for these values only */
    680   if (DCTSIZE != 8)
    681     return 0;
    682   if (BITS_IN_JSAMPLE != 8)
    683     return 0;
    684   if (sizeof(JDIMENSION) != 4)
    685     return 0;
    686   if (sizeof(FAST_FLOAT) != 4)
    687     return 0;
    688 
    689   if (simd_support & JSIMD_SSE2)
    690     return 1;
    691   if (simd_support & JSIMD_SSE)
    692     return 1;
    693   if (simd_support & JSIMD_3DNOW)
    694     return 1;
    695 
    696   return 0;
    697 }
    698 
    699 GLOBAL(void)
    700 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
    701                 DCTELEM *workspace)
    702 {
    703   if (simd_support & JSIMD_SSE2)
    704     jsimd_convsamp_sse2(sample_data, start_col, workspace);
    705   else if (simd_support & JSIMD_MMX)
    706     jsimd_convsamp_mmx(sample_data, start_col, workspace);
    707 }
    708 
    709 GLOBAL(void)
    710 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
    711                       FAST_FLOAT *workspace)
    712 {
    713   if (simd_support & JSIMD_SSE2)
    714     jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
    715   else if (simd_support & JSIMD_SSE)
    716     jsimd_convsamp_float_sse(sample_data, start_col, workspace);
    717   else if (simd_support & JSIMD_3DNOW)
    718     jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
    719 }
    720 
    721 GLOBAL(int)
    722 jsimd_can_fdct_islow (void)
    723 {
    724   init_simd();
    725 
    726   /* The code is optimised for these values only */
    727   if (DCTSIZE != 8)
    728     return 0;
    729   if (sizeof(DCTELEM) != 2)
    730     return 0;
    731 
    732   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
    733     return 1;
    734   if (simd_support & JSIMD_MMX)
    735     return 1;
    736 
    737   return 0;
    738 }
    739 
    740 GLOBAL(int)
    741 jsimd_can_fdct_ifast (void)
    742 {
    743   init_simd();
    744 
    745   /* The code is optimised for these values only */
    746   if (DCTSIZE != 8)
    747     return 0;
    748   if (sizeof(DCTELEM) != 2)
    749     return 0;
    750 
    751   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
    752     return 1;
    753   if (simd_support & JSIMD_MMX)
    754     return 1;
    755 
    756   return 0;
    757 }
    758 
    759 GLOBAL(int)
    760 jsimd_can_fdct_float (void)
    761 {
    762   init_simd();
    763 
    764   /* The code is optimised for these values only */
    765   if (DCTSIZE != 8)
    766     return 0;
    767   if (sizeof(FAST_FLOAT) != 4)
    768     return 0;
    769 
    770   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
    771     return 1;
    772   if (simd_support & JSIMD_3DNOW)
    773     return 1;
    774 
    775   return 0;
    776 }
    777 
    778 GLOBAL(void)
    779 jsimd_fdct_islow (DCTELEM *data)
    780 {
    781   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
    782     jsimd_fdct_islow_sse2(data);
    783   else if (simd_support & JSIMD_MMX)
    784     jsimd_fdct_islow_mmx(data);
    785 }
    786 
    787 GLOBAL(void)
    788 jsimd_fdct_ifast (DCTELEM *data)
    789 {
    790   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
    791     jsimd_fdct_ifast_sse2(data);
    792   else if (simd_support & JSIMD_MMX)
    793     jsimd_fdct_ifast_mmx(data);
    794 }
    795 
    796 GLOBAL(void)
    797 jsimd_fdct_float (FAST_FLOAT *data)
    798 {
    799   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
    800     jsimd_fdct_float_sse(data);
    801   else if (simd_support & JSIMD_3DNOW)
    802     jsimd_fdct_float_3dnow(data);
    803 }
    804 
    805 GLOBAL(int)
    806 jsimd_can_quantize (void)
    807 {
    808   init_simd();
    809 
    810   /* The code is optimised for these values only */
    811   if (DCTSIZE != 8)
    812     return 0;
    813   if (sizeof(JCOEF) != 2)
    814     return 0;
    815   if (sizeof(DCTELEM) != 2)
    816     return 0;
    817 
    818   if (simd_support & JSIMD_SSE2)
    819     return 1;
    820   if (simd_support & JSIMD_MMX)
    821     return 1;
    822 
    823   return 0;
    824 }
    825 
    826 GLOBAL(int)
    827 jsimd_can_quantize_float (void)
    828 {
    829   init_simd();
    830 
    831   /* The code is optimised for these values only */
    832   if (DCTSIZE != 8)
    833     return 0;
    834   if (sizeof(JCOEF) != 2)
    835     return 0;
    836   if (sizeof(FAST_FLOAT) != 4)
    837     return 0;
    838 
    839   if (simd_support & JSIMD_SSE2)
    840     return 1;
    841   if (simd_support & JSIMD_SSE)
    842     return 1;
    843   if (simd_support & JSIMD_3DNOW)
    844     return 1;
    845 
    846   return 0;
    847 }
    848 
    849 GLOBAL(void)
    850 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors,
    851                 DCTELEM *workspace)
    852 {
    853   if (simd_support & JSIMD_SSE2)
    854     jsimd_quantize_sse2(coef_block, divisors, workspace);
    855   else if (simd_support & JSIMD_MMX)
    856     jsimd_quantize_mmx(coef_block, divisors, workspace);
    857 }
    858 
    859 GLOBAL(void)
    860 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors,
    861                       FAST_FLOAT *workspace)
    862 {
    863   if (simd_support & JSIMD_SSE2)
    864     jsimd_quantize_float_sse2(coef_block, divisors, workspace);
    865   else if (simd_support & JSIMD_SSE)
    866     jsimd_quantize_float_sse(coef_block, divisors, workspace);
    867   else if (simd_support & JSIMD_3DNOW)
    868     jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
    869 }
    870 
    871 GLOBAL(int)
    872 jsimd_can_idct_2x2 (void)
    873 {
    874   init_simd();
    875 
    876   /* The code is optimised for these values only */
    877   if (DCTSIZE != 8)
    878     return 0;
    879   if (sizeof(JCOEF) != 2)
    880     return 0;
    881   if (BITS_IN_JSAMPLE != 8)
    882     return 0;
    883   if (sizeof(JDIMENSION) != 4)
    884     return 0;
    885   if (sizeof(ISLOW_MULT_TYPE) != 2)
    886     return 0;
    887 
    888   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
    889     return 1;
    890   if (simd_support & JSIMD_MMX)
    891     return 1;
    892 
    893   return 0;
    894 }
    895 
    896 GLOBAL(int)
    897 jsimd_can_idct_4x4 (void)
    898 {
    899   init_simd();
    900 
    901   /* The code is optimised for these values only */
    902   if (DCTSIZE != 8)
    903     return 0;
    904   if (sizeof(JCOEF) != 2)
    905     return 0;
    906   if (BITS_IN_JSAMPLE != 8)
    907     return 0;
    908   if (sizeof(JDIMENSION) != 4)
    909     return 0;
    910   if (sizeof(ISLOW_MULT_TYPE) != 2)
    911     return 0;
    912 
    913   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
    914     return 1;
    915   if (simd_support & JSIMD_MMX)
    916     return 1;
    917 
    918   return 0;
    919 }
    920 
    921 GLOBAL(void)
    922 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    923                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    924                 JDIMENSION output_col)
    925 {
    926   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
    927     jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
    928                         output_col);
    929   else if (simd_support & JSIMD_MMX)
    930     jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
    931 }
    932 
    933 GLOBAL(void)
    934 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    935                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    936                 JDIMENSION output_col)
    937 {
    938   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
    939     jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
    940                         output_col);
    941   else if (simd_support & JSIMD_MMX)
    942     jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
    943 }
    944 
    945 GLOBAL(int)
    946 jsimd_can_idct_islow (void)
    947 {
    948   init_simd();
    949 
    950   /* The code is optimised for these values only */
    951   if (DCTSIZE != 8)
    952     return 0;
    953   if (sizeof(JCOEF) != 2)
    954     return 0;
    955   if (BITS_IN_JSAMPLE != 8)
    956     return 0;
    957   if (sizeof(JDIMENSION) != 4)
    958     return 0;
    959   if (sizeof(ISLOW_MULT_TYPE) != 2)
    960     return 0;
    961 
    962   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
    963     return 1;
    964   if (simd_support & JSIMD_MMX)
    965     return 1;
    966 
    967   return 0;
    968 }
    969 
    970 GLOBAL(int)
    971 jsimd_can_idct_ifast (void)
    972 {
    973   init_simd();
    974 
    975   /* The code is optimised for these values only */
    976   if (DCTSIZE != 8)
    977     return 0;
    978   if (sizeof(JCOEF) != 2)
    979     return 0;
    980   if (BITS_IN_JSAMPLE != 8)
    981     return 0;
    982   if (sizeof(JDIMENSION) != 4)
    983     return 0;
    984   if (sizeof(IFAST_MULT_TYPE) != 2)
    985     return 0;
    986   if (IFAST_SCALE_BITS != 2)
    987     return 0;
    988 
    989   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
    990     return 1;
    991   if (simd_support & JSIMD_MMX)
    992     return 1;
    993 
    994   return 0;
    995 }
    996 
    997 GLOBAL(int)
    998 jsimd_can_idct_float (void)
    999 {
   1000   init_simd();
   1001 
   1002   if (DCTSIZE != 8)
   1003     return 0;
   1004   if (sizeof(JCOEF) != 2)
   1005     return 0;
   1006   if (BITS_IN_JSAMPLE != 8)
   1007     return 0;
   1008   if (sizeof(JDIMENSION) != 4)
   1009     return 0;
   1010   if (sizeof(FAST_FLOAT) != 4)
   1011     return 0;
   1012   if (sizeof(FLOAT_MULT_TYPE) != 4)
   1013     return 0;
   1014 
   1015   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
   1016     return 1;
   1017   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
   1018     return 1;
   1019   if (simd_support & JSIMD_3DNOW)
   1020     return 1;
   1021 
   1022   return 0;
   1023 }
   1024 
   1025 GLOBAL(void)
   1026 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr,
   1027                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1028                   JDIMENSION output_col)
   1029 {
   1030   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
   1031     jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
   1032                           output_col);
   1033   else if (simd_support & JSIMD_MMX)
   1034     jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf,
   1035                          output_col);
   1036 }
   1037 
   1038 GLOBAL(void)
   1039 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr,
   1040                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1041                   JDIMENSION output_col)
   1042 {
   1043   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
   1044     jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
   1045                           output_col);
   1046   else if (simd_support & JSIMD_MMX)
   1047     jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf,
   1048                          output_col);
   1049 }
   1050 
   1051 GLOBAL(void)
   1052 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr,
   1053                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1054                   JDIMENSION output_col)
   1055 {
   1056   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
   1057     jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
   1058                           output_col);
   1059   else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
   1060     jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf,
   1061                          output_col);
   1062   else if (simd_support & JSIMD_3DNOW)
   1063     jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf,
   1064                            output_col);
   1065 }
   1066 
   1067 GLOBAL(int)
   1068 jsimd_can_huff_encode_one_block (void)
   1069 {
   1070   init_simd();
   1071 
   1072   if (DCTSIZE != 8)
   1073     return 0;
   1074   if (sizeof(JCOEF) != 2)
   1075     return 0;
   1076 
   1077   if ((simd_support & JSIMD_SSE2) && simd_huffman &&
   1078       IS_ALIGNED_SSE(jconst_huff_encode_one_block))
   1079     return 1;
   1080 
   1081   return 0;
   1082 }
   1083 
   1084 GLOBAL(JOCTET*)
   1085 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block,
   1086                              int last_dc_val, c_derived_tbl *dctbl,
   1087                              c_derived_tbl *actbl)
   1088 {
   1089   return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
   1090                                           dctbl, actbl);
   1091 }
   1092