Home | History | Annotate | Download | only in simd
      1 /*
      2  * jsimd_i386.c
      3  *
      4  * Copyright 2009 Pierre Ossman <ossman (at) cendio.se> for Cendio AB
      5  * Copyright 2009-2011 D. R. Commander
      6  *
      7  * Based on the x86 SIMD extension for IJG JPEG library,
      8  * Copyright (C) 1999-2006, MIYASAKA Masaru.
      9  * For conditions of distribution and use, see copyright notice in jsimdext.inc
     10  *
     11  * This file contains the interface between the "normal" portions
     12  * of the library and the SIMD implementations when running on a
     13  * 32-bit x86 architecture.
     14  */
     15 
     16 #define JPEG_INTERNALS
     17 #include "../jinclude.h"
     18 #include "../jpeglib.h"
     19 #include "../jsimd.h"
     20 #include "../jdct.h"
     21 #include "../jsimddct.h"
     22 #include "jsimd.h"
     23 
     24 /*
     25  * In the PIC cases, we have no guarantee that constants will keep
     26  * their alignment. This macro allows us to verify it at runtime.
     27  */
     28 #define IS_ALIGNED(ptr, order) (((unsigned)ptr & ((1 << order) - 1)) == 0)
     29 
     30 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
     31 
     32 static unsigned int simd_support = ~0;
     33 
     34 /*
     35  * Check what SIMD accelerations are supported.
     36  *
     37  * FIXME: This code is racy under a multi-threaded environment.
     38  */
     39 LOCAL(void)
     40 init_simd (void)
     41 {
     42   char *env = NULL;
     43 
     44   if (simd_support != ~0U)
     45     return;
     46 
     47   simd_support = jpeg_simd_cpu_support();
     48 
     49   /* Force different settings through environment variables */
     50   env = getenv("JSIMD_FORCEMMX");
     51   if ((env != NULL) && (strcmp(env, "1") == 0))
     52     simd_support &= JSIMD_MMX;
     53   env = getenv("JSIMD_FORCE3DNOW");
     54   if ((env != NULL) && (strcmp(env, "1") == 0))
     55     simd_support &= JSIMD_3DNOW|JSIMD_MMX;
     56   env = getenv("JSIMD_FORCESSE");
     57   if ((env != NULL) && (strcmp(env, "1") == 0))
     58     simd_support &= JSIMD_SSE|JSIMD_MMX;
     59   env = getenv("JSIMD_FORCESSE2");
     60   if ((env != NULL) && (strcmp(env, "1") == 0))
     61     simd_support &= JSIMD_SSE2;
     62 }
     63 
     64 #ifndef JPEG_DECODE_ONLY
     65 GLOBAL(int)
     66 jsimd_can_rgb_ycc (void)
     67 {
     68   init_simd();
     69 
     70   /* The code is optimised for these values only */
     71   if (BITS_IN_JSAMPLE != 8)
     72     return 0;
     73   if (sizeof(JDIMENSION) != 4)
     74     return 0;
     75   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
     76     return 0;
     77 
     78   if ((simd_support & JSIMD_SSE2) &&
     79       IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
     80     return 1;
     81   if (simd_support & JSIMD_MMX)
     82     return 1;
     83 
     84   return 0;
     85 }
     86 #endif
     87 
     88 GLOBAL(int)
     89 jsimd_can_rgb_gray (void)
     90 {
     91   init_simd();
     92 
     93   /* The code is optimised for these values only */
     94   if (BITS_IN_JSAMPLE != 8)
     95     return 0;
     96   if (sizeof(JDIMENSION) != 4)
     97     return 0;
     98   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
     99     return 0;
    100 
    101   if ((simd_support & JSIMD_SSE2) &&
    102       IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
    103     return 1;
    104   if (simd_support & JSIMD_MMX)
    105     return 1;
    106 
    107   return 0;
    108 }
    109 
    110 GLOBAL(int)
    111 jsimd_can_ycc_rgb (void)
    112 {
    113   init_simd();
    114 
    115   /* The code is optimised for these values only */
    116   if (BITS_IN_JSAMPLE != 8)
    117     return 0;
    118   if (sizeof(JDIMENSION) != 4)
    119     return 0;
    120   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    121     return 0;
    122 
    123   if ((simd_support & JSIMD_SSE2) &&
    124       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
    125     return 1;
    126   if (simd_support & JSIMD_MMX)
    127     return 1;
    128 
    129   return 0;
    130 }
    131 
    132 #ifndef JPEG_DECODE_ONLY
    133 GLOBAL(void)
    134 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
    135                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
    136                        JDIMENSION output_row, int num_rows)
    137 {
    138   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    139   void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    140 
    141   switch(cinfo->in_color_space)
    142   {
    143     case JCS_EXT_RGB:
    144       sse2fct=jsimd_extrgb_ycc_convert_sse2;
    145       mmxfct=jsimd_extrgb_ycc_convert_mmx;
    146       break;
    147     case JCS_EXT_RGBX:
    148     case JCS_EXT_RGBA:
    149       sse2fct=jsimd_extrgbx_ycc_convert_sse2;
    150       mmxfct=jsimd_extrgbx_ycc_convert_mmx;
    151       break;
    152     case JCS_EXT_BGR:
    153       sse2fct=jsimd_extbgr_ycc_convert_sse2;
    154       mmxfct=jsimd_extbgr_ycc_convert_mmx;
    155       break;
    156     case JCS_EXT_BGRX:
    157     case JCS_EXT_BGRA:
    158       sse2fct=jsimd_extbgrx_ycc_convert_sse2;
    159       mmxfct=jsimd_extbgrx_ycc_convert_mmx;
    160       break;
    161     case JCS_EXT_XBGR:
    162     case JCS_EXT_ABGR:
    163       sse2fct=jsimd_extxbgr_ycc_convert_sse2;
    164       mmxfct=jsimd_extxbgr_ycc_convert_mmx;
    165       break;
    166     case JCS_EXT_XRGB:
    167     case JCS_EXT_ARGB:
    168       sse2fct=jsimd_extxrgb_ycc_convert_sse2;
    169       mmxfct=jsimd_extxrgb_ycc_convert_mmx;
    170       break;
    171     default:
    172       sse2fct=jsimd_rgb_ycc_convert_sse2;
    173       mmxfct=jsimd_rgb_ycc_convert_mmx;
    174       break;
    175   }
    176 
    177   if ((simd_support & JSIMD_SSE2) &&
    178       IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
    179     sse2fct(cinfo->image_width, input_buf,
    180         output_buf, output_row, num_rows);
    181   else if (simd_support & JSIMD_MMX)
    182     mmxfct(cinfo->image_width, input_buf,
    183         output_buf, output_row, num_rows);
    184 }
    185 #endif
    186 
    187 GLOBAL(void)
    188 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
    189                         JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
    190                         JDIMENSION output_row, int num_rows)
    191 {
    192   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    193   void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    194 
    195   switch(cinfo->in_color_space)
    196   {
    197     case JCS_EXT_RGB:
    198       sse2fct=jsimd_extrgb_gray_convert_sse2;
    199       mmxfct=jsimd_extrgb_gray_convert_mmx;
    200       break;
    201     case JCS_EXT_RGBX:
    202     case JCS_EXT_RGBA:
    203       sse2fct=jsimd_extrgbx_gray_convert_sse2;
    204       mmxfct=jsimd_extrgbx_gray_convert_mmx;
    205       break;
    206     case JCS_EXT_BGR:
    207       sse2fct=jsimd_extbgr_gray_convert_sse2;
    208       mmxfct=jsimd_extbgr_gray_convert_mmx;
    209       break;
    210     case JCS_EXT_BGRX:
    211     case JCS_EXT_BGRA:
    212       sse2fct=jsimd_extbgrx_gray_convert_sse2;
    213       mmxfct=jsimd_extbgrx_gray_convert_mmx;
    214       break;
    215     case JCS_EXT_XBGR:
    216     case JCS_EXT_ABGR:
    217       sse2fct=jsimd_extxbgr_gray_convert_sse2;
    218       mmxfct=jsimd_extxbgr_gray_convert_mmx;
    219       break;
    220     case JCS_EXT_XRGB:
    221     case JCS_EXT_ARGB:
    222       sse2fct=jsimd_extxrgb_gray_convert_sse2;
    223       mmxfct=jsimd_extxrgb_gray_convert_mmx;
    224       break;
    225     default:
    226       sse2fct=jsimd_rgb_gray_convert_sse2;
    227       mmxfct=jsimd_rgb_gray_convert_mmx;
    228       break;
    229   }
    230 
    231   if ((simd_support & JSIMD_SSE2) &&
    232       IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
    233     sse2fct(cinfo->image_width, input_buf,
    234         output_buf, output_row, num_rows);
    235   else if (simd_support & JSIMD_MMX)
    236     mmxfct(cinfo->image_width, input_buf,
    237         output_buf, output_row, num_rows);
    238 }
    239 
    240 GLOBAL(void)
    241 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
    242                        JSAMPIMAGE input_buf, JDIMENSION input_row,
    243                        JSAMPARRAY output_buf, int num_rows)
    244 {
    245   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
    246   void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
    247 
    248   switch(cinfo->out_color_space)
    249   {
    250     case JCS_EXT_RGB:
    251       sse2fct=jsimd_ycc_extrgb_convert_sse2;
    252       mmxfct=jsimd_ycc_extrgb_convert_mmx;
    253       break;
    254     case JCS_EXT_RGBX:
    255     case JCS_EXT_RGBA:
    256       sse2fct=jsimd_ycc_extrgbx_convert_sse2;
    257       mmxfct=jsimd_ycc_extrgbx_convert_mmx;
    258       break;
    259     case JCS_EXT_BGR:
    260       sse2fct=jsimd_ycc_extbgr_convert_sse2;
    261       mmxfct=jsimd_ycc_extbgr_convert_mmx;
    262       break;
    263     case JCS_EXT_BGRX:
    264     case JCS_EXT_BGRA:
    265       sse2fct=jsimd_ycc_extbgrx_convert_sse2;
    266       mmxfct=jsimd_ycc_extbgrx_convert_mmx;
    267       break;
    268     case JCS_EXT_XBGR:
    269     case JCS_EXT_ABGR:
    270       sse2fct=jsimd_ycc_extxbgr_convert_sse2;
    271       mmxfct=jsimd_ycc_extxbgr_convert_mmx;
    272       break;
    273     case JCS_EXT_XRGB:
    274     case JCS_EXT_ARGB:
    275       sse2fct=jsimd_ycc_extxrgb_convert_sse2;
    276       mmxfct=jsimd_ycc_extxrgb_convert_mmx;
    277       break;
    278     default:
    279       sse2fct=jsimd_ycc_rgb_convert_sse2;
    280       mmxfct=jsimd_ycc_rgb_convert_mmx;
    281       break;
    282   }
    283 
    284   if ((simd_support & JSIMD_SSE2) &&
    285       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
    286     sse2fct(cinfo->output_width, input_buf,
    287         input_row, output_buf, num_rows);
    288   else if (simd_support & JSIMD_MMX)
    289     mmxfct(cinfo->output_width, input_buf,
    290         input_row, output_buf, num_rows);
    291 }
    292 
    293 #ifndef JPEG_DECODE_ONLY
    294 GLOBAL(int)
    295 jsimd_can_h2v2_downsample (void)
    296 {
    297   init_simd();
    298 
    299   /* The code is optimised for these values only */
    300   if (BITS_IN_JSAMPLE != 8)
    301     return 0;
    302   if (sizeof(JDIMENSION) != 4)
    303     return 0;
    304 
    305   if (simd_support & JSIMD_SSE2)
    306     return 1;
    307   if (simd_support & JSIMD_MMX)
    308     return 1;
    309 
    310   return 0;
    311 }
    312 
    313 GLOBAL(int)
    314 jsimd_can_h2v1_downsample (void)
    315 {
    316   init_simd();
    317 
    318   /* The code is optimised for these values only */
    319   if (BITS_IN_JSAMPLE != 8)
    320     return 0;
    321   if (sizeof(JDIMENSION) != 4)
    322     return 0;
    323 
    324   if (simd_support & JSIMD_SSE2)
    325     return 1;
    326   if (simd_support & JSIMD_MMX)
    327     return 1;
    328 
    329   return 0;
    330 }
    331 
    332 GLOBAL(void)
    333 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
    334                        JSAMPARRAY input_data, JSAMPARRAY output_data)
    335 {
    336   if (simd_support & JSIMD_SSE2)
    337     jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
    338         compptr->v_samp_factor, compptr->width_in_blocks,
    339         input_data, output_data);
    340   else if (simd_support & JSIMD_MMX)
    341     jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
    342         compptr->v_samp_factor, compptr->width_in_blocks,
    343         input_data, output_data);
    344 }
    345 
    346 GLOBAL(void)
    347 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
    348                        JSAMPARRAY input_data, JSAMPARRAY output_data)
    349 {
    350   if (simd_support & JSIMD_SSE2)
    351     jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
    352         compptr->v_samp_factor, compptr->width_in_blocks,
    353         input_data, output_data);
    354   else if (simd_support & JSIMD_MMX)
    355     jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
    356         compptr->v_samp_factor, compptr->width_in_blocks,
    357         input_data, output_data);
    358 }
    359 #endif
    360 
    361 GLOBAL(int)
    362 jsimd_can_h2v2_upsample (void)
    363 {
    364   init_simd();
    365 
    366   /* The code is optimised for these values only */
    367   if (BITS_IN_JSAMPLE != 8)
    368     return 0;
    369   if (sizeof(JDIMENSION) != 4)
    370     return 0;
    371 
    372   if (simd_support & JSIMD_SSE2)
    373     return 1;
    374   if (simd_support & JSIMD_MMX)
    375     return 1;
    376 
    377   return 0;
    378 }
    379 
    380 GLOBAL(int)
    381 jsimd_can_h2v1_upsample (void)
    382 {
    383   init_simd();
    384 
    385   /* The code is optimised for these values only */
    386   if (BITS_IN_JSAMPLE != 8)
    387     return 0;
    388   if (sizeof(JDIMENSION) != 4)
    389     return 0;
    390 
    391   if (simd_support & JSIMD_SSE2)
    392     return 1;
    393   if (simd_support & JSIMD_MMX)
    394     return 1;
    395 
    396   return 0;
    397 }
    398 
    399 GLOBAL(void)
    400 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
    401                      jpeg_component_info * compptr,
    402                      JSAMPARRAY input_data,
    403                      JSAMPARRAY * output_data_ptr)
    404 {
    405   if (simd_support & JSIMD_SSE2)
    406     jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor,
    407         cinfo->output_width, input_data, output_data_ptr);
    408   else if (simd_support & JSIMD_MMX)
    409     jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor,
    410         cinfo->output_width, input_data, output_data_ptr);
    411 }
    412 
    413 GLOBAL(void)
    414 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
    415                      jpeg_component_info * compptr,
    416                      JSAMPARRAY input_data,
    417                      JSAMPARRAY * output_data_ptr)
    418 {
    419   if (simd_support & JSIMD_SSE2)
    420     jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor,
    421         cinfo->output_width, input_data, output_data_ptr);
    422   else if (simd_support & JSIMD_MMX)
    423     jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor,
    424         cinfo->output_width, input_data, output_data_ptr);
    425 }
    426 
    427 GLOBAL(int)
    428 jsimd_can_h2v2_fancy_upsample (void)
    429 {
    430   init_simd();
    431 
    432   /* The code is optimised for these values only */
    433   if (BITS_IN_JSAMPLE != 8)
    434     return 0;
    435   if (sizeof(JDIMENSION) != 4)
    436     return 0;
    437 
    438   if ((simd_support & JSIMD_SSE2) &&
    439       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
    440     return 1;
    441   if (simd_support & JSIMD_MMX)
    442     return 1;
    443 
    444   return 0;
    445 }
    446 
    447 GLOBAL(int)
    448 jsimd_can_h2v1_fancy_upsample (void)
    449 {
    450   init_simd();
    451 
    452   /* The code is optimised for these values only */
    453   if (BITS_IN_JSAMPLE != 8)
    454     return 0;
    455   if (sizeof(JDIMENSION) != 4)
    456     return 0;
    457 
    458   if ((simd_support & JSIMD_SSE2) &&
    459       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
    460     return 1;
    461   if (simd_support & JSIMD_MMX)
    462     return 1;
    463 
    464   return 0;
    465 }
    466 
    467 GLOBAL(void)
    468 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
    469                            jpeg_component_info * compptr,
    470                            JSAMPARRAY input_data,
    471                            JSAMPARRAY * output_data_ptr)
    472 {
    473   if ((simd_support & JSIMD_SSE2) &&
    474       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
    475     jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
    476         compptr->downsampled_width, input_data, output_data_ptr);
    477   else if (simd_support & JSIMD_MMX)
    478     jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
    479         compptr->downsampled_width, input_data, output_data_ptr);
    480 }
    481 
    482 GLOBAL(void)
    483 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
    484                            jpeg_component_info * compptr,
    485                            JSAMPARRAY input_data,
    486                            JSAMPARRAY * output_data_ptr)
    487 {
    488   if ((simd_support & JSIMD_SSE2) &&
    489       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
    490     jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
    491         compptr->downsampled_width, input_data, output_data_ptr);
    492   else if (simd_support & JSIMD_MMX)
    493     jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
    494         compptr->downsampled_width, input_data, output_data_ptr);
    495 }
    496 
    497 GLOBAL(int)
    498 jsimd_can_h2v2_merged_upsample (void)
    499 {
    500   init_simd();
    501 
    502   /* The code is optimised for these values only */
    503   if (BITS_IN_JSAMPLE != 8)
    504     return 0;
    505   if (sizeof(JDIMENSION) != 4)
    506     return 0;
    507 
    508   if ((simd_support & JSIMD_SSE2) &&
    509       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
    510     return 1;
    511   if (simd_support & JSIMD_MMX)
    512     return 1;
    513 
    514   return 0;
    515 }
    516 
    517 GLOBAL(int)
    518 jsimd_can_h2v1_merged_upsample (void)
    519 {
    520   init_simd();
    521 
    522   /* The code is optimised for these values only */
    523   if (BITS_IN_JSAMPLE != 8)
    524     return 0;
    525   if (sizeof(JDIMENSION) != 4)
    526     return 0;
    527 
    528   if ((simd_support & JSIMD_SSE2) &&
    529       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
    530     return 1;
    531   if (simd_support & JSIMD_MMX)
    532     return 1;
    533 
    534   return 0;
    535 }
    536 
    537 GLOBAL(void)
    538 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
    539                             JSAMPIMAGE input_buf,
    540                             JDIMENSION in_row_group_ctr,
    541                             JSAMPARRAY output_buf)
    542 {
    543   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    544   void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    545 
    546   switch(cinfo->out_color_space)
    547   {
    548     case JCS_EXT_RGB:
    549       sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
    550       mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx;
    551       break;
    552     case JCS_EXT_RGBX:
    553     case JCS_EXT_RGBA:
    554       sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
    555       mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx;
    556       break;
    557     case JCS_EXT_BGR:
    558       sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
    559       mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx;
    560       break;
    561     case JCS_EXT_BGRX:
    562     case JCS_EXT_BGRA:
    563       sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
    564       mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx;
    565       break;
    566     case JCS_EXT_XBGR:
    567     case JCS_EXT_ABGR:
    568       sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
    569       mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx;
    570       break;
    571     case JCS_EXT_XRGB:
    572     case JCS_EXT_ARGB:
    573       sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
    574       mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx;
    575       break;
    576     default:
    577       sse2fct=jsimd_h2v2_merged_upsample_sse2;
    578       mmxfct=jsimd_h2v2_merged_upsample_mmx;
    579       break;
    580   }
    581 
    582   if ((simd_support & JSIMD_SSE2) &&
    583       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
    584     sse2fct(cinfo->output_width, input_buf,
    585         in_row_group_ctr, output_buf);
    586   else if (simd_support & JSIMD_MMX)
    587     mmxfct(cinfo->output_width, input_buf,
    588         in_row_group_ctr, output_buf);
    589 }
    590 
    591 GLOBAL(void)
    592 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
    593                             JSAMPIMAGE input_buf,
    594                             JDIMENSION in_row_group_ctr,
    595                             JSAMPARRAY output_buf)
    596 {
    597   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    598   void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    599 
    600   switch(cinfo->out_color_space)
    601   {
    602     case JCS_EXT_RGB:
    603       sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
    604       mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx;
    605       break;
    606     case JCS_EXT_RGBX:
    607     case JCS_EXT_RGBA:
    608       sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
    609       mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx;
    610       break;
    611     case JCS_EXT_BGR:
    612       sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
    613       mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx;
    614       break;
    615     case JCS_EXT_BGRX:
    616     case JCS_EXT_BGRA:
    617       sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
    618       mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx;
    619       break;
    620     case JCS_EXT_XBGR:
    621     case JCS_EXT_ABGR:
    622       sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
    623       mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx;
    624       break;
    625     case JCS_EXT_XRGB:
    626     case JCS_EXT_ARGB:
    627       sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
    628       mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx;
    629       break;
    630     default:
    631       sse2fct=jsimd_h2v1_merged_upsample_sse2;
    632       mmxfct=jsimd_h2v1_merged_upsample_mmx;
    633       break;
    634   }
    635 
    636   if ((simd_support & JSIMD_SSE2) &&
    637       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
    638     sse2fct(cinfo->output_width, input_buf,
    639         in_row_group_ctr, output_buf);
    640   else if (simd_support & JSIMD_MMX)
    641     mmxfct(cinfo->output_width, input_buf,
    642         in_row_group_ctr, output_buf);
    643 }
    644 
    645 #ifndef JPEG_DECODE_ONLY
    646 GLOBAL(int)
    647 jsimd_can_convsamp (void)
    648 {
    649   init_simd();
    650 
    651   /* The code is optimised for these values only */
    652   if (DCTSIZE != 8)
    653     return 0;
    654   if (BITS_IN_JSAMPLE != 8)
    655     return 0;
    656   if (sizeof(JDIMENSION) != 4)
    657     return 0;
    658   if (sizeof(DCTELEM) != 2)
    659     return 0;
    660 
    661   if (simd_support & JSIMD_SSE2)
    662     return 1;
    663   if (simd_support & JSIMD_MMX)
    664     return 1;
    665 
    666   return 0;
    667 }
    668 
    669 GLOBAL(int)
    670 jsimd_can_convsamp_float (void)
    671 {
    672   init_simd();
    673 
    674   /* The code is optimised for these values only */
    675   if (DCTSIZE != 8)
    676     return 0;
    677   if (BITS_IN_JSAMPLE != 8)
    678     return 0;
    679   if (sizeof(JDIMENSION) != 4)
    680     return 0;
    681   if (sizeof(FAST_FLOAT) != 4)
    682     return 0;
    683 
    684   if (simd_support & JSIMD_SSE2)
    685     return 1;
    686   if (simd_support & JSIMD_SSE)
    687     return 1;
    688   if (simd_support & JSIMD_3DNOW)
    689     return 1;
    690 
    691   return 0;
    692 }
    693 
    694 GLOBAL(void)
    695 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
    696                 DCTELEM * workspace)
    697 {
    698   if (simd_support & JSIMD_SSE2)
    699     jsimd_convsamp_sse2(sample_data, start_col, workspace);
    700   else if (simd_support & JSIMD_MMX)
    701     jsimd_convsamp_mmx(sample_data, start_col, workspace);
    702 }
    703 
    704 GLOBAL(void)
    705 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
    706                       FAST_FLOAT * workspace)
    707 {
    708   if (simd_support & JSIMD_SSE2)
    709     jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
    710   else if (simd_support & JSIMD_SSE)
    711     jsimd_convsamp_float_sse(sample_data, start_col, workspace);
    712   else if (simd_support & JSIMD_3DNOW)
    713     jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
    714 }
    715 
    716 GLOBAL(int)
    717 jsimd_can_fdct_islow (void)
    718 {
    719   init_simd();
    720 
    721   /* The code is optimised for these values only */
    722   if (DCTSIZE != 8)
    723     return 0;
    724   if (sizeof(DCTELEM) != 2)
    725     return 0;
    726 
    727   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
    728     return 1;
    729   if (simd_support & JSIMD_MMX)
    730     return 1;
    731 
    732   return 0;
    733 }
    734 
    735 GLOBAL(int)
    736 jsimd_can_fdct_ifast (void)
    737 {
    738   init_simd();
    739 
    740   /* The code is optimised for these values only */
    741   if (DCTSIZE != 8)
    742     return 0;
    743   if (sizeof(DCTELEM) != 2)
    744     return 0;
    745 
    746   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
    747     return 1;
    748   if (simd_support & JSIMD_MMX)
    749     return 1;
    750 
    751   return 0;
    752 }
    753 
    754 GLOBAL(int)
    755 jsimd_can_fdct_float (void)
    756 {
    757   init_simd();
    758 
    759   /* The code is optimised for these values only */
    760   if (DCTSIZE != 8)
    761     return 0;
    762   if (sizeof(FAST_FLOAT) != 4)
    763     return 0;
    764 
    765   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
    766     return 1;
    767   if (simd_support & JSIMD_3DNOW)
    768     return 1;
    769 
    770   return 0;
    771 }
    772 
    773 GLOBAL(void)
    774 jsimd_fdct_islow (DCTELEM * data)
    775 {
    776   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
    777     jsimd_fdct_islow_sse2(data);
    778   else if (simd_support & JSIMD_MMX)
    779     jsimd_fdct_islow_mmx(data);
    780 }
    781 
    782 GLOBAL(void)
    783 jsimd_fdct_ifast (DCTELEM * data)
    784 {
    785   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
    786     jsimd_fdct_ifast_sse2(data);
    787   else if (simd_support & JSIMD_MMX)
    788     jsimd_fdct_ifast_mmx(data);
    789 }
    790 
    791 GLOBAL(void)
    792 jsimd_fdct_float (FAST_FLOAT * data)
    793 {
    794   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
    795     jsimd_fdct_float_sse(data);
    796   else if (simd_support & JSIMD_3DNOW)
    797     jsimd_fdct_float_3dnow(data);
    798 }
    799 
    800 GLOBAL(int)
    801 jsimd_can_quantize (void)
    802 {
    803   init_simd();
    804 
    805   /* The code is optimised for these values only */
    806   if (DCTSIZE != 8)
    807     return 0;
    808   if (sizeof(JCOEF) != 2)
    809     return 0;
    810   if (sizeof(DCTELEM) != 2)
    811     return 0;
    812 
    813   if (simd_support & JSIMD_SSE2)
    814     return 1;
    815   if (simd_support & JSIMD_MMX)
    816     return 1;
    817 
    818   return 0;
    819 }
    820 
    821 GLOBAL(int)
    822 jsimd_can_quantize_float (void)
    823 {
    824   init_simd();
    825 
    826   /* The code is optimised for these values only */
    827   if (DCTSIZE != 8)
    828     return 0;
    829   if (sizeof(JCOEF) != 2)
    830     return 0;
    831   if (sizeof(FAST_FLOAT) != 4)
    832     return 0;
    833 
    834   if (simd_support & JSIMD_SSE2)
    835     return 1;
    836   if (simd_support & JSIMD_SSE)
    837     return 1;
    838   if (simd_support & JSIMD_3DNOW)
    839     return 1;
    840 
    841   return 0;
    842 }
    843 
    844 GLOBAL(void)
    845 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
    846                 DCTELEM * workspace)
    847 {
    848   if (simd_support & JSIMD_SSE2)
    849     jsimd_quantize_sse2(coef_block, divisors, workspace);
    850   else if (simd_support & JSIMD_MMX)
    851     jsimd_quantize_mmx(coef_block, divisors, workspace);
    852 }
    853 
    854 GLOBAL(void)
    855 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
    856                       FAST_FLOAT * workspace)
    857 {
    858   if (simd_support & JSIMD_SSE2)
    859     jsimd_quantize_float_sse2(coef_block, divisors, workspace);
    860   else if (simd_support & JSIMD_SSE)
    861     jsimd_quantize_float_sse(coef_block, divisors, workspace);
    862   else if (simd_support & JSIMD_3DNOW)
    863     jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
    864 }
    865 #endif
    866 
    867 GLOBAL(int)
    868 jsimd_can_idct_2x2 (void)
    869 {
    870   init_simd();
    871 
    872   /* The code is optimised for these values only */
    873   if (DCTSIZE != 8)
    874     return 0;
    875   if (sizeof(JCOEF) != 2)
    876     return 0;
    877   if (BITS_IN_JSAMPLE != 8)
    878     return 0;
    879   if (sizeof(JDIMENSION) != 4)
    880     return 0;
    881   if (sizeof(ISLOW_MULT_TYPE) != 2)
    882     return 0;
    883 
    884   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
    885     return 1;
    886   if (simd_support & JSIMD_MMX)
    887     return 1;
    888 
    889   return 0;
    890 }
    891 
    892 GLOBAL(int)
    893 jsimd_can_idct_4x4 (void)
    894 {
    895   init_simd();
    896 
    897   /* The code is optimised for these values only */
    898   if (DCTSIZE != 8)
    899     return 0;
    900   if (sizeof(JCOEF) != 2)
    901     return 0;
    902   if (BITS_IN_JSAMPLE != 8)
    903     return 0;
    904   if (sizeof(JDIMENSION) != 4)
    905     return 0;
    906   if (sizeof(ISLOW_MULT_TYPE) != 2)
    907     return 0;
    908 
    909   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
    910     return 1;
    911   if (simd_support & JSIMD_MMX)
    912     return 1;
    913 
    914   return 0;
    915 }
    916 
    917 GLOBAL(void)
    918 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    919                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    920                 JDIMENSION output_col)
    921 {
    922   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
    923     jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
    924   else if (simd_support & JSIMD_MMX)
    925     jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
    926 }
    927 
    928 GLOBAL(void)
    929 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    930                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    931                 JDIMENSION output_col)
    932 {
    933   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
    934     jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
    935   else if (simd_support & JSIMD_MMX)
    936     jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
    937 }
    938 
    939 GLOBAL(int)
    940 jsimd_can_idct_islow (void)
    941 {
    942   init_simd();
    943 
    944   /* The code is optimised for these values only */
    945   if (DCTSIZE != 8)
    946     return 0;
    947   if (sizeof(JCOEF) != 2)
    948     return 0;
    949   if (BITS_IN_JSAMPLE != 8)
    950     return 0;
    951   if (sizeof(JDIMENSION) != 4)
    952     return 0;
    953   if (sizeof(ISLOW_MULT_TYPE) != 2)
    954     return 0;
    955 
    956   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
    957     return 1;
    958   if (simd_support & JSIMD_MMX)
    959     return 1;
    960 
    961   return 0;
    962 }
    963 
    964 GLOBAL(int)
    965 jsimd_can_idct_ifast (void)
    966 {
    967   init_simd();
    968 
    969   /* The code is optimised for these values only */
    970   if (DCTSIZE != 8)
    971     return 0;
    972   if (sizeof(JCOEF) != 2)
    973     return 0;
    974   if (BITS_IN_JSAMPLE != 8)
    975     return 0;
    976   if (sizeof(JDIMENSION) != 4)
    977     return 0;
    978   if (sizeof(IFAST_MULT_TYPE) != 2)
    979     return 0;
    980   if (IFAST_SCALE_BITS != 2)
    981     return 0;
    982 
    983   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
    984     return 1;
    985   if (simd_support & JSIMD_MMX)
    986     return 1;
    987 
    988   return 0;
    989 }
    990 
    991 GLOBAL(int)
    992 jsimd_can_idct_float (void)
    993 {
    994   init_simd();
    995 
    996   if (DCTSIZE != 8)
    997     return 0;
    998   if (sizeof(JCOEF) != 2)
    999     return 0;
   1000   if (BITS_IN_JSAMPLE != 8)
   1001     return 0;
   1002   if (sizeof(JDIMENSION) != 4)
   1003     return 0;
   1004   if (sizeof(FAST_FLOAT) != 4)
   1005     return 0;
   1006   if (sizeof(FLOAT_MULT_TYPE) != 4)
   1007     return 0;
   1008 
   1009   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
   1010     return 1;
   1011   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
   1012     return 1;
   1013   if (simd_support & JSIMD_3DNOW)
   1014     return 1;
   1015 
   1016   return 0;
   1017 }
   1018 
   1019 GLOBAL(void)
   1020 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   1021                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1022                 JDIMENSION output_col)
   1023 {
   1024   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
   1025     jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col);
   1026   else if (simd_support & JSIMD_MMX)
   1027     jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col);
   1028 }
   1029 
   1030 GLOBAL(void)
   1031 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   1032                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1033                 JDIMENSION output_col)
   1034 {
   1035   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
   1036     jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col);
   1037   else if (simd_support & JSIMD_MMX)
   1038     jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col);
   1039 }
   1040 
   1041 GLOBAL(void)
   1042 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   1043                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1044                 JDIMENSION output_col)
   1045 {
   1046   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
   1047     jsimd_idct_float_sse2(compptr->dct_table, coef_block,
   1048         output_buf, output_col);
   1049   else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
   1050     jsimd_idct_float_sse(compptr->dct_table, coef_block,
   1051         output_buf, output_col);
   1052   else if (simd_support & JSIMD_3DNOW)
   1053     jsimd_idct_float_3dnow(compptr->dct_table, coef_block,
   1054         output_buf, output_col);
   1055 }
   1056