Home | History | Annotate | Download | only in simd
      1 /*
      2  * jsimd_arm64.c
      3  *
      4  * Copyright 2009 Pierre Ossman <ossman (at) cendio.se> for Cendio AB
      5  * Copyright 2009-2011, 2013-2014 D. R. Commander
      6  *
      7  * Based on the x86 SIMD extension for IJG JPEG library,
      8  * Copyright (C) 1999-2006, MIYASAKA Masaru.
      9  * For conditions of distribution and use, see copyright notice in jsimdext.inc
     10  *
     11  * This file contains the interface between the "normal" portions
     12  * of the library and the SIMD implementations when running on a
     13  * 64-bit ARM architecture.
     14  */
     15 
     16 #define JPEG_INTERNALS
     17 #include "../jinclude.h"
     18 #include "../jpeglib.h"
     19 #include "../jsimd.h"
     20 #include "../jdct.h"
     21 #include "../jsimddct.h"
     22 #include "jsimd.h"
     23 
     24 #include <stdio.h>
     25 #include <string.h>
     26 #include <ctype.h>
     27 
     /*
      * Bitmask of usable SIMD extensions.  The all-ones value is the
      * "not initialised yet" sentinel that init_simd() tests for.
      */
     static unsigned int simd_support = ~0U;
     29 
     30 /*
     31  * Check what SIMD accelerations are supported.
     32  *
     33  * FIXME: This code is racy under a multi-threaded environment.
     34  */
     35 
     36 /*
     37  * ARMv8 architectures support NEON extensions by default.
     38  * It is no longer optional as it was with ARMv7.
     39  */
     40 
     41 
     42 LOCAL(void)
     43 init_simd (void)
     44 {
     45   char *env = NULL;
     46 
     47   if (simd_support != ~0U)
     48     return;
     49 
     50   simd_support = 0;
     51 
     52   simd_support |= JSIMD_ARM_NEON;
     53 
     54   /* Force different settings through environment variables */
     55   env = getenv("JSIMD_FORCENEON");
     56   if ((env != NULL) && (strcmp(env, "1") == 0))
     57     simd_support &= JSIMD_ARM_NEON;
     58   env = getenv("JSIMD_FORCENONE");
     59   if ((env != NULL) && (strcmp(env, "1") == 0))
     60     simd_support = 0;
     61 }
     62 
     63 GLOBAL(int)
     64 jsimd_can_rgb_ycc (void)
     65 {
     66   init_simd();
     67 
     68   return 0;
     69 }
     70 
     71 GLOBAL(int)
     72 jsimd_can_rgb_gray (void)
     73 {
     74   init_simd();
     75 
     76   return 0;
     77 }
     78 
     79 GLOBAL(int)
     80 jsimd_can_ycc_rgb (void)
     81 {
     82   init_simd();
     83 
     84   /* The code is optimised for these values only */
     85   if (BITS_IN_JSAMPLE != 8)
     86     return 0;
     87   if (sizeof(JDIMENSION) != 4)
     88     return 0;
     89   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
     90     return 0;
     91 
     92   if (simd_support & JSIMD_ARM_NEON)
     93     return 1;
     94 
     95   return 0;
     96 }
     97 
     98 GLOBAL(int)
     99 jsimd_can_ycc_rgb565 (void)
    100 {
    101   init_simd();
    102 
    103   /* The code is optimised for these values only */
    104   if (BITS_IN_JSAMPLE != 8)
    105     return 0;
    106   if (sizeof(JDIMENSION) != 4)
    107     return 0;
    108 
    109   if (simd_support & JSIMD_ARM_NEON)
    110     return 1;
    111 
    112   return 0;
    113 }
    114 
    115 GLOBAL(void)
    116 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
    117                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
    118                        JDIMENSION output_row, int num_rows)
    119 {
    120 }
    121 
    122 GLOBAL(void)
    123 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
    124                         JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
    125                         JDIMENSION output_row, int num_rows)
    126 {
    127 }
    128 
    129 GLOBAL(void)
    130 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
    131                        JSAMPIMAGE input_buf, JDIMENSION input_row,
    132                        JSAMPARRAY output_buf, int num_rows)
    133 {
    134   void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
    135 
    136   switch(cinfo->out_color_space) {
    137     case JCS_EXT_RGB:
    138       neonfct=jsimd_ycc_extrgb_convert_neon;
    139       break;
    140     case JCS_EXT_RGBX:
    141     case JCS_EXT_RGBA:
    142       neonfct=jsimd_ycc_extrgbx_convert_neon;
    143       break;
    144     case JCS_EXT_BGR:
    145       neonfct=jsimd_ycc_extbgr_convert_neon;
    146       break;
    147     case JCS_EXT_BGRX:
    148     case JCS_EXT_BGRA:
    149       neonfct=jsimd_ycc_extbgrx_convert_neon;
    150       break;
    151     case JCS_EXT_XBGR:
    152     case JCS_EXT_ABGR:
    153       neonfct=jsimd_ycc_extxbgr_convert_neon;
    154       break;
    155     case JCS_EXT_XRGB:
    156     case JCS_EXT_ARGB:
    157       neonfct=jsimd_ycc_extxrgb_convert_neon;
    158       break;
    159     default:
    160       neonfct=jsimd_ycc_extrgb_convert_neon;
    161       break;
    162   }
    163 
    164   if (simd_support & JSIMD_ARM_NEON)
    165     neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
    166 }
    167 
    168 GLOBAL(void)
    169 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
    170                           JSAMPIMAGE input_buf, JDIMENSION input_row,
    171                           JSAMPARRAY output_buf, int num_rows)
    172 {
    173   if (simd_support & JSIMD_ARM_NEON)
    174     jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
    175                                   output_buf, num_rows);
    176 }
    177 
    178 GLOBAL(int)
    179 jsimd_can_h2v2_downsample (void)
    180 {
    181   init_simd();
    182 
    183   return 0;
    184 }
    185 
    186 GLOBAL(int)
    187 jsimd_can_h2v1_downsample (void)
    188 {
    189   init_simd();
    190 
    191   return 0;
    192 }
    193 
    194 GLOBAL(void)
    195 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
    196                        JSAMPARRAY input_data, JSAMPARRAY output_data)
    197 {
    198 }
    199 
    200 GLOBAL(void)
    201 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
    202                        JSAMPARRAY input_data, JSAMPARRAY output_data)
    203 {
    204 }
    205 
    206 GLOBAL(int)
    207 jsimd_can_h2v2_upsample (void)
    208 {
    209   init_simd();
    210 
    211   return 0;
    212 }
    213 
    214 GLOBAL(int)
    215 jsimd_can_h2v1_upsample (void)
    216 {
    217   init_simd();
    218 
    219   return 0;
    220 }
    221 
    222 GLOBAL(void)
    223 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
    224                      jpeg_component_info * compptr,
    225                      JSAMPARRAY input_data,
    226                      JSAMPARRAY * output_data_ptr)
    227 {
    228 }
    229 
    230 GLOBAL(void)
    231 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
    232                      jpeg_component_info * compptr,
    233                      JSAMPARRAY input_data,
    234                      JSAMPARRAY * output_data_ptr)
    235 {
    236 }
    237 
    238 GLOBAL(int)
    239 jsimd_can_h2v2_fancy_upsample (void)
    240 {
    241   init_simd();
    242 
    243   return 0;
    244 }
    245 
    246 GLOBAL(int)
    247 jsimd_can_h2v1_fancy_upsample (void)
    248 {
    249   init_simd();
    250 
    251   return 0;
    252 }
    253 
    254 GLOBAL(void)
    255 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
    256                            jpeg_component_info * compptr,
    257                            JSAMPARRAY input_data,
    258                            JSAMPARRAY * output_data_ptr)
    259 {
    260 }
    261 
    262 GLOBAL(void)
    263 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
    264                            jpeg_component_info * compptr,
    265                            JSAMPARRAY input_data,
    266                            JSAMPARRAY * output_data_ptr)
    267 {
    268 }
    269 
    270 GLOBAL(int)
    271 jsimd_can_h2v2_merged_upsample (void)
    272 {
    273   init_simd();
    274 
    275   return 0;
    276 }
    277 
    278 GLOBAL(int)
    279 jsimd_can_h2v1_merged_upsample (void)
    280 {
    281   init_simd();
    282 
    283   return 0;
    284 }
    285 
    286 GLOBAL(void)
    287 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
    288                             JSAMPIMAGE input_buf,
    289                             JDIMENSION in_row_group_ctr,
    290                             JSAMPARRAY output_buf)
    291 {
    292 }
    293 
    294 GLOBAL(void)
    295 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
    296                             JSAMPIMAGE input_buf,
    297                             JDIMENSION in_row_group_ctr,
    298                             JSAMPARRAY output_buf)
    299 {
    300 }
    301 
    302 GLOBAL(int)
    303 jsimd_can_convsamp (void)
    304 {
    305   init_simd();
    306 
    307   return 0;
    308 }
    309 
    310 GLOBAL(int)
    311 jsimd_can_convsamp_float (void)
    312 {
    313   init_simd();
    314 
    315   return 0;
    316 }
    317 
    318 GLOBAL(void)
    319 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
    320                 DCTELEM * workspace)
    321 {
    322 }
    323 
    324 GLOBAL(void)
    325 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
    326                       FAST_FLOAT * workspace)
    327 {
    328 }
    329 
    330 GLOBAL(int)
    331 jsimd_can_fdct_islow (void)
    332 {
    333   init_simd();
    334 
    335   return 0;
    336 }
    337 
    338 GLOBAL(int)
    339 jsimd_can_fdct_ifast (void)
    340 {
    341   init_simd();
    342 
    343   return 0;
    344 }
    345 
    346 GLOBAL(int)
    347 jsimd_can_fdct_float (void)
    348 {
    349   init_simd();
    350 
    351   return 0;
    352 }
    353 
    354 GLOBAL(void)
    355 jsimd_fdct_islow (DCTELEM * data)
    356 {
    357 }
    358 
    359 GLOBAL(void)
    360 jsimd_fdct_ifast (DCTELEM * data)
    361 {
    362 }
    363 
    364 GLOBAL(void)
    365 jsimd_fdct_float (FAST_FLOAT * data)
    366 {
    367 }
    368 
    369 GLOBAL(int)
    370 jsimd_can_quantize (void)
    371 {
    372   init_simd();
    373 
    374   return 0;
    375 }
    376 
    377 GLOBAL(int)
    378 jsimd_can_quantize_float (void)
    379 {
    380   init_simd();
    381 
    382   return 0;
    383 }
    384 
    385 GLOBAL(void)
    386 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
    387                 DCTELEM * workspace)
    388 {
    389 }
    390 
    391 GLOBAL(void)
    392 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
    393                       FAST_FLOAT * workspace)
    394 {
    395 }
    396 
    397 GLOBAL(int)
    398 jsimd_can_idct_2x2 (void)
    399 {
    400   init_simd();
    401 
    402   /* The code is optimised for these values only */
    403   if (DCTSIZE != 8)
    404     return 0;
    405   if (sizeof(JCOEF) != 2)
    406     return 0;
    407   if (BITS_IN_JSAMPLE != 8)
    408     return 0;
    409   if (sizeof(JDIMENSION) != 4)
    410     return 0;
    411   if (sizeof(ISLOW_MULT_TYPE) != 2)
    412     return 0;
    413 
    414   if (simd_support & JSIMD_ARM_NEON)
    415     return 1;
    416 
    417   return 0;
    418 }
    419 
    420 GLOBAL(int)
    421 jsimd_can_idct_4x4 (void)
    422 {
    423   init_simd();
    424 
    425   /* The code is optimised for these values only */
    426   if (DCTSIZE != 8)
    427     return 0;
    428   if (sizeof(JCOEF) != 2)
    429     return 0;
    430   if (BITS_IN_JSAMPLE != 8)
    431     return 0;
    432   if (sizeof(JDIMENSION) != 4)
    433     return 0;
    434   if (sizeof(ISLOW_MULT_TYPE) != 2)
    435     return 0;
    436 
    437   if (simd_support & JSIMD_ARM_NEON)
    438     return 1;
    439 
    440   return 0;
    441 }
    442 
    443 GLOBAL(void)
    444 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    445                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    446                 JDIMENSION output_col)
    447 {
    448   if (simd_support & JSIMD_ARM_NEON)
    449     jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf,
    450                         output_col);
    451 }
    452 
    453 GLOBAL(void)
    454 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    455                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    456                 JDIMENSION output_col)
    457 {
    458   if (simd_support & JSIMD_ARM_NEON)
    459     jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf,
    460                         output_col);
    461 }
    462 
    463 GLOBAL(int)
    464 jsimd_can_idct_islow (void)
    465 {
    466   init_simd();
    467 
    468   /* The code is optimised for these values only */
    469   if (DCTSIZE != 8)
    470     return 0;
    471   if (sizeof(JCOEF) != 2)
    472     return 0;
    473   if (BITS_IN_JSAMPLE != 8)
    474     return 0;
    475   if (sizeof(JDIMENSION) != 4)
    476     return 0;
    477   if (sizeof(ISLOW_MULT_TYPE) != 2)
    478     return 0;
    479 
    480   if (simd_support & JSIMD_ARM_NEON)
    481     return 1;
    482 
    483   return 0;
    484 }
    485 
    486 GLOBAL(int)
    487 jsimd_can_idct_ifast (void)
    488 {
    489   init_simd();
    490 
    491   /* The code is optimised for these values only */
    492   if (DCTSIZE != 8)
    493     return 0;
    494   if (sizeof(JCOEF) != 2)
    495     return 0;
    496   if (BITS_IN_JSAMPLE != 8)
    497     return 0;
    498   if (sizeof(JDIMENSION) != 4)
    499     return 0;
    500   if (sizeof(IFAST_MULT_TYPE) != 2)
    501     return 0;
    502   if (IFAST_SCALE_BITS != 2)
    503     return 0;
    504 
    505   if (simd_support & JSIMD_ARM_NEON)
    506     return 1;
    507 
    508   return 0;
    509 }
    510 
    511 GLOBAL(int)
    512 jsimd_can_idct_float (void)
    513 {
    514   init_simd();
    515 
    516   return 0;
    517 }
    518 
    519 GLOBAL(void)
    520 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    521                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
    522                   JDIMENSION output_col)
    523 {
    524   if (simd_support & JSIMD_ARM_NEON)
    525     jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
    526                           output_col);
    527 }
    528 
    529 GLOBAL(void)
    530 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    531                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
    532                   JDIMENSION output_col)
    533 {
    534   if (simd_support & JSIMD_ARM_NEON)
    535     jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
    536                           output_col);
    537 }
    538 
    539 GLOBAL(void)
    540 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    541                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
    542                   JDIMENSION output_col)
    543 {
    544 }
    545