Home | History | Annotate | Download | only in simd
      1 /*
      2  * jsimd_arm.c
      3  *
      4  * Copyright 2009 Pierre Ossman <ossman (at) cendio.se> for Cendio AB
      5  * Copyright 2009-2011 D. R. Commander
      6  *
      7  * Based on the x86 SIMD extension for IJG JPEG library,
      8  * Copyright (C) 1999-2006, MIYASAKA Masaru.
      9  * For conditions of distribution and use, see copyright notice in jsimdext.inc
     10  *
     11  * This file contains the interface between the "normal" portions
     12  * of the library and the SIMD implementations when running on
     13  * ARM architecture.
     14  *
     15  * Based on the stubs from 'jsimd_none.c'
     16  */
     17 
     18 #define JPEG_INTERNALS
     19 #include "../jinclude.h"
     20 #include "../jpeglib.h"
     21 #include "../jsimd.h"
     22 #include "../jdct.h"
     23 #include "../jsimddct.h"
     24 #include "jsimd.h"
     25 
     26 #include <stdio.h>
     27 #include <string.h>
     28 #include <ctype.h>
     29 
     30 static unsigned int simd_support = ~0;
     31 
     32 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
     33 
     34 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
     35 
     36 LOCAL(int)
     37 check_feature (char *buffer, char *feature)
     38 {
     39   char *p;
     40   if (*feature == 0)
     41     return 0;
     42   if (strncmp(buffer, "Features", 8) != 0)
     43     return 0;
     44   buffer += 8;
     45   while (isspace(*buffer))
     46     buffer++;
     47 
     48   /* Check if 'feature' is present in the buffer as a separate word */
     49   while ((p = strstr(buffer, feature))) {
     50     if (p > buffer && !isspace(*(p - 1))) {
     51       buffer++;
     52       continue;
     53     }
     54     p += strlen(feature);
     55     if (*p != 0 && !isspace(*p)) {
     56       buffer++;
     57       continue;
     58     }
     59     return 1;
     60   }
     61   return 0;
     62 }
     63 
     64 LOCAL(int)
     65 parse_proc_cpuinfo (int bufsize)
     66 {
     67   char *buffer = (char *)malloc(bufsize);
     68   FILE *fd;
     69   simd_support = 0;
     70 
     71   if (!buffer)
     72     return 0;
     73 
     74   fd = fopen("/proc/cpuinfo", "r");
     75   if (fd) {
     76     while (fgets(buffer, bufsize, fd)) {
     77       if (!strchr(buffer, '\n') && !feof(fd)) {
     78         /* "impossible" happened - insufficient size of the buffer! */
     79         fclose(fd);
     80         free(buffer);
     81         return 0;
     82       }
     83       if (check_feature(buffer, "neon"))
     84         simd_support |= JSIMD_ARM_NEON;
     85     }
     86     fclose(fd);
     87   }
     88   free(buffer);
     89   return 1;
     90 }
     91 
     92 #endif
     93 
     94 /*
     95  * Check what SIMD accelerations are supported.
     96  *
     97  * FIXME: This code is racy under a multi-threaded environment.
     98  */
     99 LOCAL(void)
    100 init_simd (void)
    101 {
    102   char *env = NULL;
    103 #if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
    104   int bufsize = 1024; /* an initial guess for the line buffer size limit */
    105 #endif
    106 
    107   if (simd_support != ~0U)
    108     return;
    109 
    110   simd_support = 0;
    111 
    112 #if defined(__ARM_NEON__)
    113   simd_support |= JSIMD_ARM_NEON;
    114 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
    115   /* We still have a chance to use NEON regardless of globally used
    116    * -mcpu/-mfpu options passed to gcc by performing runtime detection via
    117    * /proc/cpuinfo parsing on linux/android */
    118   while (!parse_proc_cpuinfo(bufsize)) {
    119     bufsize *= 2;
    120     if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
    121       break;
    122   }
    123 #endif
    124 
    125   /* Force different settings through environment variables */
    126   env = getenv("JSIMD_FORCE_ARM_NEON");
    127   if ((env != NULL) && (strcmp(env, "1") == 0))
    128     simd_support &= JSIMD_ARM_NEON;
    129   env = getenv("JSIMD_FORCE_NO_SIMD");
    130   if ((env != NULL) && (strcmp(env, "1") == 0))
    131     simd_support = 0;
    132 }
    133 
    134 GLOBAL(int)
    135 jsimd_can_rgb_ycc (void)
    136 {
    137   init_simd();
    138 
    139   /* The code is optimised for these values only */
    140   if (BITS_IN_JSAMPLE != 8)
    141     return 0;
    142   if (sizeof(JDIMENSION) != 4)
    143     return 0;
    144   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    145     return 0;
    146 
    147   if (simd_support & JSIMD_ARM_NEON)
    148     return 1;
    149 
    150   return 0;
    151 }
    152 
    153 GLOBAL(int)
    154 jsimd_can_rgb_gray (void)
    155 {
    156   init_simd();
    157 
    158   return 0;
    159 }
    160 
    161 GLOBAL(int)
    162 jsimd_can_ycc_rgb (void)
    163 {
    164   init_simd();
    165 
    166   /* The code is optimised for these values only */
    167   if (BITS_IN_JSAMPLE != 8)
    168     return 0;
    169   if (sizeof(JDIMENSION) != 4)
    170     return 0;
    171   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    172     return 0;
    173   if (simd_support & JSIMD_ARM_NEON)
    174     return 1;
    175 
    176   return 0;
    177 }
    178 
    179 GLOBAL(void)
    180 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
    181                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
    182                        JDIMENSION output_row, int num_rows)
    183 {
    184   void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    185 
    186   switch(cinfo->in_color_space)
    187   {
    188     case JCS_EXT_RGB:
    189       neonfct=jsimd_extrgb_ycc_convert_neon;
    190       break;
    191     case JCS_EXT_RGBX:
    192     case JCS_EXT_RGBA:
    193       neonfct=jsimd_extrgbx_ycc_convert_neon;
    194       break;
    195     case JCS_EXT_BGR:
    196       neonfct=jsimd_extbgr_ycc_convert_neon;
    197       break;
    198     case JCS_EXT_BGRX:
    199     case JCS_EXT_BGRA:
    200       neonfct=jsimd_extbgrx_ycc_convert_neon;
    201       break;
    202     case JCS_EXT_XBGR:
    203     case JCS_EXT_ABGR:
    204       neonfct=jsimd_extxbgr_ycc_convert_neon;
    205       break;
    206     case JCS_EXT_XRGB:
    207     case JCS_EXT_ARGB:
    208       neonfct=jsimd_extxrgb_ycc_convert_neon;
    209       break;
    210     default:
    211       neonfct=jsimd_extrgb_ycc_convert_neon;
    212       break;
    213   }
    214 
    215   if (simd_support & JSIMD_ARM_NEON)
    216     neonfct(cinfo->image_width, input_buf,
    217         output_buf, output_row, num_rows);
    218 }
    219 
    220 GLOBAL(void)
    221 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
    222                         JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
    223                         JDIMENSION output_row, int num_rows)
    224 {
    225 }
    226 
    227 GLOBAL(void)
    228 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
    229                        JSAMPIMAGE input_buf, JDIMENSION input_row,
    230                        JSAMPARRAY output_buf, int num_rows)
    231 {
    232   void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
    233 
    234   switch(cinfo->out_color_space)
    235   {
    236     case JCS_EXT_RGB:
    237       neonfct=jsimd_ycc_extrgb_convert_neon;
    238       break;
    239     case JCS_EXT_RGBX:
    240     case JCS_EXT_RGBA:
    241       neonfct=jsimd_ycc_extrgbx_convert_neon;
    242       break;
    243     case JCS_EXT_BGR:
    244       neonfct=jsimd_ycc_extbgr_convert_neon;
    245       break;
    246     case JCS_EXT_BGRX:
    247     case JCS_EXT_BGRA:
    248       neonfct=jsimd_ycc_extbgrx_convert_neon;
    249       break;
    250     case JCS_EXT_XBGR:
    251     case JCS_EXT_ABGR:
    252       neonfct=jsimd_ycc_extxbgr_convert_neon;
    253       break;
    254     case JCS_EXT_XRGB:
    255     case JCS_EXT_ARGB:
    256       neonfct=jsimd_ycc_extxrgb_convert_neon;
    257       break;
    258   default:
    259       neonfct=jsimd_ycc_extrgb_convert_neon;
    260       break;
    261   }
    262 
    263   if (simd_support & JSIMD_ARM_NEON)
    264     neonfct(cinfo->output_width, input_buf,
    265         input_row, output_buf, num_rows);
    266 }
    267 
    268 GLOBAL(int)
    269 jsimd_can_h2v2_downsample (void)
    270 {
    271   init_simd();
    272 
    273   return 0;
    274 }
    275 
    276 GLOBAL(int)
    277 jsimd_can_h2v1_downsample (void)
    278 {
    279   init_simd();
    280 
    281   return 0;
    282 }
    283 
    284 GLOBAL(void)
    285 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
    286                        JSAMPARRAY input_data, JSAMPARRAY output_data)
    287 {
    288 }
    289 
    290 GLOBAL(void)
    291 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
    292                        JSAMPARRAY input_data, JSAMPARRAY output_data)
    293 {
    294 }
    295 
    296 GLOBAL(int)
    297 jsimd_can_h2v2_upsample (void)
    298 {
    299   init_simd();
    300 
    301   return 0;
    302 }
    303 
    304 GLOBAL(int)
    305 jsimd_can_h2v1_upsample (void)
    306 {
    307   init_simd();
    308 
    309   return 0;
    310 }
    311 
    312 GLOBAL(void)
    313 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
    314                      jpeg_component_info * compptr,
    315                      JSAMPARRAY input_data,
    316                      JSAMPARRAY * output_data_ptr)
    317 {
    318 }
    319 
    320 GLOBAL(void)
    321 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
    322                      jpeg_component_info * compptr,
    323                      JSAMPARRAY input_data,
    324                      JSAMPARRAY * output_data_ptr)
    325 {
    326 }
    327 
    328 GLOBAL(int)
    329 jsimd_can_h2v2_fancy_upsample (void)
    330 {
    331   init_simd();
    332 
    333   return 0;
    334 }
    335 
    336 GLOBAL(int)
    337 jsimd_can_h2v1_fancy_upsample (void)
    338 {
    339   init_simd();
    340 
    341   /* The code is optimised for these values only */
    342   if (BITS_IN_JSAMPLE != 8)
    343     return 0;
    344   if (sizeof(JDIMENSION) != 4)
    345     return 0;
    346 
    347   if (simd_support & JSIMD_ARM_NEON)
    348     return 1;
    349 
    350   return 0;
    351 }
    352 
    353 GLOBAL(void)
    354 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
    355                            jpeg_component_info * compptr,
    356                            JSAMPARRAY input_data,
    357                            JSAMPARRAY * output_data_ptr)
    358 {
    359 }
    360 
    361 GLOBAL(void)
    362 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
    363                            jpeg_component_info * compptr,
    364                            JSAMPARRAY input_data,
    365                            JSAMPARRAY * output_data_ptr)
    366 {
    367   if (simd_support & JSIMD_ARM_NEON)
    368     jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
    369         compptr->downsampled_width, input_data, output_data_ptr);
    370 }
    371 
    372 GLOBAL(int)
    373 jsimd_can_h2v2_merged_upsample (void)
    374 {
    375   init_simd();
    376 
    377   return 0;
    378 }
    379 
    380 GLOBAL(int)
    381 jsimd_can_h2v1_merged_upsample (void)
    382 {
    383   init_simd();
    384 
    385   return 0;
    386 }
    387 
    388 GLOBAL(void)
    389 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
    390                             JSAMPIMAGE input_buf,
    391                             JDIMENSION in_row_group_ctr,
    392                             JSAMPARRAY output_buf)
    393 {
    394 }
    395 
    396 GLOBAL(void)
    397 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
    398                             JSAMPIMAGE input_buf,
    399                             JDIMENSION in_row_group_ctr,
    400                             JSAMPARRAY output_buf)
    401 {
    402 }
    403 
    404 GLOBAL(int)
    405 jsimd_can_convsamp (void)
    406 {
    407   init_simd();
    408 
    409   /* The code is optimised for these values only */
    410   if (DCTSIZE != 8)
    411     return 0;
    412   if (BITS_IN_JSAMPLE != 8)
    413     return 0;
    414   if (sizeof(JDIMENSION) != 4)
    415     return 0;
    416   if (sizeof(DCTELEM) != 2)
    417     return 0;
    418 
    419   if (simd_support & JSIMD_ARM_NEON)
    420     return 1;
    421 
    422   return 0;
    423 }
    424 
    425 GLOBAL(int)
    426 jsimd_can_convsamp_float (void)
    427 {
    428   init_simd();
    429 
    430   return 0;
    431 }
    432 
    433 GLOBAL(void)
    434 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
    435                 DCTELEM * workspace)
    436 {
    437   if (simd_support & JSIMD_ARM_NEON)
    438     jsimd_convsamp_neon(sample_data, start_col, workspace);
    439 }
    440 
    441 GLOBAL(void)
    442 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
    443                       FAST_FLOAT * workspace)
    444 {
    445 }
    446 
    447 GLOBAL(int)
    448 jsimd_can_fdct_islow (void)
    449 {
    450   init_simd();
    451 
    452   return 0;
    453 }
    454 
    455 GLOBAL(int)
    456 jsimd_can_fdct_ifast (void)
    457 {
    458   init_simd();
    459 
    460   /* The code is optimised for these values only */
    461   if (DCTSIZE != 8)
    462     return 0;
    463   if (sizeof(DCTELEM) != 2)
    464     return 0;
    465 
    466   if (simd_support & JSIMD_ARM_NEON)
    467     return 1;
    468 
    469   return 0;
    470 }
    471 
    472 GLOBAL(int)
    473 jsimd_can_fdct_float (void)
    474 {
    475   init_simd();
    476 
    477   return 0;
    478 }
    479 
    480 GLOBAL(void)
    481 jsimd_fdct_islow (DCTELEM * data)
    482 {
    483 }
    484 
    485 GLOBAL(void)
    486 jsimd_fdct_ifast (DCTELEM * data)
    487 {
    488   if (simd_support & JSIMD_ARM_NEON)
    489     jsimd_fdct_ifast_neon(data);
    490 }
    491 
    492 GLOBAL(void)
    493 jsimd_fdct_float (FAST_FLOAT * data)
    494 {
    495 }
    496 
    497 GLOBAL(int)
    498 jsimd_can_quantize (void)
    499 {
    500   init_simd();
    501 
    502   /* The code is optimised for these values only */
    503   if (DCTSIZE != 8)
    504     return 0;
    505   if (sizeof(JCOEF) != 2)
    506     return 0;
    507   if (sizeof(DCTELEM) != 2)
    508     return 0;
    509 
    510   if (simd_support & JSIMD_ARM_NEON)
    511     return 1;
    512 
    513   return 0;
    514 }
    515 
    516 GLOBAL(int)
    517 jsimd_can_quantize_float (void)
    518 {
    519   init_simd();
    520 
    521   return 0;
    522 }
    523 
    524 GLOBAL(void)
    525 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
    526                 DCTELEM * workspace)
    527 {
    528   if (simd_support & JSIMD_ARM_NEON)
    529     jsimd_quantize_neon(coef_block, divisors, workspace);
    530 }
    531 
    532 GLOBAL(void)
    533 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
    534                       FAST_FLOAT * workspace)
    535 {
    536 }
    537 
    538 GLOBAL(int)
    539 jsimd_can_idct_2x2 (void)
    540 {
    541   init_simd();
    542 
    543   /* The code is optimised for these values only */
    544   if (DCTSIZE != 8)
    545     return 0;
    546   if (sizeof(JCOEF) != 2)
    547     return 0;
    548   if (BITS_IN_JSAMPLE != 8)
    549     return 0;
    550   if (sizeof(JDIMENSION) != 4)
    551     return 0;
    552   if (sizeof(ISLOW_MULT_TYPE) != 2)
    553     return 0;
    554 
    555   if ((simd_support & JSIMD_ARM_NEON))
    556     return 1;
    557 
    558   return 0;
    559 }
    560 
    561 GLOBAL(int)
    562 jsimd_can_idct_4x4 (void)
    563 {
    564   init_simd();
    565 
    566   /* The code is optimised for these values only */
    567   if (DCTSIZE != 8)
    568     return 0;
    569   if (sizeof(JCOEF) != 2)
    570     return 0;
    571   if (BITS_IN_JSAMPLE != 8)
    572     return 0;
    573   if (sizeof(JDIMENSION) != 4)
    574     return 0;
    575   if (sizeof(ISLOW_MULT_TYPE) != 2)
    576     return 0;
    577 
    578   if ((simd_support & JSIMD_ARM_NEON))
    579     return 1;
    580 
    581   return 0;
    582 }
    583 
    584 GLOBAL(void)
    585 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    586                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    587                 JDIMENSION output_col)
    588 {
    589   if ((simd_support & JSIMD_ARM_NEON))
    590     jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
    591 }
    592 
    593 GLOBAL(void)
    594 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    595                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    596                 JDIMENSION output_col)
    597 {
    598   if ((simd_support & JSIMD_ARM_NEON))
    599     jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
    600 }
    601 
    602 GLOBAL(int)
    603 jsimd_can_idct_islow (void)
    604 {
    605   init_simd();
    606 
    607   /* The code is optimised for these values only */
    608   if (DCTSIZE != 8)
    609     return 0;
    610   if (sizeof(JCOEF) != 2)
    611     return 0;
    612   if (BITS_IN_JSAMPLE != 8)
    613     return 0;
    614   if (sizeof(JDIMENSION) != 4)
    615     return 0;
    616   if (sizeof(ISLOW_MULT_TYPE) != 2)
    617     return 0;
    618 
    619   if (simd_support & JSIMD_ARM_NEON)
    620     return 1;
    621 
    622   return 0;
    623 }
    624 
    625 GLOBAL(int)
    626 jsimd_can_idct_ifast (void)
    627 {
    628   init_simd();
    629 
    630   /* The code is optimised for these values only */
    631   if (DCTSIZE != 8)
    632     return 0;
    633   if (sizeof(JCOEF) != 2)
    634     return 0;
    635   if (BITS_IN_JSAMPLE != 8)
    636     return 0;
    637   if (sizeof(JDIMENSION) != 4)
    638     return 0;
    639   if (sizeof(IFAST_MULT_TYPE) != 2)
    640     return 0;
    641   if (IFAST_SCALE_BITS != 2)
    642     return 0;
    643 
    644   if ((simd_support & JSIMD_ARM_NEON))
    645     return 1;
    646 
    647   return 0;
    648 }
    649 
    650 GLOBAL(int)
    651 jsimd_can_idct_float (void)
    652 {
    653   init_simd();
    654 
    655   return 0;
    656 }
    657 
    658 GLOBAL(void)
    659 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    660                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    661                 JDIMENSION output_col)
    662 {
    663   if ((simd_support & JSIMD_ARM_NEON))
    664     jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, output_col);
    665 }
    666 
    667 GLOBAL(void)
    668 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    669                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    670                 JDIMENSION output_col)
    671 {
    672   if ((simd_support & JSIMD_ARM_NEON))
    673     jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col);
    674 }
    675 
    676 GLOBAL(void)
    677 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    678                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    679                 JDIMENSION output_col)
    680 {
    681 }
    682 
    683