Home | History | Annotate | Download | only in simd
      1 /*
      2  * jsimd_arm.c
      3  *
      4  * Copyright 2009 Pierre Ossman <ossman (at) cendio.se> for Cendio AB
      5  * Copyright 2009-2011, 2013-2014 D. R. Commander
      6  *
      7  * Based on the x86 SIMD extension for IJG JPEG library,
      8  * Copyright (C) 1999-2006, MIYASAKA Masaru.
      9  * For conditions of distribution and use, see copyright notice in jsimdext.inc
     10  *
     11  * This file contains the interface between the "normal" portions
     12  * of the library and the SIMD implementations when running on a
     13  * 32-bit ARM architecture.
     14  */
     15 
     16 #define JPEG_INTERNALS
     17 #include "../jinclude.h"
     18 #include "../jpeglib.h"
     19 #include "../jsimd.h"
     20 #include "../jdct.h"
     21 #include "../jsimddct.h"
     22 #include "jsimd.h"
     23 
     24 #include <stdio.h>
     25 #include <string.h>
     26 #include <ctype.h>
     27 
     28 static unsigned int simd_support = ~0;
     29 
     30 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
     31 
     32 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
     33 
     34 LOCAL(int)
     35 check_feature (char *buffer, char *feature)
     36 {
     37   char *p;
     38   if (*feature == 0)
     39     return 0;
     40   if (strncmp(buffer, "Features", 8) != 0)
     41     return 0;
     42   buffer += 8;
     43   while (isspace(*buffer))
     44     buffer++;
     45 
     46   /* Check if 'feature' is present in the buffer as a separate word */
     47   while ((p = strstr(buffer, feature))) {
     48     if (p > buffer && !isspace(*(p - 1))) {
     49       buffer++;
     50       continue;
     51     }
     52     p += strlen(feature);
     53     if (*p != 0 && !isspace(*p)) {
     54       buffer++;
     55       continue;
     56     }
     57     return 1;
     58   }
     59   return 0;
     60 }
     61 
     62 LOCAL(int)
     63 parse_proc_cpuinfo (int bufsize)
     64 {
     65   char *buffer = (char *)malloc(bufsize);
     66   FILE *fd;
     67   simd_support = 0;
     68 
     69   if (!buffer)
     70     return 0;
     71 
     72   fd = fopen("/proc/cpuinfo", "r");
     73   if (fd) {
     74     while (fgets(buffer, bufsize, fd)) {
     75       if (!strchr(buffer, '\n') && !feof(fd)) {
     76         /* "impossible" happened - insufficient size of the buffer! */
     77         fclose(fd);
     78         free(buffer);
     79         return 0;
     80       }
     81       if (check_feature(buffer, "neon"))
     82         simd_support |= JSIMD_ARM_NEON;
     83     }
     84     fclose(fd);
     85   }
     86   free(buffer);
     87   return 1;
     88 }
     89 
     90 #endif
     91 
     92 /*
     93  * Check what SIMD accelerations are supported.
     94  *
     95  * FIXME: This code is racy under a multi-threaded environment.
     96  */
     97 LOCAL(void)
     98 init_simd (void)
     99 {
    100   char *env = NULL;
    101 #if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
    102   int bufsize = 1024; /* an initial guess for the line buffer size limit */
    103 #endif
    104 
    105   if (simd_support != ~0U)
    106     return;
    107 
    108   simd_support = 0;
    109 
    110 #if defined(__ARM_NEON__)
    111   simd_support |= JSIMD_ARM_NEON;
    112 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
    113   /* We still have a chance to use NEON regardless of globally used
    114    * -mcpu/-mfpu options passed to gcc by performing runtime detection via
    115    * /proc/cpuinfo parsing on linux/android */
    116   while (!parse_proc_cpuinfo(bufsize)) {
    117     bufsize *= 2;
    118     if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
    119       break;
    120   }
    121 #endif
    122 
    123   /* Force different settings through environment variables */
    124   env = getenv("JSIMD_FORCENEON");
    125   if ((env != NULL) && (strcmp(env, "1") == 0))
    126     simd_support &= JSIMD_ARM_NEON;
    127   env = getenv("JSIMD_FORCENONE");
    128   if ((env != NULL) && (strcmp(env, "1") == 0))
    129     simd_support = 0;
    130 }
    131 
    132 GLOBAL(int)
    133 jsimd_can_rgb_ycc (void)
    134 {
    135   init_simd();
    136 
    137   /* The code is optimised for these values only */
    138   if (BITS_IN_JSAMPLE != 8)
    139     return 0;
    140   if (sizeof(JDIMENSION) != 4)
    141     return 0;
    142   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    143     return 0;
    144 
    145   if (simd_support & JSIMD_ARM_NEON)
    146     return 1;
    147 
    148   return 0;
    149 }
    150 
    151 GLOBAL(int)
    152 jsimd_can_rgb_gray (void)
    153 {
    154   init_simd();
    155 
    156   return 0;
    157 }
    158 
    159 GLOBAL(int)
    160 jsimd_can_ycc_rgb (void)
    161 {
    162   init_simd();
    163 
    164   /* The code is optimised for these values only */
    165   if (BITS_IN_JSAMPLE != 8)
    166     return 0;
    167   if (sizeof(JDIMENSION) != 4)
    168     return 0;
    169   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    170     return 0;
    171 
    172   if (simd_support & JSIMD_ARM_NEON)
    173     return 1;
    174 
    175   return 0;
    176 }
    177 
    178 GLOBAL(int)
    179 jsimd_can_ycc_rgb565 (void)
    180 {
    181   init_simd();
    182 
    183   /* The code is optimised for these values only */
    184   if (BITS_IN_JSAMPLE != 8)
    185     return 0;
    186   if (sizeof(JDIMENSION) != 4)
    187     return 0;
    188 
    189   if (simd_support & JSIMD_ARM_NEON)
    190     return 1;
    191 
    192   return 0;
    193 }
    194 
    195 GLOBAL(void)
    196 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
    197                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
    198                        JDIMENSION output_row, int num_rows)
    199 {
    200   void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    201 
    202   switch(cinfo->in_color_space) {
    203     case JCS_EXT_RGB:
    204       neonfct=jsimd_extrgb_ycc_convert_neon;
    205       break;
    206     case JCS_EXT_RGBX:
    207     case JCS_EXT_RGBA:
    208       neonfct=jsimd_extrgbx_ycc_convert_neon;
    209       break;
    210     case JCS_EXT_BGR:
    211       neonfct=jsimd_extbgr_ycc_convert_neon;
    212       break;
    213     case JCS_EXT_BGRX:
    214     case JCS_EXT_BGRA:
    215       neonfct=jsimd_extbgrx_ycc_convert_neon;
    216       break;
    217     case JCS_EXT_XBGR:
    218     case JCS_EXT_ABGR:
    219       neonfct=jsimd_extxbgr_ycc_convert_neon;
    220       break;
    221     case JCS_EXT_XRGB:
    222     case JCS_EXT_ARGB:
    223       neonfct=jsimd_extxrgb_ycc_convert_neon;
    224       break;
    225     default:
    226       neonfct=jsimd_extrgb_ycc_convert_neon;
    227       break;
    228   }
    229 
    230   if (simd_support & JSIMD_ARM_NEON)
    231     neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    232 }
    233 
    234 GLOBAL(void)
    235 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
    236                         JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
    237                         JDIMENSION output_row, int num_rows)
    238 {
    239 }
    240 
    241 GLOBAL(void)
    242 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
    243                        JSAMPIMAGE input_buf, JDIMENSION input_row,
    244                        JSAMPARRAY output_buf, int num_rows)
    245 {
    246   void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
    247 
    248   switch(cinfo->out_color_space) {
    249     case JCS_EXT_RGB:
    250       neonfct=jsimd_ycc_extrgb_convert_neon;
    251       break;
    252     case JCS_EXT_RGBX:
    253     case JCS_EXT_RGBA:
    254       neonfct=jsimd_ycc_extrgbx_convert_neon;
    255       break;
    256     case JCS_EXT_BGR:
    257       neonfct=jsimd_ycc_extbgr_convert_neon;
    258       break;
    259     case JCS_EXT_BGRX:
    260     case JCS_EXT_BGRA:
    261       neonfct=jsimd_ycc_extbgrx_convert_neon;
    262       break;
    263     case JCS_EXT_XBGR:
    264     case JCS_EXT_ABGR:
    265       neonfct=jsimd_ycc_extxbgr_convert_neon;
    266       break;
    267     case JCS_EXT_XRGB:
    268     case JCS_EXT_ARGB:
    269       neonfct=jsimd_ycc_extxrgb_convert_neon;
    270       break;
    271     default:
    272       neonfct=jsimd_ycc_extrgb_convert_neon;
    273       break;
    274   }
    275 
    276   if (simd_support & JSIMD_ARM_NEON)
    277     neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
    278 }
    279 
    280 GLOBAL(void)
    281 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
    282                           JSAMPIMAGE input_buf, JDIMENSION input_row,
    283                           JSAMPARRAY output_buf, int num_rows)
    284 {
    285   if (simd_support & JSIMD_ARM_NEON)
    286     jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
    287                                   output_buf, num_rows);
    288 }
    289 
    290 GLOBAL(int)
    291 jsimd_can_h2v2_downsample (void)
    292 {
    293   init_simd();
    294 
    295   return 0;
    296 }
    297 
    298 GLOBAL(int)
    299 jsimd_can_h2v1_downsample (void)
    300 {
    301   init_simd();
    302 
    303   return 0;
    304 }
    305 
    306 GLOBAL(void)
    307 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
    308                        JSAMPARRAY input_data, JSAMPARRAY output_data)
    309 {
    310 }
    311 
    312 GLOBAL(void)
    313 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
    314                        JSAMPARRAY input_data, JSAMPARRAY output_data)
    315 {
    316 }
    317 
    318 GLOBAL(int)
    319 jsimd_can_h2v2_upsample (void)
    320 {
    321   init_simd();
    322 
    323   return 0;
    324 }
    325 
    326 GLOBAL(int)
    327 jsimd_can_h2v1_upsample (void)
    328 {
    329   init_simd();
    330 
    331   return 0;
    332 }
    333 
    334 GLOBAL(void)
    335 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
    336                      jpeg_component_info * compptr,
    337                      JSAMPARRAY input_data,
    338                      JSAMPARRAY * output_data_ptr)
    339 {
    340 }
    341 
    342 GLOBAL(void)
    343 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
    344                      jpeg_component_info * compptr,
    345                      JSAMPARRAY input_data,
    346                      JSAMPARRAY * output_data_ptr)
    347 {
    348 }
    349 
    350 GLOBAL(int)
    351 jsimd_can_h2v2_fancy_upsample (void)
    352 {
    353   init_simd();
    354 
    355   return 0;
    356 }
    357 
    358 GLOBAL(int)
    359 jsimd_can_h2v1_fancy_upsample (void)
    360 {
    361   init_simd();
    362 
    363   /* The code is optimised for these values only */
    364   if (BITS_IN_JSAMPLE != 8)
    365     return 0;
    366   if (sizeof(JDIMENSION) != 4)
    367     return 0;
    368 
    369   if (simd_support & JSIMD_ARM_NEON)
    370     return 1;
    371 
    372   return 0;
    373 }
    374 
    375 GLOBAL(void)
    376 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
    377                            jpeg_component_info * compptr,
    378                            JSAMPARRAY input_data,
    379                            JSAMPARRAY * output_data_ptr)
    380 {
    381 }
    382 
    383 GLOBAL(void)
    384 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
    385                            jpeg_component_info * compptr,
    386                            JSAMPARRAY input_data,
    387                            JSAMPARRAY * output_data_ptr)
    388 {
    389   if (simd_support & JSIMD_ARM_NEON)
    390     jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
    391                                    compptr->downsampled_width, input_data,
    392                                    output_data_ptr);
    393 }
    394 
    395 GLOBAL(int)
    396 jsimd_can_h2v2_merged_upsample (void)
    397 {
    398   init_simd();
    399 
    400   return 0;
    401 }
    402 
    403 GLOBAL(int)
    404 jsimd_can_h2v1_merged_upsample (void)
    405 {
    406   init_simd();
    407 
    408   return 0;
    409 }
    410 
    411 GLOBAL(void)
    412 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
    413                             JSAMPIMAGE input_buf,
    414                             JDIMENSION in_row_group_ctr,
    415                             JSAMPARRAY output_buf)
    416 {
    417 }
    418 
    419 GLOBAL(void)
    420 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
    421                             JSAMPIMAGE input_buf,
    422                             JDIMENSION in_row_group_ctr,
    423                             JSAMPARRAY output_buf)
    424 {
    425 }
    426 
    427 GLOBAL(int)
    428 jsimd_can_convsamp (void)
    429 {
    430   init_simd();
    431 
    432   /* The code is optimised for these values only */
    433   if (DCTSIZE != 8)
    434     return 0;
    435   if (BITS_IN_JSAMPLE != 8)
    436     return 0;
    437   if (sizeof(JDIMENSION) != 4)
    438     return 0;
    439   if (sizeof(DCTELEM) != 2)
    440     return 0;
    441 
    442   if (simd_support & JSIMD_ARM_NEON)
    443     return 1;
    444 
    445   return 0;
    446 }
    447 
    448 GLOBAL(int)
    449 jsimd_can_convsamp_float (void)
    450 {
    451   init_simd();
    452 
    453   return 0;
    454 }
    455 
    456 GLOBAL(void)
    457 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
    458                 DCTELEM * workspace)
    459 {
    460   if (simd_support & JSIMD_ARM_NEON)
    461     jsimd_convsamp_neon(sample_data, start_col, workspace);
    462 }
    463 
    464 GLOBAL(void)
    465 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
    466                       FAST_FLOAT * workspace)
    467 {
    468 }
    469 
    470 GLOBAL(int)
    471 jsimd_can_fdct_islow (void)
    472 {
    473   init_simd();
    474 
    475   return 0;
    476 }
    477 
    478 GLOBAL(int)
    479 jsimd_can_fdct_ifast (void)
    480 {
    481   init_simd();
    482 
    483   /* The code is optimised for these values only */
    484   if (DCTSIZE != 8)
    485     return 0;
    486   if (sizeof(DCTELEM) != 2)
    487     return 0;
    488 
    489   if (simd_support & JSIMD_ARM_NEON)
    490     return 1;
    491 
    492   return 0;
    493 }
    494 
    495 GLOBAL(int)
    496 jsimd_can_fdct_float (void)
    497 {
    498   init_simd();
    499 
    500   return 0;
    501 }
    502 
    503 GLOBAL(void)
    504 jsimd_fdct_islow (DCTELEM * data)
    505 {
    506 }
    507 
    508 GLOBAL(void)
    509 jsimd_fdct_ifast (DCTELEM * data)
    510 {
    511   if (simd_support & JSIMD_ARM_NEON)
    512     jsimd_fdct_ifast_neon(data);
    513 }
    514 
    515 GLOBAL(void)
    516 jsimd_fdct_float (FAST_FLOAT * data)
    517 {
    518 }
    519 
    520 GLOBAL(int)
    521 jsimd_can_quantize (void)
    522 {
    523   init_simd();
    524 
    525   /* The code is optimised for these values only */
    526   if (DCTSIZE != 8)
    527     return 0;
    528   if (sizeof(JCOEF) != 2)
    529     return 0;
    530   if (sizeof(DCTELEM) != 2)
    531     return 0;
    532 
    533   if (simd_support & JSIMD_ARM_NEON)
    534     return 1;
    535 
    536   return 0;
    537 }
    538 
    539 GLOBAL(int)
    540 jsimd_can_quantize_float (void)
    541 {
    542   init_simd();
    543 
    544   return 0;
    545 }
    546 
    547 GLOBAL(void)
    548 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
    549                 DCTELEM * workspace)
    550 {
    551   if (simd_support & JSIMD_ARM_NEON)
    552     jsimd_quantize_neon(coef_block, divisors, workspace);
    553 }
    554 
    555 GLOBAL(void)
    556 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
    557                       FAST_FLOAT * workspace)
    558 {
    559 }
    560 
    561 GLOBAL(int)
    562 jsimd_can_idct_2x2 (void)
    563 {
    564   init_simd();
    565 
    566   /* The code is optimised for these values only */
    567   if (DCTSIZE != 8)
    568     return 0;
    569   if (sizeof(JCOEF) != 2)
    570     return 0;
    571   if (BITS_IN_JSAMPLE != 8)
    572     return 0;
    573   if (sizeof(JDIMENSION) != 4)
    574     return 0;
    575   if (sizeof(ISLOW_MULT_TYPE) != 2)
    576     return 0;
    577 
    578   if (simd_support & JSIMD_ARM_NEON)
    579     return 1;
    580 
    581   return 0;
    582 }
    583 
    584 GLOBAL(int)
    585 jsimd_can_idct_4x4 (void)
    586 {
    587   init_simd();
    588 
    589   /* The code is optimised for these values only */
    590   if (DCTSIZE != 8)
    591     return 0;
    592   if (sizeof(JCOEF) != 2)
    593     return 0;
    594   if (BITS_IN_JSAMPLE != 8)
    595     return 0;
    596   if (sizeof(JDIMENSION) != 4)
    597     return 0;
    598   if (sizeof(ISLOW_MULT_TYPE) != 2)
    599     return 0;
    600 
    601   if (simd_support & JSIMD_ARM_NEON)
    602     return 1;
    603 
    604   return 0;
    605 }
    606 
    607 GLOBAL(void)
    608 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    609                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    610                 JDIMENSION output_col)
    611 {
    612   if (simd_support & JSIMD_ARM_NEON)
    613     jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf,
    614                         output_col);
    615 }
    616 
    617 GLOBAL(void)
    618 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    619                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    620                 JDIMENSION output_col)
    621 {
    622   if (simd_support & JSIMD_ARM_NEON)
    623     jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf,
    624                         output_col);
    625 }
    626 
    627 GLOBAL(int)
    628 jsimd_can_idct_islow (void)
    629 {
    630   init_simd();
    631 
    632   /* The code is optimised for these values only */
    633   if (DCTSIZE != 8)
    634     return 0;
    635   if (sizeof(JCOEF) != 2)
    636     return 0;
    637   if (BITS_IN_JSAMPLE != 8)
    638     return 0;
    639   if (sizeof(JDIMENSION) != 4)
    640     return 0;
    641   if (sizeof(ISLOW_MULT_TYPE) != 2)
    642     return 0;
    643 
    644   if (simd_support & JSIMD_ARM_NEON)
    645     return 1;
    646 
    647   return 0;
    648 }
    649 
    650 GLOBAL(int)
    651 jsimd_can_idct_ifast (void)
    652 {
    653   init_simd();
    654 
    655   /* The code is optimised for these values only */
    656   if (DCTSIZE != 8)
    657     return 0;
    658   if (sizeof(JCOEF) != 2)
    659     return 0;
    660   if (BITS_IN_JSAMPLE != 8)
    661     return 0;
    662   if (sizeof(JDIMENSION) != 4)
    663     return 0;
    664   if (sizeof(IFAST_MULT_TYPE) != 2)
    665     return 0;
    666   if (IFAST_SCALE_BITS != 2)
    667     return 0;
    668 
    669   if (simd_support & JSIMD_ARM_NEON)
    670     return 1;
    671 
    672   return 0;
    673 }
    674 
    675 GLOBAL(int)
    676 jsimd_can_idct_float (void)
    677 {
    678   init_simd();
    679 
    680   return 0;
    681 }
    682 
    683 GLOBAL(void)
    684 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    685                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
    686                   JDIMENSION output_col)
    687 {
    688   if (simd_support & JSIMD_ARM_NEON)
    689     jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
    690                           output_col);
    691 }
    692 
    693 GLOBAL(void)
    694 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    695                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
    696                   JDIMENSION output_col)
    697 {
    698   if (simd_support & JSIMD_ARM_NEON)
    699     jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
    700                           output_col);
    701 }
    702 
    703 GLOBAL(void)
    704 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
    705                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
    706                   JDIMENSION output_col)
    707 {
    708 }
    709