Home | History | Annotate | Download | only in simd
      1 /*
      2  * jsimd_arm.c
      3  *
      4  * Copyright 2009 Pierre Ossman <ossman (at) cendio.se> for Cendio AB
      5  * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
      6  * Copyright (C) 2009-2011, 2013-2014, 2016, D. R. Commander.
      7  * Copyright (C) 2015-2016, Matthieu Darbois.
      8  *
      9  * Based on the x86 SIMD extension for IJG JPEG library,
     10  * Copyright (C) 1999-2006, MIYASAKA Masaru.
     11  * For conditions of distribution and use, see copyright notice in jsimdext.inc
     12  *
     13  * This file contains the interface between the "normal" portions
     14  * of the library and the SIMD implementations when running on a
     15  * 32-bit ARM architecture.
     16  */
     17 
     18 #define JPEG_INTERNALS
     19 #include "../jinclude.h"
     20 #include "../jpeglib.h"
     21 #include "../jsimd.h"
     22 #include "../jdct.h"
     23 #include "../jsimddct.h"
     24 #include "jsimd.h"
     25 
     26 #include <stdio.h>
     27 #include <string.h>
     28 #include <ctype.h>
     29 
     30 static unsigned int simd_support = ~0;
     31 static unsigned int simd_huffman = 1;
     32 
     33 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
     34 
     35 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
     36 
     37 LOCAL(int)
     38 check_feature (char *buffer, char *feature)
     39 {
     40   char *p;
     41   if (*feature == 0)
     42     return 0;
     43   if (strncmp(buffer, "Features", 8) != 0)
     44     return 0;
     45   buffer += 8;
     46   while (isspace(*buffer))
     47     buffer++;
     48 
     49   /* Check if 'feature' is present in the buffer as a separate word */
     50   while ((p = strstr(buffer, feature))) {
     51     if (p > buffer && !isspace(*(p - 1))) {
     52       buffer++;
     53       continue;
     54     }
     55     p += strlen(feature);
     56     if (*p != 0 && !isspace(*p)) {
     57       buffer++;
     58       continue;
     59     }
     60     return 1;
     61   }
     62   return 0;
     63 }
     64 
     65 LOCAL(int)
     66 parse_proc_cpuinfo (int bufsize)
     67 {
     68   char *buffer = (char *)malloc(bufsize);
     69   FILE *fd;
     70   simd_support = 0;
     71 
     72   if (!buffer)
     73     return 0;
     74 
     75   fd = fopen("/proc/cpuinfo", "r");
     76   if (fd) {
     77     while (fgets(buffer, bufsize, fd)) {
     78       if (!strchr(buffer, '\n') && !feof(fd)) {
     79         /* "impossible" happened - insufficient size of the buffer! */
     80         fclose(fd);
     81         free(buffer);
     82         return 0;
     83       }
     84       if (check_feature(buffer, "neon"))
     85         simd_support |= JSIMD_ARM_NEON;
     86     }
     87     fclose(fd);
     88   }
     89   free(buffer);
     90   return 1;
     91 }
     92 
     93 #endif
     94 
     95 /*
     96  * Check what SIMD accelerations are supported.
     97  *
     98  * FIXME: This code is racy under a multi-threaded environment.
     99  */
    100 LOCAL(void)
    101 init_simd (void)
    102 {
    103   char *env = NULL;
    104 #if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
    105   int bufsize = 1024; /* an initial guess for the line buffer size limit */
    106 #endif
    107 
    108   if (simd_support != ~0U)
    109     return;
    110 
    111   simd_support = 0;
    112 
    113 #if defined(__ARM_NEON__)
    114   simd_support |= JSIMD_ARM_NEON;
    115 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
    116   /* We still have a chance to use NEON regardless of globally used
    117    * -mcpu/-mfpu options passed to gcc by performing runtime detection via
    118    * /proc/cpuinfo parsing on linux/android */
    119   while (!parse_proc_cpuinfo(bufsize)) {
    120     bufsize *= 2;
    121     if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
    122       break;
    123   }
    124 #endif
    125 
    126   /* Force different settings through environment variables */
    127   env = getenv("JSIMD_FORCENEON");
    128   if ((env != NULL) && (strcmp(env, "1") == 0))
    129     simd_support = JSIMD_ARM_NEON;
    130   env = getenv("JSIMD_FORCENONE");
    131   if ((env != NULL) && (strcmp(env, "1") == 0))
    132     simd_support = 0;
    133   env = getenv("JSIMD_NOHUFFENC");
    134   if ((env != NULL) && (strcmp(env, "1") == 0))
    135     simd_huffman = 0;
    136 }
    137 
    138 GLOBAL(int)
    139 jsimd_can_rgb_ycc (void)
    140 {
    141   init_simd();
    142 
    143   /* The code is optimised for these values only */
    144   if (BITS_IN_JSAMPLE != 8)
    145     return 0;
    146   if (sizeof(JDIMENSION) != 4)
    147     return 0;
    148   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    149     return 0;
    150 
    151   if (simd_support & JSIMD_ARM_NEON)
    152     return 1;
    153 
    154   return 0;
    155 }
    156 
    157 GLOBAL(int)
    158 jsimd_can_rgb_gray (void)
    159 {
    160   init_simd();
    161 
    162   return 0;
    163 }
    164 
    165 GLOBAL(int)
    166 jsimd_can_ycc_rgb (void)
    167 {
    168   init_simd();
    169 
    170   /* The code is optimised for these values only */
    171   if (BITS_IN_JSAMPLE != 8)
    172     return 0;
    173   if (sizeof(JDIMENSION) != 4)
    174     return 0;
    175   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    176     return 0;
    177 
    178   if (simd_support & JSIMD_ARM_NEON)
    179     return 1;
    180 
    181   return 0;
    182 }
    183 
    184 GLOBAL(int)
    185 jsimd_can_ycc_rgb565 (void)
    186 {
    187   init_simd();
    188 
    189   /* The code is optimised for these values only */
    190   if (BITS_IN_JSAMPLE != 8)
    191     return 0;
    192   if (sizeof(JDIMENSION) != 4)
    193     return 0;
    194 
    195   if (simd_support & JSIMD_ARM_NEON)
    196     return 1;
    197 
    198   return 0;
    199 }
    200 
    201 GLOBAL(void)
    202 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
    203                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
    204                        JDIMENSION output_row, int num_rows)
    205 {
    206   void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    207 
    208   switch(cinfo->in_color_space) {
    209     case JCS_EXT_RGB:
    210       neonfct=jsimd_extrgb_ycc_convert_neon;
    211       break;
    212     case JCS_EXT_RGBX:
    213     case JCS_EXT_RGBA:
    214       neonfct=jsimd_extrgbx_ycc_convert_neon;
    215       break;
    216     case JCS_EXT_BGR:
    217       neonfct=jsimd_extbgr_ycc_convert_neon;
    218       break;
    219     case JCS_EXT_BGRX:
    220     case JCS_EXT_BGRA:
    221       neonfct=jsimd_extbgrx_ycc_convert_neon;
    222       break;
    223     case JCS_EXT_XBGR:
    224     case JCS_EXT_ABGR:
    225       neonfct=jsimd_extxbgr_ycc_convert_neon;
    226       break;
    227     case JCS_EXT_XRGB:
    228     case JCS_EXT_ARGB:
    229       neonfct=jsimd_extxrgb_ycc_convert_neon;
    230       break;
    231     default:
    232       neonfct=jsimd_extrgb_ycc_convert_neon;
    233       break;
    234   }
    235 
    236   neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    237 }
    238 
    239 GLOBAL(void)
    240 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
    241                         JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
    242                         JDIMENSION output_row, int num_rows)
    243 {
    244 }
    245 
    246 GLOBAL(void)
    247 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
    248                        JSAMPIMAGE input_buf, JDIMENSION input_row,
    249                        JSAMPARRAY output_buf, int num_rows)
    250 {
    251   void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
    252 
    253   switch(cinfo->out_color_space) {
    254     case JCS_EXT_RGB:
    255       neonfct=jsimd_ycc_extrgb_convert_neon;
    256       break;
    257     case JCS_EXT_RGBX:
    258     case JCS_EXT_RGBA:
    259       neonfct=jsimd_ycc_extrgbx_convert_neon;
    260       break;
    261     case JCS_EXT_BGR:
    262       neonfct=jsimd_ycc_extbgr_convert_neon;
    263       break;
    264     case JCS_EXT_BGRX:
    265     case JCS_EXT_BGRA:
    266       neonfct=jsimd_ycc_extbgrx_convert_neon;
    267       break;
    268     case JCS_EXT_XBGR:
    269     case JCS_EXT_ABGR:
    270       neonfct=jsimd_ycc_extxbgr_convert_neon;
    271       break;
    272     case JCS_EXT_XRGB:
    273     case JCS_EXT_ARGB:
    274       neonfct=jsimd_ycc_extxrgb_convert_neon;
    275       break;
    276     default:
    277       neonfct=jsimd_ycc_extrgb_convert_neon;
    278       break;
    279   }
    280 
    281   neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
    282 }
    283 
    284 GLOBAL(void)
    285 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
    286                           JSAMPIMAGE input_buf, JDIMENSION input_row,
    287                           JSAMPARRAY output_buf, int num_rows)
    288 {
    289   jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
    290                                 output_buf, num_rows);
    291 }
    292 
    293 GLOBAL(int)
    294 jsimd_can_h2v2_downsample (void)
    295 {
    296   init_simd();
    297 
    298   return 0;
    299 }
    300 
    301 GLOBAL(int)
    302 jsimd_can_h2v1_downsample (void)
    303 {
    304   init_simd();
    305 
    306   return 0;
    307 }
    308 
    309 GLOBAL(void)
    310 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
    311                        JSAMPARRAY input_data, JSAMPARRAY output_data)
    312 {
    313 }
    314 
    315 GLOBAL(void)
    316 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
    317                        JSAMPARRAY input_data, JSAMPARRAY output_data)
    318 {
    319 }
    320 
    321 GLOBAL(int)
    322 jsimd_can_h2v2_upsample (void)
    323 {
    324   init_simd();
    325 
    326   return 0;
    327 }
    328 
    329 GLOBAL(int)
    330 jsimd_can_h2v1_upsample (void)
    331 {
    332   init_simd();
    333 
    334   return 0;
    335 }
    336 
    337 GLOBAL(void)
    338 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
    339                      jpeg_component_info *compptr,
    340                      JSAMPARRAY input_data,
    341                      JSAMPARRAY *output_data_ptr)
    342 {
    343 }
    344 
    345 GLOBAL(void)
    346 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
    347                      jpeg_component_info *compptr,
    348                      JSAMPARRAY input_data,
    349                      JSAMPARRAY *output_data_ptr)
    350 {
    351 }
    352 
    353 GLOBAL(int)
    354 jsimd_can_h2v2_fancy_upsample (void)
    355 {
    356   init_simd();
    357 
    358   return 0;
    359 }
    360 
    361 GLOBAL(int)
    362 jsimd_can_h2v1_fancy_upsample (void)
    363 {
    364   init_simd();
    365 
    366   /* The code is optimised for these values only */
    367   if (BITS_IN_JSAMPLE != 8)
    368     return 0;
    369   if (sizeof(JDIMENSION) != 4)
    370     return 0;
    371 
    372   if (simd_support & JSIMD_ARM_NEON)
    373     return 1;
    374 
    375   return 0;
    376 }
    377 
    378 GLOBAL(void)
    379 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
    380                            jpeg_component_info *compptr,
    381                            JSAMPARRAY input_data,
    382                            JSAMPARRAY *output_data_ptr)
    383 {
    384 }
    385 
    386 GLOBAL(void)
    387 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
    388                            jpeg_component_info *compptr,
    389                            JSAMPARRAY input_data,
    390                            JSAMPARRAY *output_data_ptr)
    391 {
    392   jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
    393                                  compptr->downsampled_width, input_data,
    394                                  output_data_ptr);
    395 }
    396 
    397 GLOBAL(int)
    398 jsimd_can_h2v2_merged_upsample (void)
    399 {
    400   init_simd();
    401 
    402   return 0;
    403 }
    404 
    405 GLOBAL(int)
    406 jsimd_can_h2v1_merged_upsample (void)
    407 {
    408   init_simd();
    409 
    410   return 0;
    411 }
    412 
    413 GLOBAL(void)
    414 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
    415                             JSAMPIMAGE input_buf,
    416                             JDIMENSION in_row_group_ctr,
    417                             JSAMPARRAY output_buf)
    418 {
    419 }
    420 
    421 GLOBAL(void)
    422 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
    423                             JSAMPIMAGE input_buf,
    424                             JDIMENSION in_row_group_ctr,
    425                             JSAMPARRAY output_buf)
    426 {
    427 }
    428 
    429 GLOBAL(int)
    430 jsimd_can_convsamp (void)
    431 {
    432   init_simd();
    433 
    434   /* The code is optimised for these values only */
    435   if (DCTSIZE != 8)
    436     return 0;
    437   if (BITS_IN_JSAMPLE != 8)
    438     return 0;
    439   if (sizeof(JDIMENSION) != 4)
    440     return 0;
    441   if (sizeof(DCTELEM) != 2)
    442     return 0;
    443 
    444   if (simd_support & JSIMD_ARM_NEON)
    445     return 1;
    446 
    447   return 0;
    448 }
    449 
    450 GLOBAL(int)
    451 jsimd_can_convsamp_float (void)
    452 {
    453   init_simd();
    454 
    455   return 0;
    456 }
    457 
    458 GLOBAL(void)
    459 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
    460                 DCTELEM *workspace)
    461 {
    462   jsimd_convsamp_neon(sample_data, start_col, workspace);
    463 }
    464 
    465 GLOBAL(void)
    466 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
    467                       FAST_FLOAT *workspace)
    468 {
    469 }
    470 
    471 GLOBAL(int)
    472 jsimd_can_fdct_islow (void)
    473 {
    474   init_simd();
    475 
    476   return 0;
    477 }
    478 
    479 GLOBAL(int)
    480 jsimd_can_fdct_ifast (void)
    481 {
    482   init_simd();
    483 
    484   /* The code is optimised for these values only */
    485   if (DCTSIZE != 8)
    486     return 0;
    487   if (sizeof(DCTELEM) != 2)
    488     return 0;
    489 
    490   if (simd_support & JSIMD_ARM_NEON)
    491     return 1;
    492 
    493   return 0;
    494 }
    495 
    496 GLOBAL(int)
    497 jsimd_can_fdct_float (void)
    498 {
    499   init_simd();
    500 
    501   return 0;
    502 }
    503 
    504 GLOBAL(void)
    505 jsimd_fdct_islow (DCTELEM *data)
    506 {
    507 }
    508 
    509 GLOBAL(void)
    510 jsimd_fdct_ifast (DCTELEM *data)
    511 {
    512   jsimd_fdct_ifast_neon(data);
    513 }
    514 
    515 GLOBAL(void)
    516 jsimd_fdct_float (FAST_FLOAT *data)
    517 {
    518 }
    519 
    520 GLOBAL(int)
    521 jsimd_can_quantize (void)
    522 {
    523   init_simd();
    524 
    525   /* The code is optimised for these values only */
    526   if (DCTSIZE != 8)
    527     return 0;
    528   if (sizeof(JCOEF) != 2)
    529     return 0;
    530   if (sizeof(DCTELEM) != 2)
    531     return 0;
    532 
    533   if (simd_support & JSIMD_ARM_NEON)
    534     return 1;
    535 
    536   return 0;
    537 }
    538 
    539 GLOBAL(int)
    540 jsimd_can_quantize_float (void)
    541 {
    542   init_simd();
    543 
    544   return 0;
    545 }
    546 
    547 GLOBAL(void)
    548 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors,
    549                 DCTELEM *workspace)
    550 {
    551   jsimd_quantize_neon(coef_block, divisors, workspace);
    552 }
    553 
    554 GLOBAL(void)
    555 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors,
    556                       FAST_FLOAT *workspace)
    557 {
    558 }
    559 
    560 GLOBAL(int)
    561 jsimd_can_idct_2x2 (void)
    562 {
    563   init_simd();
    564 
    565   /* The code is optimised for these values only */
    566   if (DCTSIZE != 8)
    567     return 0;
    568   if (sizeof(JCOEF) != 2)
    569     return 0;
    570   if (BITS_IN_JSAMPLE != 8)
    571     return 0;
    572   if (sizeof(JDIMENSION) != 4)
    573     return 0;
    574   if (sizeof(ISLOW_MULT_TYPE) != 2)
    575     return 0;
    576 
    577   if (simd_support & JSIMD_ARM_NEON)
    578     return 1;
    579 
    580   return 0;
    581 }
    582 
    583 GLOBAL(int)
    584 jsimd_can_idct_4x4 (void)
    585 {
    586   init_simd();
    587 
    588   /* The code is optimised for these values only */
    589   if (DCTSIZE != 8)
    590     return 0;
    591   if (sizeof(JCOEF) != 2)
    592     return 0;
    593   if (BITS_IN_JSAMPLE != 8)
    594     return 0;
    595   if (sizeof(JDIMENSION) != 4)
    596     return 0;
    597   if (sizeof(ISLOW_MULT_TYPE) != 2)
    598     return 0;
    599 
    600   if (simd_support & JSIMD_ARM_NEON)
    601     return 1;
    602 
    603   return 0;
    604 }
    605 
    606 GLOBAL(void)
    607 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    608                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    609                 JDIMENSION output_col)
    610 {
    611   jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf,
    612                       output_col);
    613 }
    614 
    615 GLOBAL(void)
    616 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    617                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    618                 JDIMENSION output_col)
    619 {
    620   jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf,
    621                       output_col);
    622 }
    623 
    624 GLOBAL(int)
    625 jsimd_can_idct_islow (void)
    626 {
    627   init_simd();
    628 
    629   /* The code is optimised for these values only */
    630   if (DCTSIZE != 8)
    631     return 0;
    632   if (sizeof(JCOEF) != 2)
    633     return 0;
    634   if (BITS_IN_JSAMPLE != 8)
    635     return 0;
    636   if (sizeof(JDIMENSION) != 4)
    637     return 0;
    638   if (sizeof(ISLOW_MULT_TYPE) != 2)
    639     return 0;
    640 
    641   if (simd_support & JSIMD_ARM_NEON)
    642     return 1;
    643 
    644   return 0;
    645 }
    646 
    647 GLOBAL(int)
    648 jsimd_can_idct_ifast (void)
    649 {
    650   init_simd();
    651 
    652   /* The code is optimised for these values only */
    653   if (DCTSIZE != 8)
    654     return 0;
    655   if (sizeof(JCOEF) != 2)
    656     return 0;
    657   if (BITS_IN_JSAMPLE != 8)
    658     return 0;
    659   if (sizeof(JDIMENSION) != 4)
    660     return 0;
    661   if (sizeof(IFAST_MULT_TYPE) != 2)
    662     return 0;
    663   if (IFAST_SCALE_BITS != 2)
    664     return 0;
    665 
    666   if (simd_support & JSIMD_ARM_NEON)
    667     return 1;
    668 
    669   return 0;
    670 }
    671 
    672 GLOBAL(int)
    673 jsimd_can_idct_float (void)
    674 {
    675   init_simd();
    676 
    677   return 0;
    678 }
    679 
    680 GLOBAL(void)
    681 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    682                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
    683                   JDIMENSION output_col)
    684 {
    685   jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
    686                         output_col);
    687 }
    688 
    689 GLOBAL(void)
    690 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    691                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
    692                   JDIMENSION output_col)
    693 {
    694   jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
    695                         output_col);
    696 }
    697 
    698 GLOBAL(void)
    699 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    700                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
    701                   JDIMENSION output_col)
    702 {
    703 }
    704 
    705 GLOBAL(int)
    706 jsimd_can_huff_encode_one_block (void)
    707 {
    708   init_simd();
    709 
    710   if (DCTSIZE != 8)
    711     return 0;
    712   if (sizeof(JCOEF) != 2)
    713     return 0;
    714 
    715   if (simd_support & JSIMD_ARM_NEON && simd_huffman)
    716     return 1;
    717 
    718   return 0;
    719 }
    720 
    721 GLOBAL(JOCTET*)
    722 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block,
    723                              int last_dc_val, c_derived_tbl *dctbl,
    724                              c_derived_tbl *actbl)
    725 {
    726   return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
    727                                           dctbl, actbl);
    728 }
    729