Home | History | Annotate | Download | only in intel
      1 Enabling SSE support
      2 
      3 Copyright (c) 2016 Google, Inc.
      4 Written by Mike Klein, Matt Sarett
      5 
      6 This INSTALL file written by Glenn Randers-Pehrson, 2016.
      7 
      8 If you have moved intel_init.c and filter_sse2_intrinsics.c to a different
      9 directory, be sure to update the '#include "../../pngpriv.h"' line in both
     10 files if necessary to point to the correct relative location of pngpriv.h
     11 with respect to the new location of those files.
     12 
     13 To enable SSE support in libpng, follow the instructions in I, II, or III,
     14 below:
     15 
     16 I. Using patched "configure" scripts:
     17 
     18 First, apply intel_sse.patch in your build directory.
     19 
     20    patch -i contrib/intel/intel_sse.patch -p1
     21 
     22 Then, if you are not building in a new GIT clone, e.g., in a tar
     23 distribution, remove any existing pre-built configure scripts:
     24 
     25    ./configure --enable-maintainer-mode
     26    make maintainer-clean
     27    ./autogen.sh --maintainer --clean
     28 
     29 Finally, configure libpng with -DPNG_INTEL_SSE in CPPFLAGS:
     30 
     31    ./autogen.sh --maintainer
     32    CPPFLAGS="-DPNG_INTEL_SSE" ./configure [options]
     33    make CPPFLAGS="-DPNG_INTEL_SSE" [options]
     34    make
     35 
     36 II. Using a custom makefile:
     37 
     38 If you are using a custom makefile, you will have to update it
     39 manually to include contrib/intel/*.o in the dependencies, and to define
     40 PNG_INTEL_SSE.
     41 
     42 III. Using manually updated "configure" scripts:
     43 
     44 If you prefer, manually edit pngpriv.h, configure.ac, and Makefile.am,
     45 following the instructions below, then follow the instructions in
     46 section II of INSTALL in the main libpng directory, then configure libpng
     47 with -DPNG_INTEL_SSE in CPPFLAGS.
     48 
     49 1. Insert the following lines above the copyright line near the top of
     50 configure.ac:
     51 
     52 -----------------cut----------------
     53 # Copyright (c) 2016 Google, Inc.
     54 # Written by Mike Klein and Matt Sarett
     55 # Derived from the ARM supporting code in libpng/configure.ac, which was
     56 -----------------cut----------------
     57 
     58 2. Add the following code to configure.ac under HOST SPECIFIC OPTIONS
     59 directly beneath the section for ARM:
     60 
     61 -----------------cut----------------
     62 # INTEL
     63 # =====
     64 #
     65 # INTEL SSE (SIMD) support.
     66 
     67 AC_ARG_ENABLE([intel-sse],
     68    AS_HELP_STRING([[[--enable-intel-sse]]],
     69       [Enable Intel SSE optimizations: =no/off, yes/on:]
     70       [no/off: disable the optimizations;]
     71       [yes/on: enable the optimizations.]
     72       [If not specified: determined by the compiler.]),
     73    [case "$enableval" in
     74       no|off)
     75          # disable the default enabling:
     76          AC_DEFINE([PNG_INTEL_SSE_OPT], [0],
     77                    [Disable Intel SSE optimizations])
     78          # Prevent inclusion of the assembler files below:
     79          enable_intel_sse=no;;
     80       yes|on)
     81          AC_DEFINE([PNG_INTEL_SSE_OPT], [1],
     82                    [Enable Intel SSE optimizations]);;
     83       *)
     84          AC_MSG_ERROR([--enable-intel-sse=${enable_intel_sse}: invalid value])
     85    esac])
     86 
     87 # Add Intel specific files to all builds where the host_cpu is Intel ('x86*')
     88 # or where Intel optimizations were explicitly requested (this allows a
     89 # fallback if a future host CPU does not match 'x86*')
     90 AM_CONDITIONAL([PNG_INTEL_SSE],
     91    [test "$enable_intel_sse" != 'no' &&
     92     case "$host_cpu" in
     93       i?86|x86_64) :;;
     94       *)    test "$enable_intel_sse" != '';;
     95     esac])
     96 -----------------cut----------------
     97 
     98 3. Insert the following lines above the copyright line near the top of
     99 Makefile.am:
    100 
    101 -----------------cut----------------
    102 # Copyright (c) 2016 Google, Inc.
    103 # Written by Mike Klein and Matt Sarett
    104 # Derived from the ARM supporting code in libpng/Makefile.am, which was
    105 -----------------cut----------------
    106 
    107 4. Add the following code to Makefile.am under HOST SPECIFIC OPTIONS
    108 directly beneath the "if PNG_ARM_NEON ... endif" statement:
    109 
    110 -----------------cut----------------
    111 if PNG_INTEL_SSE
    112 libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += contrib/intel/intel_init.c\
    113     contrib/intel/filter_sse2_intrinsics.c
    114 endif
    115 -----------------cut----------------
    116 
    117 5. Add the following lines to pngpriv.h, following the PNG_ARM_NEON_OPT
    118 code:
    119 
    120 -----------------cut----------------
    121 #ifndef PNG_INTEL_SSE_OPT
    122 #   ifdef PNG_INTEL_SSE
    123       /* Only check for SSE if the build configuration has been modified to
    124        * enable SSE optimizations.  This means that these optimizations will
    125        * be off by default.  See contrib/intel for more details.
    126        */
    127 #     if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \
    128        defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
    129        (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
    130 #         define PNG_INTEL_SSE_OPT 1
    131 #      endif
    132 #   endif
    133 #endif
    134 
    135 #if PNG_INTEL_SSE_OPT > 0
    136 #   ifndef PNG_INTEL_SSE_IMPLEMENTATION
    137 #      if defined(__SSE4_1__) || defined(__AVX__)
    138           /* We are not actually using AVX, but checking for AVX is the best
    139              way we can detect SSE4.1 and SSSE3 on MSVC.
    140           */
    141 #         define PNG_INTEL_SSE_IMPLEMENTATION 3
    142 #      elif defined(__SSSE3__)
    143 #         define PNG_INTEL_SSE_IMPLEMENTATION 2
    144 #      elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
    145        (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
    146 #         define PNG_INTEL_SSE_IMPLEMENTATION 1
    147 #      else
    148 #         define PNG_INTEL_SSE_IMPLEMENTATION 0
    149 #      endif
    150 #   endif
    151 
    152 #   if PNG_INTEL_SSE_IMPLEMENTATION > 0
    153 #      define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2
    154 #   endif
    155 #endif
    156 
    157 -----------------cut----------------
    158 
    159 6. Add the following lines to pngpriv.h, following the prototype for
    160 png_read_filter_row_paeth4_neon:
    161 
    162 -----------------cut----------------
    163 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop
    164     row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
    165 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop
    166     row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
    167 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop
    168     row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
    169 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop
    170     row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
    171 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop
    172     row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
    173 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop
    174     row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
    175 
    176 -----------------cut----------------
    177