Home | History | Annotate | Download | only in i386
      1 ;
      2 ; jsimdcpu.asm - SIMD instruction support check
      3 ;
      4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
      5 ; Copyright (C) 2016, D. R. Commander.
      6 ;
      7 ; Based on the x86 SIMD extension for IJG JPEG library
      8 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
      9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
     10 ;
     11 ; This file should be assembled with NASM (Netwide Assembler); it
     12 ; can *not* be assembled with Microsoft's MASM or any compatible
     13 ; assembler (including Borland's Turbo Assembler).
     14 ; NASM is available from http://nasm.sourceforge.net/ or
     15 ; http://sourceforge.net/project/showfiles.php?group_id=6208
     16 ;
     17 ; [TAB8]
     18 
     19 %include "jsimdext.inc"
     20 
     21 ; --------------------------------------------------------------------------
     22     SECTION     SEG_TEXT
     23     BITS        32
     24 ;
     25 ; Check if the CPU supports SIMD instructions
     26 ;
     27 ; GLOBAL(unsigned int)
     28 ; jpeg_simd_cpu_support(void)
     29 ;
     30 
     31     align       32
     32     GLOBAL_FUNCTION(jpeg_simd_cpu_support)
     33 
     34 EXTN(jpeg_simd_cpu_support):
     35     push        ebx
     36 ;   push        ecx                     ; need not be preserved
     37 ;   push        edx                     ; need not be preserved
     38 ;   push        esi                     ; unused
     39     push        edi
     40 
     41     xor         edi, edi                ; simd support flag
     42 
     43     pushfd
     44     pop         eax
     45     mov         edx, eax
     46     xor         eax, 1<<21              ; flip ID bit in EFLAGS
     47     push        eax
     48     popfd
     49     pushfd
     50     pop         eax
     51     xor         eax, edx
     52     jz          near .return            ; CPUID is not supported
     53 
     54     ; Check for MMX instruction support
     55     xor         eax, eax
     56     cpuid
     57     test        eax, eax
     58     jz          near .return
     59 
     60     xor         eax, eax
     61     inc         eax
     62     cpuid
     63     mov         eax, edx                ; eax = Standard feature flags
     64 
     65     test        eax, 1<<23              ; bit23:MMX
     66     jz          short .no_mmx
     67     or          edi, byte JSIMD_MMX
     68 .no_mmx:
     69     test        eax, 1<<25              ; bit25:SSE
     70     jz          short .no_sse
     71     or          edi, byte JSIMD_SSE
     72 .no_sse:
     73     test        eax, 1<<26              ; bit26:SSE2
     74     jz          short .no_sse2
     75     or          edi, byte JSIMD_SSE2
     76 .no_sse2:
     77 
     78     ; Check for AVX2 instruction support
     79     mov         eax, 7
     80     xor         ecx, ecx
     81     cpuid
     82     mov         eax, ebx
     83     test        eax, 1<<5               ; bit5:AVX2
     84     jz          short .no_avx2
     85 
     86     ; Check for AVX2 O/S support
     87     mov         eax, 1
     88     xor         ecx, ecx
     89     cpuid
     90     test        ecx, 1<<27
     91     jz          short .no_avx2          ; O/S does not support XSAVE
     92     test        ecx, 1<<28
     93     jz          short .no_avx2          ; CPU does not support AVX2
     94 
     95     xor         ecx, ecx
     96     xgetbv
     97     and         eax, 6
     98     cmp         eax, 6                  ; O/S does not manage XMM/YMM state
     99                                         ; using XSAVE
    100     jnz         short .no_avx2
    101 
    102     or          edi, JSIMD_AVX2
    103 .no_avx2:
    104 
    105     ; Check for 3DNow! instruction support
    106     mov         eax, 0x80000000
    107     cpuid
    108     cmp         eax, 0x80000000
    109     jbe         short .return
    110 
    111     mov         eax, 0x80000001
    112     cpuid
    113     mov         eax, edx                ; eax = Extended feature flags
    114 
    115     test        eax, 1<<31              ; bit31:3DNow!(vendor independent)
    116     jz          short .no_3dnow
    117     or          edi, byte JSIMD_3DNOW
    118 .no_3dnow:
    119 
    120 .return:
    121     mov         eax, edi
    122 
    123     pop         edi
    124 ;   pop         esi                     ; unused
    125 ;   pop         edx                     ; need not be preserved
    126 ;   pop         ecx                     ; need not be preserved
    127     pop         ebx
    128     ret
    129 
    130 ; For some reason, the OS X linker does not honor the request to align the
    131 ; segment unless we do this.
    132     align       32
    133