;
; jsimdcpu.asm - SIMD instruction support check
;
; Copyright 2009 Pierre Ossman <ossman (a] cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]

%include "jsimdext.inc"

; --------------------------------------------------------------------------
    SECTION     SEG_TEXT
    BITS        32
;
; Check if the CPU supports SIMD instructions
;
; GLOBAL(unsigned int)
; jpeg_simd_cpu_support(void)
;
; Out:   eax = bitwise OR of the JSIMD_* flags (JSIMD_MMX, JSIMD_SSE,
;              JSIMD_SSE2, JSIMD_AVX2, JSIMD_3DNOW) for every extension that
;              was detected; 0 if CPUID is unavailable or reports no
;              feature leaves.
; Saved: ebx, edi (pushed on entry, popped before returning)
; Clobb: ecx, edx, flags (CPUID/XGETBV overwrite ecx/edx; they are
;        deliberately not preserved)
; Regs:  edi accumulates the result flags for the whole function.
;

    align       32
    GLOBAL_FUNCTION(jpeg_simd_cpu_support)

EXTN(jpeg_simd_cpu_support):
    push        ebx
;   push        ecx                     ; need not be preserved
;   push        edx                     ; need not be preserved
;   push        esi                     ; unused
    push        edi

    xor         edi, edi                ; simd support flag

    ; Check whether the CPUID instruction exists: the ID bit (bit 21) of
    ; EFLAGS can be toggled only if CPUID is supported.
    pushfd
    pop         eax
    mov         edx, eax                ; edx = original EFLAGS
    xor         eax, 1<<21              ; flip ID bit in EFLAGS
    push        eax
    popfd
    pushfd
    pop         eax
    xor         eax, edx                ; eax = bits that actually changed
    jz          near .return            ; CPUID is not supported

    ; Check that standard leaf 01H exists (maximum basic leaf != 0)
    xor         eax, eax
    cpuid
    test        eax, eax
    jz          near .return

    ; Check for MMX/SSE/SSE2 instruction support (CPUID leaf 01H, EDX)
    xor         eax, eax
    inc         eax
    cpuid
    mov         eax, edx                ; eax = Standard feature flags

    test        eax, 1<<23              ; bit23:MMX
    jz          short .no_mmx
    or          edi, byte JSIMD_MMX
.no_mmx:
    test        eax, 1<<25              ; bit25:SSE
    jz          short .no_sse
    or          edi, byte JSIMD_SSE
.no_sse:
    test        eax, 1<<26              ; bit26:SSE2
    jz          short .no_sse2
    or          edi, byte JSIMD_SSE2
.no_sse2:

    ; Check whether CPUID leaf 07H is supported before querying it.
    ; (Leaf 07H is used to check for AVX2 instruction support.  When the
    ; requested leaf exceeds the maximum basic leaf, the returned registers
    ; do not describe leaf 07H -- Intel CPUs return the data of the highest
    ; basic leaf -- so EBX bit 5 could be set spuriously.)
    xor         eax, eax
    cpuid                               ; eax = maximum basic leaf
    cmp         eax, 7
    jb          short .no_avx2          ; Maximum leaf < 07H (unsigned)

    ; Check for AVX2 instruction support
    mov         eax, 7
    xor         ecx, ecx                ; sub-leaf 0
    cpuid
    mov         eax, ebx
    test        eax, 1<<5               ; bit5:AVX2
    jz          short .no_avx2

    ; Check for AVX2 O/S support
    mov         eax, 1
    xor         ecx, ecx
    cpuid
    test        ecx, 1<<27              ; bit27:OSXSAVE
    jz          short .no_avx2          ; O/S does not support XSAVE
    test        ecx, 1<<28              ; bit28:AVX
    jz          short .no_avx2          ; CPU does not support AVX

    xor         ecx, ecx                ; select XCR0
    xgetbv
    and         eax, 6                  ; XCR0 bit1 (XMM) and bit2 (YMM)
    cmp         eax, 6                  ; O/S does not manage XMM/YMM state
                                        ; using XSAVE
    jnz         short .no_avx2

    or          edi, JSIMD_AVX2
.no_avx2:

    ; Check for 3DNow! instruction support
    mov         eax, 0x80000000
    cpuid                               ; eax = maximum extended leaf
    cmp         eax, 0x80000000
    jbe         short .return           ; leaf 0x80000001 is not available

    mov         eax, 0x80000001
    cpuid
    mov         eax, edx                ; eax = Extended feature flags

    test        eax, 1<<31              ; bit31:3DNow!(vendor independent)
    jz          short .no_3dnow
    or          edi, byte JSIMD_3DNOW
.no_3dnow:

.return:
    mov         eax, edi                ; return the accumulated flags

    pop         edi
;   pop         esi                     ; unused
;   pop         edx                     ; need not be preserved
;   pop         ecx                     ; need not be preserved
    pop         ebx
    ret

; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
    align       32