; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c

define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    paddb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfc,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = add <16 x i8> %arg0, %arg1
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    paddw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfd,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = add <8 x i16> %arg0, %arg1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    paddd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfe,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = add <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    paddq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd4,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd4,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = add <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_add_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x58,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fadd <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_add_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x58,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x58,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x58,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fadd = fadd double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fadd, i32 0
  ret <2 x double> %res
}

define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    paddsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xec,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xec,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    paddsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xed,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xed,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epu8:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdc,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epu8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdc,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epu8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epu16:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdd,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epu16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdd,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epu16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_and_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = and <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_and_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = and <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_andnot_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <4 x i32> %not, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_andnot_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
; SSE-NEXT:    pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
; SSE-NEXT:    pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
  %res = and <2 x i64> %not, %a1
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_avg_epu8:
; SSE:       # %bb.0:
; SSE-NEXT:    pavgb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe0,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_avg_epu8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe0,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_avg_epu8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxbw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xc0]
; AVX512-NEXT:    # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512-NEXT:    vpmovzxbw %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xc9]
; AVX512-NEXT:    # ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
; AVX512-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0xc1]
; AVX512-NEXT:    vpsrlw $1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xd0,0x01]
; AVX512-NEXT:    vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0]
; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %zext0 = zext <16 x i8> %arg0 to <16 x i16>
  %zext1 = zext <16 x i8> %arg1 to <16 x i16>
  %add = add <16 x i16> %zext0, %zext1
  %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = trunc <16 x i16> %lshr to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_avg_epu16:
; SSE:       # %bb.0:
; SSE-NEXT:    pavgw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe3,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_avg_epu16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe3,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_avg_epu16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxwd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xc0]
; AVX512-NEXT:    # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    vpmovzxwd %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xc9]
; AVX512-NEXT:    # ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
; AVX512-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1]
; AVX512-NEXT:    vpsrld $1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xd0,0x01]
; AVX512-NEXT:    vpmovdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc0]
; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %zext0 = zext <8 x i16> %arg0 to <8 x i32>
  %zext1 = zext <8 x i16> %arg1 to <8 x i32>
  %add = add <8 x i32> %zext0, %zext1
  %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %res = trunc <8 x i32> %lshr to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_bslli_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05]
; SSE-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_bslli_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05]
; AVX1-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_bslli_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05]
; AVX512-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_bsrli_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05]
; SSE-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_bsrli_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05]
; AVX1-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_bsrli_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05]
; AVX512-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
; CHECK-LABEL: test_mm_castpd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x double> %a0 to <4 x float>
  ret <4 x float> %res
}

define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
; CHECK-LABEL: test_mm_castpd_si128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x double> %a0 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
; CHECK-LABEL: test_mm_castps_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <4 x float> %a0 to <2 x double>
  ret <2 x double> %res
}

define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
; CHECK-LABEL: test_mm_castps_si128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <4 x float> %a0 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
; CHECK-LABEL: test_mm_castsi128_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x i64> %a0 to <2 x double>
  ret <2 x double> %res
}

define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
; CHECK-LABEL: test_mm_castsi128_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x i64> %a0 to <4 x float>
  ret <4 x float> %res
}

define void @test_mm_clflush(i8* %a0) nounwind {
; X86-LABEL: test_mm_clflush:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    clflush (%eax) # encoding: [0x0f,0xae,0x38]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_clflush:
; X64:       # %bb.0:
; X64-NEXT:    clflush (%rdi) # encoding: [0x0f,0xae,0x3f]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.sse2.clflush(i8* %a0)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone

define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x74,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x74,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
; AVX512-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp eq <16 x i8> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x75,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x75,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; AVX512-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp eq <8 x i16> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x76,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x76,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp eq <4 x i32> %arg0, %arg1
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x00]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x00]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp oeq <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpeq_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x00]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmplepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpge_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x02]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpge_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmplepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x02]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ole <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmplesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpge_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmplesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x02]
; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x64,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x64,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
; AVX512-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x65,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x65,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
; AVX512-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x66,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x66,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg0, %arg1
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x01]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp olt <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpgt_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x01]
; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmplepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmple_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmplepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x02]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmple_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ole <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmplesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmple_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmplesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x02]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
  ret <2 x double> %res
}

define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtb %xmm0, %xmm1 # encoding: [0x66,0x0f,0x64,0xc8]
; SSE-NEXT:    movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x64,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtb %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc0]
; AVX512-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg1, %arg0
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x65,0xc8]
; SSE-NEXT:    movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x65,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc0]
; AVX512-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg1, %arg0
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x66,0xc8]
; SSE-NEXT:    movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x66,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x66,0xc0]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg1, %arg0
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x01]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp olt <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmplt_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x01]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpneq_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpneqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x04]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpneq_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x04]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp une <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpneq_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x04]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 952 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4) 953 ret <2 x double> %res 954 } 955 956 define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 957 ; SSE-LABEL: test_mm_cmpnge_pd: 958 ; SSE: # %bb.0: 959 ; SSE-NEXT: cmpnlepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x06] 960 ; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1] 961 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 962 ; 963 ; AVX1-LABEL: test_mm_cmpnge_pd: 964 ; AVX1: # %bb.0: 965 ; AVX1-NEXT: vcmpnlepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x06] 966 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 967 ; 968 ; AVX512-LABEL: test_mm_cmpnge_pd: 969 ; AVX512: # %bb.0: 970 ; AVX512-NEXT: vcmpnlepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x06] 971 ; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0] 972 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 973 %fcmp = fcmp ugt <2 x double> %a1, %a0 974 %sext = sext <2 x i1> %fcmp to <2 x i64> 975 %res = bitcast <2 x i64> %sext to <2 x double> 976 ret <2 x double> %res 977 } 978 979 define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 980 ; SSE-LABEL: test_mm_cmpnge_sd: 981 ; SSE: # %bb.0: 982 ; SSE-NEXT: cmpnlesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x06] 983 ; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1] 984 ; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1] 985 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 986 ; 987 ; AVX-LABEL: test_mm_cmpnge_sd: 988 ; AVX: # %bb.0: 989 ; AVX-NEXT: vcmpnlesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x06] 990 ; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01] 991 ; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1] 992 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 993 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6) 994 %ext0 = extractelement <2 x double> %cmp, i32 0 995 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0 996 %ext1 = extractelement <2 x double> %a0, i32 1 997 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1 998 ret <2 x double> %ins1 999 } 1000 1001 define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 1002 ; SSE-LABEL: test_mm_cmpngt_pd: 1003 ; SSE: # %bb.0: 1004 ; SSE-NEXT: cmpnltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x05] 1005 ; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1] 1006 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1007 ; 1008 ; AVX1-LABEL: test_mm_cmpngt_pd: 1009 ; AVX1: # %bb.0: 1010 ; AVX1-NEXT: vcmpnltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x05] 1011 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1012 ; 1013 ; AVX512-LABEL: test_mm_cmpngt_pd: 1014 ; AVX512: # %bb.0: 1015 ; AVX512-NEXT: vcmpnltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x05] 1016 ; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0] 1017 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1018 %fcmp = fcmp uge <2 x double> %a1, %a0 1019 %sext = sext <2 x i1> %fcmp to <2 x i64> 1020 %res = bitcast <2 x i64> %sext to <2 x double> 1021 ret <2 x double> %res 1022 } 1023 1024 define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 1025 ; SSE-LABEL: test_mm_cmpngt_sd: 1026 ; SSE: # %bb.0: 1027 ; SSE-NEXT: cmpnltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x05] 1028 ; SSE-NEXT: movsd %xmm1, 
%xmm0 # encoding: [0xf2,0x0f,0x10,0xc1] 1029 ; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1] 1030 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1031 ; 1032 ; AVX-LABEL: test_mm_cmpngt_sd: 1033 ; AVX: # %bb.0: 1034 ; AVX-NEXT: vcmpnltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x05] 1035 ; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01] 1036 ; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1] 1037 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1038 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5) 1039 %ext0 = extractelement <2 x double> %cmp, i32 0 1040 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0 1041 %ext1 = extractelement <2 x double> %a0, i32 1 1042 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1 1043 ret <2 x double> %ins1 1044 } 1045 1046 define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 1047 ; SSE-LABEL: test_mm_cmpnle_pd: 1048 ; SSE: # %bb.0: 1049 ; SSE-NEXT: cmpnlepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x06] 1050 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1051 ; 1052 ; AVX1-LABEL: test_mm_cmpnle_pd: 1053 ; AVX1: # %bb.0: 1054 ; AVX1-NEXT: vcmpnlepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x06] 1055 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1056 ; 1057 ; AVX512-LABEL: test_mm_cmpnle_pd: 1058 ; AVX512: # %bb.0: 1059 ; AVX512-NEXT: vcmpnlepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x06] 1060 ; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0] 1061 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1062 %fcmp = fcmp ugt <2 x double> %a0, %a1 1063 %sext = sext <2 x i1> %fcmp to <2 x i64> 1064 %res = bitcast <2 x i64> %sext to <2 x double> 1065 ret <2 x double> %res 1066 } 1067 1068 define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 1069 ; SSE-LABEL: test_mm_cmpnle_sd: 1070 ; SSE: # %bb.0: 1071 ; SSE-NEXT: cmpnlesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x06] 1072 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1073 ; 1074 ; AVX-LABEL: test_mm_cmpnle_sd: 1075 ; AVX: # %bb.0: 1076 ; AVX-NEXT: vcmpnlesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x06] 1077 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1078 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6) 1079 ret <2 x double> %res 1080 } 1081 1082 define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 1083 ; SSE-LABEL: test_mm_cmpnlt_pd: 1084 ; SSE: # %bb.0: 1085 ; SSE-NEXT: cmpnltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x05] 1086 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1087 ; 1088 ; AVX1-LABEL: test_mm_cmpnlt_pd: 1089 ; AVX1: # %bb.0: 1090 ; AVX1-NEXT: vcmpnltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x05] 1091 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1092 ; 1093 ; AVX512-LABEL: test_mm_cmpnlt_pd: 1094 ; AVX512: # %bb.0: 1095 ; AVX512-NEXT: vcmpnltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x05] 1096 ; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0] 1097 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1098 %fcmp = fcmp uge <2 x double> %a0, %a1 1099 %sext = sext <2 x i1> %fcmp to <2 x i64> 1100 %res = bitcast <2 x i64> %sext to <2 x double> 1101 ret <2 x double> %res 1102 } 1103 1104 define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 1105 ; SSE-LABEL: test_mm_cmpnlt_sd: 1106 ; SSE: # %bb.0: 1107 ; SSE-NEXT: cmpnltsd 
%xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x05] 1108 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1109 ; 1110 ; AVX-LABEL: test_mm_cmpnlt_sd: 1111 ; AVX: # %bb.0: 1112 ; AVX-NEXT: vcmpnltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x05] 1113 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1114 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5) 1115 ret <2 x double> %res 1116 } 1117 1118 define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 1119 ; SSE-LABEL: test_mm_cmpord_pd: 1120 ; SSE: # %bb.0: 1121 ; SSE-NEXT: cmpordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x07] 1122 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1123 ; 1124 ; AVX1-LABEL: test_mm_cmpord_pd: 1125 ; AVX1: # %bb.0: 1126 ; AVX1-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x07] 1127 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1128 ; 1129 ; AVX512-LABEL: test_mm_cmpord_pd: 1130 ; AVX512: # %bb.0: 1131 ; AVX512-NEXT: vcmpordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x07] 1132 ; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0] 1133 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1134 %fcmp = fcmp ord <2 x double> %a0, %a1 1135 %sext = sext <2 x i1> %fcmp to <2 x i64> 1136 %res = bitcast <2 x i64> %sext to <2 x double> 1137 ret <2 x double> %res 1138 } 1139 1140 define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 1141 ; SSE-LABEL: test_mm_cmpord_sd: 1142 ; SSE: # %bb.0: 1143 ; SSE-NEXT: cmpordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x07] 1144 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1145 ; 1146 ; AVX-LABEL: test_mm_cmpord_sd: 1147 ; AVX: # %bb.0: 1148 ; AVX-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x07] 1149 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1150 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) 1151 ret <2 x double> %res 1152 } 1153 1154 define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 1155 ; SSE-LABEL: test_mm_cmpunord_pd: 1156 ; SSE: # %bb.0: 1157 ; SSE-NEXT: cmpunordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x03] 1158 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1159 ; 1160 ; AVX1-LABEL: test_mm_cmpunord_pd: 1161 ; AVX1: # %bb.0: 1162 ; AVX1-NEXT: vcmpunordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x03] 1163 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1164 ; 1165 ; AVX512-LABEL: test_mm_cmpunord_pd: 1166 ; AVX512: # %bb.0: 1167 ; AVX512-NEXT: vcmpunordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x03] 1168 ; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0] 1169 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1170 %fcmp = fcmp uno <2 x double> %a0, %a1 1171 %sext = sext <2 x i1> %fcmp to <2 x i64> 1172 %res = bitcast <2 x i64> %sext to <2 x double> 1173 ret <2 x double> %res 1174 } 1175 1176 define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 1177 ; SSE-LABEL: test_mm_cmpunord_sd: 1178 ; SSE: # %bb.0: 1179 ; SSE-NEXT: cmpunordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x03] 1180 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1181 ; 1182 ; AVX-LABEL: test_mm_cmpunord_sd: 1183 ; AVX: # %bb.0: 1184 ; AVX-NEXT: vcmpunordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x03] 1185 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1186 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x 
double> %a1, i8 3) 1187 ret <2 x double> %res 1188 } 1189 1190 define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 1191 ; SSE-LABEL: test_mm_comieq_sd: 1192 ; SSE: # %bb.0: 1193 ; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1] 1194 ; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 1195 ; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 1196 ; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 1197 ; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 1198 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1199 ; 1200 ; AVX1-LABEL: test_mm_comieq_sd: 1201 ; AVX1: # %bb.0: 1202 ; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1] 1203 ; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 1204 ; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 1205 ; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 1206 ; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 1207 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1208 ; 1209 ; AVX512-LABEL: test_mm_comieq_sd: 1210 ; AVX512: # %bb.0: 1211 ; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1] 1212 ; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 1213 ; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 1214 ; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 1215 ; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 1216 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1217 %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) 1218 ret i32 %res 1219 } 1220 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone 1221 1222 define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 1223 ; SSE-LABEL: test_mm_comige_sd: 1224 ; SSE: # %bb.0: 1225 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 1226 ; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1] 1227 ; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 1228 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1229 ; 1230 ; AVX1-LABEL: test_mm_comige_sd: 1231 ; AVX1: # %bb.0: 1232 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 1233 ; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1] 1234 ; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 1235 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1236 ; 1237 ; AVX512-LABEL: test_mm_comige_sd: 1238 ; AVX512: # %bb.0: 1239 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 1240 ; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1] 1241 ; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 1242 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1243 %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) 1244 ret i32 %res 1245 } 1246 declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone 1247 1248 define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 1249 ; SSE-LABEL: test_mm_comigt_sd: 1250 ; SSE: # %bb.0: 1251 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 1252 ; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1] 1253 ; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 1254 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1255 ; 1256 ; AVX1-LABEL: test_mm_comigt_sd: 1257 ; AVX1: # %bb.0: 1258 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 1259 ; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1] 1260 ; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 1261 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1262 ; 1263 ; 
AVX512-LABEL: test_mm_comigt_sd: 1264 ; AVX512: # %bb.0: 1265 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 1266 ; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1] 1267 ; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 1268 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1269 %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) 1270 ret i32 %res 1271 } 1272 declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone 1273 1274 define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 1275 ; SSE-LABEL: test_mm_comile_sd: 1276 ; SSE: # %bb.0: 1277 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 1278 ; SSE-NEXT: comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8] 1279 ; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 1280 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1281 ; 1282 ; AVX1-LABEL: test_mm_comile_sd: 1283 ; AVX1: # %bb.0: 1284 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 1285 ; AVX1-NEXT: vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8] 1286 ; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 1287 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1288 ; 1289 ; AVX512-LABEL: test_mm_comile_sd: 1290 ; AVX512: # %bb.0: 1291 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 1292 ; AVX512-NEXT: vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8] 1293 ; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 1294 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1295 %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) 1296 ret i32 %res 1297 } 1298 declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone 1299 1300 define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 1301 ; SSE-LABEL: test_mm_comilt_sd: 1302 ; SSE: # %bb.0: 1303 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 1304 ; SSE-NEXT: comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8] 1305 ; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 1306 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1307 ; 1308 ; AVX1-LABEL: test_mm_comilt_sd: 1309 ; AVX1: # %bb.0: 1310 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 1311 ; AVX1-NEXT: vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8] 1312 ; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 1313 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1314 ; 1315 ; AVX512-LABEL: test_mm_comilt_sd: 1316 ; AVX512: # %bb.0: 1317 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 1318 ; AVX512-NEXT: vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8] 1319 ; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 1320 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1321 %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) 1322 ret i32 %res 1323 } 1324 declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone 1325 1326 define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 1327 ; SSE-LABEL: test_mm_comineq_sd: 1328 ; SSE: # %bb.0: 1329 ; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1] 1330 ; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 1331 ; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 1332 ; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 1333 ; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 1334 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1335 ; 1336 ; AVX1-LABEL: test_mm_comineq_sd: 1337 ; AVX1: # %bb.0: 1338 ; AVX1-NEXT: vcomisd %xmm1, 
define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comineq_sd:
; SSE: # %bb.0:
; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comineq_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comineq_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone

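; NOTE: _mm_cvtepi32_pd converts only the low two i32 elements, which the IR below expresses as a two-element shufflevector followed by sitofp.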
define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtepi32_pd:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 # encoding: [0xf3,0x0f,0xe6,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtepi32_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtepi32_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
%res = sitofp <2 x i32> %ext to <2 x double>
ret <2 x double> %res
}

define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtepi32_ps:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 # encoding: [0x0f,0x5b,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtepi32_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtepi32_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtdq2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%res = sitofp <4 x i32> %arg0 to <4 x float>
ret <4 x float> %res
}

define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtpd_epi32:
; SSE: # %bb.0:
; SSE-NEXT: cvtpd2dq %xmm0, %xmm0 # encoding: [0xf2,0x0f,0xe6,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtpd_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtpd_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtpd_ps:
; SSE: # %bb.0:
; SSE-NEXT: cvtpd2ps %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5a,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtpd_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtpd_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone

define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtps_epi32:
; SSE: # %bb.0:
; SSE-NEXT: cvtps2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5b,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtps_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtps2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5b,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtps_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtps_pd:
; SSE: # %bb.0:
; SSE-NEXT: cvtps2pd %xmm0, %xmm0 # encoding: [0x0f,0x5a,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtps_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtps_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
%res = fpext <2 x float> %ext to <2 x double>
ret <2 x double> %res
}

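; NOTE: on 32-bit targets a double return value is passed back in x87 st(0), hence the aligned stack slot, (v)movlps spill and fldl reload below; on x86-64 the result is already in xmm0 and the body is a bare ret.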
define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtsd_f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp # encoding: [0x55]
; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-SSE-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-SSE-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-SSE-NEXT: movlps %xmm0, (%esp) # encoding: [0x0f,0x13,0x04,0x24]
; X86-SSE-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
; X86-SSE-NEXT: popl %ebp # encoding: [0x5d]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsd_f64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: pushl %ebp # encoding: [0x55]
; X86-AVX1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-AVX1-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-AVX1-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-AVX1-NEXT: vmovlps %xmm0, (%esp) # encoding: [0xc5,0xf8,0x13,0x04,0x24]
; X86-AVX1-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-AVX1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
; X86-AVX1-NEXT: popl %ebp # encoding: [0x5d]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsd_f64:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: pushl %ebp # encoding: [0x55]
; X86-AVX512-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-AVX512-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-AVX512-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-AVX512-NEXT: vmovlps %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x04,0x24]
; X86-AVX512-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-AVX512-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
; X86-AVX512-NEXT: popl %ebp # encoding: [0x5d]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_cvtsd_f64:
; X64: # %bb.0:
; X64-NEXT: retq # encoding: [0xc3]
%res = extractelement <2 x double> %a0, i32 0
ret double %res
}

define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtsd_si32:
; SSE: # %bb.0:
; SSE-NEXT: cvtsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2d,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtsd_si32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2d,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtsd_si32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2d,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_cvtsd_ss:
; SSE: # %bb.0:
; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5a,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cvtsd_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0xc1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

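; NOTE: the scalar conversion folds its source load - (v)cvtsd2ss accepts the memory operand directly, so no separate vector load is emitted.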
define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
; X86-SSE-LABEL: test_mm_cvtsd_ss_load:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: cvtsd2ss (%eax), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_cvtsd_ss_load:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsd_ss_load:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: cvtsd2ss (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_cvtsd_ss_load:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX-NEXT: retq # encoding: [0xc3]
%a1 = load <2 x double>, <2 x double>* %p1
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
ret <4 x float> %res
}

define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtsi128_si32:
; SSE: # %bb.0:
; SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtsi128_si32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtsi128_si32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%res = extractelement <4 x i32> %arg0, i32 0
ret i32 %res
}

define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_sd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_sd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: cvtsi2sdl %edi, %xmm0 # encoding: [0xf2,0x0f,0x2a,0xc7]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_sd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_sd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%cvt = sitofp i32 %a1 to double
%res = insertelement <2 x double> %a0, double %cvt, i32 0
ret <2 x double> %res
}

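; NOTE: lane 0 takes the i32 argument and lanes 1-3 are zero; on 32-bit, fast-isel loads the stack argument with movss (which zeroes the upper lanes), while on 64-bit it is a movd from %edi.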
define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_si128:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_si128:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_si128:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_si128:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_si128:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_si128:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
%res1 = insertelement <4 x i32> %res0, i32 0, i32 1
%res2 = insertelement <4 x i32> %res1, i32 0, i32 2
%res3 = insertelement <4 x i32> %res2, i32 0, i32 3
%res = bitcast <4 x i32> %res3 to <2 x i64>
ret <2 x i64> %res
}

define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cvtss_sd:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2sd %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5a,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtss_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtss_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ext = extractelement <4 x float> %a1, i32 0
%cvt = fpext float %ext to double
%res = insertelement <2 x double> %a0, double %cvt, i32 0
ret <2 x double> %res
}

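; NOTE: the cvtt* forms truncate toward zero instead of using the current MXCSR rounding mode, matching the C cast semantics of _mm_cvttpd_epi32/_mm_cvttps_epi32/_mm_cvttsd_si32 below.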
define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvttpd_epi32:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0xe6,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttpd_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe6,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttpd_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvttps_epi32:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x5b,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttps_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5b,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttps_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone

define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvttsd_si32:
; SSE: # %bb.0:
; SSE-NEXT: cvttsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2c,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttsd_si32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2c,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttsd_si32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2c,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_div_pd:
; SSE: # %bb.0:
; SSE-NEXT: divpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5e,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5e,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5e,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = fdiv <2 x double> %a0, %a1
ret <2 x double> %res
}

define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_div_sd:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5e,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ext0 = extractelement <2 x double> %a0, i32 0
%ext1 = extractelement <2 x double> %a1, i32 0
%fdiv = fdiv double %ext0, %ext1
%res = insertelement <2 x double> %a0, double %fdiv, i32 0
ret <2 x double> %res
}

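; NOTE: pextrw already zero-extends the selected word into the 32-bit register; the following movzwl only exists because fast-isel lowers the explicit zext in the IR without recognising the redundancy.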
define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_extract_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pextrw $1, %xmm0, %eax # encoding: [0x66,0x0f,0xc5,0xc0,0x01]
; SSE-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_extract_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpextrw $1, %xmm0, %eax # encoding: [0xc5,0xf9,0xc5,0xc0,0x01]
; AVX1-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_extract_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpextrw $1, %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc5,0xc0,0x01]
; AVX512-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%ext = extractelement <8 x i16> %arg0, i32 1
%res = zext i16 %ext to i32
ret i32 %res
}

define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
; X86-SSE-LABEL: test_mm_insert_epi16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-SSE-NEXT: pinsrw $1, %eax, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc0,0x01]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_insert_epi16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_insert_epi16:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_insert_epi16:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pinsrw $1, %edi, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc7,0x01]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_insert_epi16:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_insert_epi16:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%res = insertelement <8 x i16> %arg0, i16 %a1, i32 1
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}

define void @test_mm_lfence() nounwind {
; CHECK-LABEL: test_mm_lfence:
; CHECK: # %bb.0:
; CHECK-NEXT: lfence # encoding: [0x0f,0xae,0xe8]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
call void @llvm.x86.sse2.lfence()
ret void
}
declare void @llvm.x86.sse2.lfence() nounwind readnone

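; NOTE: aligned 16-byte loads print as movaps/vmovaps even for double and integer vectors; for a pure move the execution domain is irrelevant and the ps form has the shortest encoding.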
define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double* %a0 to <2 x double>*
%res = load <2 x double>, <2 x double>* %arg0, align 16
ret <2 x double> %res
}

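; NOTE: _mm_load_sd zeroes the upper lane, which is exactly what the memory form of (v)movsd does, so the insertelement of 0.0 into lane 1 folds away.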
define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_sd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_sd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],zero
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_sd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0],zero
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_sd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ld = load double, double* %a0, align 1
%res0 = insertelement <2 x double> undef, double %ld, i32 0
%res1 = insertelement <2 x double> %res0, double 0.0, i32 1
ret <2 x double> %res1
}

define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_si128:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_si128:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_si128:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_si128:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_si128:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_si128:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%res = load <2 x i64>, <2 x i64>* %a0, align 16
ret <2 x i64> %res
}

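; NOTE: splatting one double takes two instructions on SSE (movsd + movlhps) but a single vmovddup load on AVX.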
define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load1_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load1_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovddup (%eax), %xmm0 # encoding: [0xc5,0xfb,0x12,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load1_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovddup (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0,0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load1_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],zero
; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load1_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovddup (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x12,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load1_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovddup (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0,0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ld = load double, double* %a0, align 8
%res0 = insertelement <2 x double> undef, double %ld, i32 0
%res1 = insertelement <2 x double> %res0, double %ld, i32 1
ret <2 x double> %res1
}

define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
; X86-SSE-LABEL: test_mm_loadh_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movhpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x16,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0],mem[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadh_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadh_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x00]
; X86-AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadh_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movhpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x16,0x07]
; X64-SSE-NEXT: # xmm0 = xmm0[0],mem[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadh_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x07]
; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadh_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x07]
; X64-AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ld = load double, double* %a1, align 8
%res = insertelement <2 x double> %a0, double %ld, i32 1
ret <2 x double> %res
}

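; NOTE: despite the _epi64 name this is a plain 64-bit load with a zeroed upper half, so it lowers to the same (v)movsd as the scalar double loads above.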
define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
; X86-SSE-LABEL: test_mm_loadl_epi64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadl_epi64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadl_epi64:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadl_epi64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],zero
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadl_epi64:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0],zero
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadl_epi64:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%bc = bitcast <2 x i64>* %a1 to i64*
%ld = load i64, i64* %bc, align 1
%res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
%res1 = insertelement <2 x i64> %res0, i64 0, i32 1
ret <2 x i64> %res1
}

define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
; X86-SSE-LABEL: test_mm_loadl_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movlpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x12,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],xmm0[1]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadl_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0],xmm0[1]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadl_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0],xmm0[1]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadl_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movlpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x12,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],xmm0[1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadl_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0],xmm0[1]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadl_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],xmm0[1]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ld = load double, double* %a1, align 8
%res = insertelement <2 x double> %a0, double %ld, i32 0
ret <2 x double> %res
}

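; NOTE: reversed load: SSE needs an aligned movapd plus a shufpd to swap the lanes, while AVX folds the load and the swap into one vpermilpd with a memory operand.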
define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadr_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movapd (%eax), %xmm0 # encoding: [0x66,0x0f,0x28,0x00]
; X86-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X86-SSE-NEXT: # xmm0 = xmm0[1,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadr_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpermilpd $1, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01]
; X86-AVX1-NEXT: # xmm0 = mem[1,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadr_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpermilpd $1, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01]
; X86-AVX512-NEXT: # xmm0 = mem[1,0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadr_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movapd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x28,0x07]
; X64-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X64-SSE-NEXT: # xmm0 = xmm0[1,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadr_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilpd $1, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01]
; X64-AVX1-NEXT: # xmm0 = mem[1,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadr_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpermilpd $1, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01]
; X64-AVX512-NEXT: # xmm0 = mem[1,0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double* %a0 to <2 x double>*
%ld = load <2 x double>, <2 x double>* %arg0, align 16
%res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
ret <2 x double> %res
}

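; NOTE: the align 1 on the IR loads below forces the unaligned movups/vmovups form.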
define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadu_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadu_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadu_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadu_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadu_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadu_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double* %a0 to <2 x double>*
%res = load <2 x double>, <2 x double>* %arg0, align 1
ret <2 x double> %res
}

define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadu_si128:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadu_si128:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadu_si128:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadu_si128:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadu_si128:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadu_si128:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%res = load <2 x i64>, <2 x i64>* %a0, align 1
ret <2 x i64> %res
}

define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_madd_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pmaddwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf5,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_madd_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf5,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_madd_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone

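; NOTE: maskmovdqu stores through an implicit %edi/%rdi pointer, so the 32-bit version must save %edi, load the pointer argument into it and restore it afterwards.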
define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
; X86-SSE-LABEL: test_mm_maskmoveu_si128:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %edi # encoding: [0x57]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08]
; X86-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1]
; X86-SSE-NEXT: popl %edi # encoding: [0x5f]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_maskmoveu_si128:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %edi # encoding: [0x57]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08]
; X86-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1]
; X86-AVX-NEXT: popl %edi # encoding: [0x5f]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_maskmoveu_si128:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_maskmoveu_si128:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
ret void
}
declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind

define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_max_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pmaxsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xee,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xee,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%cmp = icmp sgt <8 x i16> %arg0, %arg1
%sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
%bc = bitcast <8 x i16> %sel to <2 x i64>
ret <2 x i64> %bc
}

define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_max_epu8:
; SSE: # %bb.0:
; SSE-NEXT: pmaxub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xde,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_epu8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xde,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_epu8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%cmp = icmp ugt <16 x i8> %arg0, %arg1
%sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
%bc = bitcast <16 x i8> %sel to <2 x i64>
ret <2 x i64> %bc
}

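; NOTE: the floating-point min/max tests keep the target intrinsics because maxpd/minpd are not plain fcmp+select: when the operands are unordered (or both zero) they return the second source operand.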
define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_max_pd:
; SSE: # %bb.0:
; SSE-NEXT: maxpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5f,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5f,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_max_sd:
; SSE: # %bb.0:
; SSE-NEXT: maxsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5f,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5f,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone

define void @test_mm_mfence() nounwind {
; CHECK-LABEL: test_mm_mfence:
; CHECK: # %bb.0:
; CHECK-NEXT: mfence # encoding: [0x0f,0xae,0xf0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
call void @llvm.x86.sse2.mfence()
ret void
}
declare void @llvm.x86.sse2.mfence() nounwind readnone

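; NOTE: there is no LLVM intrinsic for the integer min/max; the icmp+select idiom below (and in the pmaxsw/pmaxub tests above) is pattern-matched back to a single instruction.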
define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_min_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pminsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xea,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xea,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%cmp = icmp slt <8 x i16> %arg0, %arg1
%sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
%bc = bitcast <8 x i16> %sel to <2 x i64>
ret <2 x i64> %bc
}

define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_min_epu8:
; SSE: # %bb.0:
; SSE-NEXT: pminub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xda,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_epu8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xda,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_epu8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%cmp = icmp ult <16 x i8> %arg0, %arg1
%sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
%bc = bitcast <16 x i8> %sel to <2 x i64>
ret <2 x i64> %bc
}

define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_min_pd:
; SSE: # %bb.0:
; SSE-NEXT: minpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5d,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vminpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5d,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5d,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_min_sd:
; SSE: # %bb.0:
; SSE-NEXT: minsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5d,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vminsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5d,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5d,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_move_epi64:
; SSE: # %bb.0:
; SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
; SSE-NEXT: # xmm0 = xmm0[0],zero
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_move_epi64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
; AVX1-NEXT: # xmm0 = xmm0[0],zero
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_move_epi64:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
; AVX512-NEXT: # xmm0 = xmm0[0],zero
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %res
}

define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_move_sd:
; SSE: # %bb.0:
; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_move_sd:
; AVX: # %bb.0:
; AVX-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; AVX-NEXT: # xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ext0 = extractelement <2 x double> %a1, i32 0
%res0 = insertelement <2 x double> undef, double %ext0, i32 0
%ext1 = extractelement <2 x double> %a0, i32 1
%res1 = insertelement <2 x double> %res0, double %ext1, i32 1
ret <2 x double> %res1
}

define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_movemask_epi8:
; SSE: # %bb.0:
; SSE-NEXT: pmovmskb %xmm0, %eax # encoding: [0x66,0x0f,0xd7,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_movemask_epi8:
; AVX: # %bb.0:
; AVX-NEXT: vpmovmskb %xmm0, %eax # encoding: [0xc5,0xf9,0xd7,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone

define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_movemask_pd:
; SSE: # %bb.0:
; SSE-NEXT: movmskpd %xmm0, %eax # encoding: [0x66,0x0f,0x50,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_movemask_pd:
; AVX: # %bb.0:
; AVX-NEXT: vmovmskpd %xmm0, %eax # encoding: [0xc5,0xf9,0x50,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone

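; NOTE: _mm_mul_epu32 multiplies the even 32-bit lanes into 64-bit products: SSE masks both inputs with pand before pmuludq, AVX1 zeroes the odd lanes with a blend instead, and AVX512DQ can then use a full 64-bit vpmullq.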
define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X86-SSE-LABEL: test_mm_mul_epu32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; X86-SSE-NEXT: # encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
; X86-SSE-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-SSE-NEXT: pand %xmm2, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc2]
; X86-SSE-NEXT: pand %xmm2, %xmm1 # encoding: [0x66,0x0f,0xdb,0xca]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_epu32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; AVX1-NEXT: vpblendw $204, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc2,0xcc]
; AVX1-NEXT: # xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpblendw $204, %xmm2, %xmm1, %xmm1 # encoding: [0xc4,0xe3,0x71,0x0e,0xca,0xcc]
; AVX1-NEXT: # xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf4,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mul_epu32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; AVX512-NEXT: vpblendd $10, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc2,0x0a]
; AVX512-NEXT: # xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512-NEXT: vpblendd $10, %xmm2, %xmm1, %xmm1 # encoding: [0xc4,0xe3,0x71,0x02,0xca,0x0a]
; AVX512-NEXT: # xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512-NEXT: vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_mul_epu32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; X64-SSE-NEXT: # encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
; X64-SSE-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT: pand %xmm2, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc2]
; X64-SSE-NEXT: pand %xmm2, %xmm1 # encoding: [0x66,0x0f,0xdb,0xca]
; X64-SSE-NEXT: pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
%A = and <2 x i64> %a0, <i64 4294967295, i64 4294967295>
%B = and <2 x i64> %a1, <i64 4294967295, i64 4294967295>
%res = mul nuw <2 x i64> %A, %B
ret <2 x i64> %res
}

define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_mul_pd:
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x59,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x59,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mul_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x59,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = fmul <2 x double> %a0, %a1
ret <2 x double> %res
}

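; NOTE: scalar arithmetic written as extract/op/insert on lane 0 (here and in test_mm_div_sd above) collapses back to a single mulsd/divsd.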
[0xc5,0xfb,0x59,0xc1] 2689 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2690 ; 2691 ; AVX512-LABEL: test_mm_mul_sd: 2692 ; AVX512: # %bb.0: 2693 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x59,0xc1] 2694 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2695 %ext0 = extractelement <2 x double> %a0, i32 0 2696 %ext1 = extractelement <2 x double> %a1, i32 0 2697 %fmul = fmul double %ext0, %ext1 2698 %res = insertelement <2 x double> %a0, double %fmul, i32 0 2699 ret <2 x double> %res 2700 } 2701 2702 define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) { 2703 ; SSE-LABEL: test_mm_mulhi_epi16: 2704 ; SSE: # %bb.0: 2705 ; SSE-NEXT: pmulhw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe5,0xc1] 2706 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2707 ; 2708 ; AVX1-LABEL: test_mm_mulhi_epi16: 2709 ; AVX1: # %bb.0: 2710 ; AVX1-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe5,0xc1] 2711 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2712 ; 2713 ; AVX512-LABEL: test_mm_mulhi_epi16: 2714 ; AVX512: # %bb.0: 2715 ; AVX512-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1] 2716 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2717 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2718 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 2719 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1) 2720 %bc = bitcast <8 x i16> %res to <2 x i64> 2721 ret <2 x i64> %bc 2722 } 2723 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone 2724 2725 define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) { 2726 ; SSE-LABEL: test_mm_mulhi_epu16: 2727 ; SSE: # %bb.0: 2728 ; SSE-NEXT: pmulhuw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe4,0xc1] 2729 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2730 ; 2731 ; AVX1-LABEL: test_mm_mulhi_epu16: 2732 ; AVX1: # %bb.0: 2733 ; AVX1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe4,0xc1] 2734 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2735 ; 2736 ; AVX512-LABEL: test_mm_mulhi_epu16: 2737 ; AVX512: # %bb.0: 2738 ; AVX512-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1] 2739 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2740 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2741 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 2742 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1) 2743 %bc = bitcast <8 x i16> %res to <2 x i64> 2744 ret <2 x i64> %bc 2745 } 2746 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone 2747 2748 define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) { 2749 ; SSE-LABEL: test_mm_mullo_epi16: 2750 ; SSE: # %bb.0: 2751 ; SSE-NEXT: pmullw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd5,0xc1] 2752 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2753 ; 2754 ; AVX1-LABEL: test_mm_mullo_epi16: 2755 ; AVX1: # %bb.0: 2756 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd5,0xc1] 2757 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2758 ; 2759 ; AVX512-LABEL: test_mm_mullo_epi16: 2760 ; AVX512: # %bb.0: 2761 ; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1] 2762 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2763 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2764 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 2765 %res = mul <8 x i16> %arg0, %arg1 2766 %bc = bitcast <8 x i16> %res to <2 x i64> 2767 ret <2 x i64> %bc 2768 } 2769 2770 define <2 x double> 
@test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 2771 ; SSE-LABEL: test_mm_or_pd: 2772 ; SSE: # %bb.0: 2773 ; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1] 2774 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2775 ; 2776 ; AVX1-LABEL: test_mm_or_pd: 2777 ; AVX1: # %bb.0: 2778 ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1] 2779 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2780 ; 2781 ; AVX512-LABEL: test_mm_or_pd: 2782 ; AVX512: # %bb.0: 2783 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1] 2784 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2785 %arg0 = bitcast <2 x double> %a0 to <4 x i32> 2786 %arg1 = bitcast <2 x double> %a1 to <4 x i32> 2787 %res = or <4 x i32> %arg0, %arg1 2788 %bc = bitcast <4 x i32> %res to <2 x double> 2789 ret <2 x double> %bc 2790 } 2791 2792 define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind { 2793 ; SSE-LABEL: test_mm_or_si128: 2794 ; SSE: # %bb.0: 2795 ; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1] 2796 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2797 ; 2798 ; AVX1-LABEL: test_mm_or_si128: 2799 ; AVX1: # %bb.0: 2800 ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1] 2801 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2802 ; 2803 ; AVX512-LABEL: test_mm_or_si128: 2804 ; AVX512: # %bb.0: 2805 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1] 2806 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2807 %res = or <2 x i64> %a0, %a1 2808 ret <2 x i64> %res 2809 } 2810 2811 define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) { 2812 ; SSE-LABEL: test_mm_packs_epi16: 2813 ; SSE: # %bb.0: 2814 ; SSE-NEXT: packsswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x63,0xc1] 2815 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2816 ; 2817 ; AVX1-LABEL: test_mm_packs_epi16: 2818 ; AVX1: # %bb.0: 2819 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x63,0xc1] 2820 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2821 ; 2822 ; AVX512-LABEL: test_mm_packs_epi16: 2823 ; AVX512: # %bb.0: 2824 ; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1] 2825 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2826 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2827 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 2828 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1) 2829 %bc = bitcast <16 x i8> %res to <2 x i64> 2830 ret <2 x i64> %bc 2831 } 2832 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone 2833 2834 define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) { 2835 ; SSE-LABEL: test_mm_packs_epi32: 2836 ; SSE: # %bb.0: 2837 ; SSE-NEXT: packssdw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6b,0xc1] 2838 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2839 ; 2840 ; AVX1-LABEL: test_mm_packs_epi32: 2841 ; AVX1: # %bb.0: 2842 ; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x6b,0xc1] 2843 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2844 ; 2845 ; AVX512-LABEL: test_mm_packs_epi32: 2846 ; AVX512: # %bb.0: 2847 ; AVX512-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1] 2848 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2849 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 2850 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 2851 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1) 2852 %bc = 
bitcast <8 x i16> %res to <2 x i64> 2853 ret <2 x i64> %bc 2854 } 2855 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone 2856 2857 define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) { 2858 ; SSE-LABEL: test_mm_packus_epi16: 2859 ; SSE: # %bb.0: 2860 ; SSE-NEXT: packuswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x67,0xc1] 2861 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2862 ; 2863 ; AVX1-LABEL: test_mm_packus_epi16: 2864 ; AVX1: # %bb.0: 2865 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x67,0xc1] 2866 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2867 ; 2868 ; AVX512-LABEL: test_mm_packus_epi16: 2869 ; AVX512: # %bb.0: 2870 ; AVX512-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1] 2871 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2872 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2873 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 2874 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1) 2875 %bc = bitcast <16 x i8> %res to <2 x i64> 2876 ret <2 x i64> %bc 2877 } 2878 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone 2879 2880 define void @test_mm_pause() nounwind { 2881 ; CHECK-LABEL: test_mm_pause: 2882 ; CHECK: # %bb.0: 2883 ; CHECK-NEXT: pause # encoding: [0xf3,0x90] 2884 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2885 call void @llvm.x86.sse2.pause() 2886 ret void 2887 } 2888 declare void @llvm.x86.sse2.pause() nounwind readnone 2889 2890 define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { 2891 ; SSE-LABEL: test_mm_sad_epu8: 2892 ; SSE: # %bb.0: 2893 ; SSE-NEXT: psadbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf6,0xc1] 2894 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2895 ; 2896 ; AVX1-LABEL: test_mm_sad_epu8: 2897 ; AVX1: # %bb.0: 2898 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf6,0xc1] 2899 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2900 ; 2901 ; AVX512-LABEL: test_mm_sad_epu8: 2902 ; AVX512: # %bb.0: 2903 ; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf6,0xc1] 2904 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2905 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 2906 %arg1 = bitcast <2 x i64> %a1 to <16 x i8> 2907 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1) 2908 ret <2 x i64> %res 2909 } 2910 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone 2911 2912 define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { 2913 ; X86-SSE-LABEL: test_mm_set_epi8: 2914 ; X86-SSE: # %bb.0: 2915 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2916 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2917 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 2918 ; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 2919 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 2920 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2921 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] 2922 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2923 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 
encoding: [0x0f,0xb6,0x44,0x24,0x10] 2924 ; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 2925 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 2926 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2927 ; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] 2928 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 2929 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] 2930 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2931 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 2932 ; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 2933 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 2934 ; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2935 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] 2936 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2937 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 2938 ; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 2939 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 2940 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2941 ; X86-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] 2942 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 2943 ; X86-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] 2944 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 2945 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] 2946 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2947 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 2948 ; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 2949 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 2950 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2951 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] 2952 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2953 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 2954 ; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 2955 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 2956 ; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2957 ; X86-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] 2958 ; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 2959 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] 2960 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2961 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 2962 ; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: 
[0x66,0x0f,0x6e,0xd0] 2963 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 2964 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2965 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] 2966 ; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] 2967 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 2968 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2969 ; X86-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] 2970 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 2971 ; X86-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] 2972 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2973 ; X86-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] 2974 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 2975 ; X86-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 2976 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 2977 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2978 ; 2979 ; X86-AVX1-LABEL: test_mm_set_epi8: 2980 ; X86-AVX1: # %bb.0: 2981 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] 2982 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40] 2983 ; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1] 2984 ; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 2985 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 2986 ; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 2987 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] 2988 ; X86-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 2989 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 2990 ; X86-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 2991 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] 2992 ; X86-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 2993 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 2994 ; X86-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 2995 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] 2996 ; X86-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 2997 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 2998 ; X86-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 2999 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] 3000 ; X86-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 3001 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3002 ; X86-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 3003 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] 3004 ; X86-AVX1-NEXT: vpinsrb $11, %eax, 
%xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 3005 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3006 ; X86-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 3007 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] 3008 ; X86-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 3009 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3010 ; X86-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 3011 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3012 ; X86-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 3013 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 3014 ; 3015 ; X86-AVX512-LABEL: test_mm_set_epi8: 3016 ; X86-AVX512: # %bb.0: 3017 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] 3018 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40] 3019 ; X86-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 3020 ; X86-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 3021 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 3022 ; X86-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 3023 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] 3024 ; X86-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 3025 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 3026 ; X86-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 3027 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] 3028 ; X86-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 3029 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 3030 ; X86-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 3031 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] 3032 ; X86-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 3033 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 3034 ; X86-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 3035 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] 3036 ; X86-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 3037 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3038 ; X86-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 3039 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] 3040 ; X86-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 3041 ; X86-AVX512-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3042 ; X86-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 3043 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] 3044 ; X86-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 3045 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3046 ; X86-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 3047 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3048 ; X86-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 3049 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 3050 ; 3051 ; X64-SSE-LABEL: test_mm_set_epi8: 3052 ; X64-SSE: # %bb.0: 3053 ; X64-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] 3054 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3055 ; X64-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] 3056 ; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 3057 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 3058 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3059 ; X64-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] 3060 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3061 ; X64-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3062 ; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3063 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 3064 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3065 ; X64-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] 3066 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 3067 ; X64-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] 3068 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3069 ; X64-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] 3070 ; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3071 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 3072 ; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 3073 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3074 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3075 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3076 ; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 3077 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 3078 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3079 ; X64-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] 3080 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 3081 ; X64-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] 3082 ; X64-SSE-NEXT: # xmm1 = 
xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3083 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3084 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3085 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 3086 ; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3087 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 3088 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3089 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 3090 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3091 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 3092 ; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3093 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 3094 ; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 3095 ; X64-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] 3096 ; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 3097 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 3098 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3099 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 3100 ; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3101 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 3102 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3103 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48] 3104 ; X64-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] 3105 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] 3106 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3107 ; X64-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] 3108 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 3109 ; X64-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] 3110 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 3111 ; X64-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] 3112 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3113 ; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 3114 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 3115 ; X64-SSE-NEXT: retq # encoding: [0xc3] 3116 ; 3117 ; X64-AVX1-LABEL: test_mm_set_epi8: 3118 ; X64-AVX1: # %bb.0: 3119 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x48] 3120 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] 3121 ; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 3122 ; X64-AVX1-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01] 3123 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 3124 ; X64-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 3125 ; 
X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 3126 ; X64-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 3127 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 3128 ; X64-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 3129 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 3130 ; X64-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 3131 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 3132 ; X64-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 3133 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3134 ; X64-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 3135 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3136 ; X64-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 3137 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3138 ; X64-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 3139 ; X64-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] 3140 ; X64-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 3141 ; X64-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] 3142 ; X64-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 3143 ; X64-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3144 ; X64-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 3145 ; X64-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] 3146 ; X64-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 3147 ; X64-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] 3148 ; X64-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 3149 ; X64-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] 3150 ; X64-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 3151 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 3152 ; 3153 ; X64-AVX512-LABEL: test_mm_set_epi8: 3154 ; X64-AVX512: # %bb.0: 3155 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x48] 3156 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] 3157 ; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 3158 ; X64-AVX512-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01] 3159 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 3160 ; X64-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 3161 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 3162 ; X64-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 3163 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 3164 ; X64-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 3165 ; X64-AVX512-NEXT: movzbl 
{{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 3166 ; X64-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 3167 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 3168 ; X64-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 3169 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3170 ; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 3171 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3172 ; X64-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 3173 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3174 ; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 3175 ; X64-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] 3176 ; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 3177 ; X64-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] 3178 ; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 3179 ; X64-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3180 ; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 3181 ; X64-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] 3182 ; X64-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 3183 ; X64-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] 3184 ; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 3185 ; X64-AVX512-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] 3186 ; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 3187 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 3188 %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0 3189 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1 3190 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2 3191 %res3 = insertelement <16 x i8> %res2, i8 %a12, i32 3 3192 %res4 = insertelement <16 x i8> %res3, i8 %a11, i32 4 3193 %res5 = insertelement <16 x i8> %res4, i8 %a10, i32 5 3194 %res6 = insertelement <16 x i8> %res5, i8 %a9 , i32 6 3195 %res7 = insertelement <16 x i8> %res6, i8 %a8 , i32 7 3196 %res8 = insertelement <16 x i8> %res7, i8 %a7 , i32 8 3197 %res9 = insertelement <16 x i8> %res8, i8 %a6 , i32 9 3198 %res10 = insertelement <16 x i8> %res9, i8 %a5 , i32 10 3199 %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11 3200 %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12 3201 %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13 3202 %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14 3203 %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15 3204 %res = bitcast <16 x i8> %res15 to <2 x i64> 3205 ret <2 x i64> %res 3206 } 3207 3208 define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { 3209 ; X86-SSE-LABEL: test_mm_set_epi16: 3210 ; X86-SSE: # %bb.0: 3211 ; X86-SSE-NEXT: movzwl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 3212 ; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 3213 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 3214 ; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3215 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] 3216 ; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3217 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 3218 ; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] 3219 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] 3220 ; X86-SSE-NEXT: movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8] 3221 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] 3222 ; X86-SSE-NEXT: movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0] 3223 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] 3224 ; X86-SSE-NEXT: movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8] 3225 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] 3226 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3227 ; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] 3228 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 3229 ; X86-SSE-NEXT: punpcklwd %xmm3, %xmm4 # encoding: [0x66,0x0f,0x61,0xe3] 3230 ; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 3231 ; X86-SSE-NEXT: punpckldq %xmm2, %xmm4 # encoding: [0x66,0x0f,0x62,0xe2] 3232 ; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 3233 ; X86-SSE-NEXT: punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5] 3234 ; X86-SSE-NEXT: # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] 3235 ; X86-SSE-NEXT: punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7] 3236 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] 3237 ; X86-SSE-NEXT: punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6] 3238 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1] 3239 ; X86-SSE-NEXT: punpcklqdq %xmm4, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc4] 3240 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0] 3241 ; X86-SSE-NEXT: retl # encoding: [0xc3] 3242 ; 3243 ; X86-AVX1-LABEL: test_mm_set_epi16: 3244 ; X86-AVX1: # %bb.0: 3245 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] 3246 ; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 3247 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] 3248 ; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 3249 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] 3250 ; X86-AVX1-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 3251 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] 3252 ; X86-AVX1-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03] 3253 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 3254 ; X86-AVX1-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 3255 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] 3256 ; X86-AVX1-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: 
[0xc5,0xf9,0xc4,0xc0,0x05] 3257 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 3258 ; X86-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 3259 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 3260 ; X86-AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 3261 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 3262 ; 3263 ; X86-AVX512-LABEL: test_mm_set_epi16: 3264 ; X86-AVX512: # %bb.0: 3265 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] 3266 ; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 3267 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] 3268 ; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 3269 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] 3270 ; X86-AVX512-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 3271 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] 3272 ; X86-AVX512-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] 3273 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 3274 ; X86-AVX512-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 3275 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] 3276 ; X86-AVX512-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 3277 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 3278 ; X86-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 3279 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 3280 ; X86-AVX512-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 3281 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 3282 ; 3283 ; X64-SSE-LABEL: test_mm_set_epi16: 3284 ; X64-SSE: # %bb.0: 3285 ; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10] 3286 ; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 3287 ; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] 3288 ; X64-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce] 3289 ; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] 3290 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3291 ; X64-SSE-NEXT: movd %edx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc2] 3292 ; X64-SSE-NEXT: movd %ecx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd1] 3293 ; X64-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0] 3294 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 3295 ; X64-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1] 3296 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 3297 ; X64-SSE-NEXT: movd %r8d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc0] 3298 ; X64-SSE-NEXT: movd %r9d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xc9] 3299 ; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] 3300 ; X64-SSE-NEXT: # xmm1 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3301 ; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3302 ; X64-SSE-NEXT: movd %r10d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc2] 3303 ; X64-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3] 3304 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 3305 ; X64-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1] 3306 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3307 ; X64-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2] 3308 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0] 3309 ; X64-SSE-NEXT: retq # encoding: [0xc3] 3310 ; 3311 ; X64-AVX1-LABEL: test_mm_set_epi16: 3312 ; X64-AVX1: # %bb.0: 3313 ; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 3314 ; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08] 3315 ; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 3316 ; X64-AVX1-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01] 3317 ; X64-AVX1-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02] 3318 ; X64-AVX1-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03] 3319 ; X64-AVX1-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 3320 ; X64-AVX1-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x05] 3321 ; X64-AVX1-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x06] 3322 ; X64-AVX1-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 3323 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 3324 ; 3325 ; X64-AVX512-LABEL: test_mm_set_epi16: 3326 ; X64-AVX512: # %bb.0: 3327 ; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 3328 ; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08] 3329 ; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 3330 ; X64-AVX512-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01] 3331 ; X64-AVX512-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02] 3332 ; X64-AVX512-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03] 3333 ; X64-AVX512-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 3334 ; X64-AVX512-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x05] 3335 ; X64-AVX512-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x06] 3336 ; X64-AVX512-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 3337 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 3338 %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0 3339 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1 3340 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2 3341 %res3 = insertelement <8 x i16> %res2, i16 %a4, i32 3 3342 %res4 = insertelement <8 x i16> %res3, i16 %a3, i32 4 3343 %res5 = insertelement <8 x i16> %res4, i16 %a2, i32 5 3344 %res6 = insertelement <8 x i16> %res5, i16 %a1, i32 6 3345 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7 3346 %res = bitcast <8 x i16> %res7 to <2 x i64> 3347 ret <2 x i64> %res 3348 } 
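
; NOTE: As in test_mm_set_epi8/test_mm_set_epi16 above, the _mm_set_* tests
; below build their result with insertelement chains in reverse argument
; order (the last argument lands in element 0), matching the intrinsics'
; convention that the first argument initializes the highest element.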

define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
; X86-SSE-LABEL: test_mm_set_epi32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x0c]
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_epi32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01]
; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02]
; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_epi32:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01]
; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02]
; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_epi32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce]
; X64-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-SSE-NEXT: movd %edx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd2]
; X64-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
; X64-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_epi32:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-AVX1-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01]
; X64-AVX1-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02]
; X64-AVX1-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_epi32:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-AVX512-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01]
; X64-AVX512-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02]
; X64-AVX512-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1
  %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2
  %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
  %res = bitcast <4 x i32> %res3 to <2 x i64>
  ret <2 x i64> %res
}

; TODO test_mm_set_epi64

define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
; X86-SSE-LABEL: test_mm_set_epi64x:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x0c]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x10]
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_epi64x:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01]
; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02]
; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_epi64x:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01]
; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02]
; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_epi64x:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq %rdi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xcf]
; X64-SSE-NEXT: movq %rsi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc6]
; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_epi64x:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT: vmovq %rsi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xce]
; X64-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0]
; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_epi64x:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovq %rdi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
; X64-AVX512-NEXT: vmovq %rsi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xce]
; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0
  %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
  ret <2 x i64> %res1
}

define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
; X86-SSE-LABEL: test_mm_set_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x0c]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x04]
; X86-SSE-NEXT: # xmm1 = mem[0],zero
; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero
; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
; X86-AVX1-NEXT: # xmm1 = mem[0],zero
; X86-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
; X86-AVX512-NEXT: # xmm1 = mem[0],zero
; X86-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movlhps %xmm0, %xmm1 # encoding: [0x0f,0x16,0xc8]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0]
; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <2 x double> undef, double %a1, i32 0
  %res1 = insertelement <2 x double> %res0, double %a0, i32 1
  ret <2 x double> %res1
}

define <2 x double> @test_mm_set_pd1(double %a0) nounwind {
; X86-SSE-LABEL: test_mm_set_pd1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_pd1:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero
; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_pd1:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX512-NEXT: # xmm0 = xmm0[0,0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_pd1:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_pd1:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_pd1:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm0[0,0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <2 x double> undef, double %a0, i32 0
  %res1 = insertelement <2 x double> %res0, double %a0, i32 1
  ret <2 x double> %res1
}

define <2 x double> @test_mm_set_sd(double %a0) nounwind {
; X86-SSE-LABEL: test_mm_set_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movq {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x7e,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
; X86-SSE-NEXT: # xmm0 = xmm0[0],zero
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_sd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovq {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero
; X86-AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
; X86-AVX1-NEXT: # xmm0 = xmm0[0],zero
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_sd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovq {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
; X86-AVX512-NEXT: # xmm0 = xmm0[0],zero
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
; X64-SSE-NEXT: # xmm0 = xmm0[0],zero
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_sd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
; X64-AVX1-NEXT: # xmm0 = xmm0[0],zero
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_sd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm0[0],zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <2 x double> undef, double %a0, i32 0
  %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
  ret <2 x double> %res1
}

define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_epi8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT: punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-SSE-NEXT: pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7]
; X86-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_epi8:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
; X86-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_epi8:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpbroadcastb %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_epi8:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT: punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-SSE-NEXT: pshuflw $224, %xmm0, %xmm0 # encoding: 
[0xf2,0x0f,0x70,0xc0,0xe0] 3673 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7] 3674 ; X64-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] 3675 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 3676 ; X64-SSE-NEXT: retq # encoding: [0xc3] 3677 ; 3678 ; X64-AVX1-LABEL: test_mm_set1_epi8: 3679 ; X64-AVX1: # %bb.0: 3680 ; X64-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] 3681 ; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 3682 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9] 3683 ; X64-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1] 3684 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 3685 ; 3686 ; X64-AVX512-LABEL: test_mm_set1_epi8: 3687 ; X64-AVX512: # %bb.0: 3688 ; X64-AVX512-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7] 3689 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 3690 %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0 3691 %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1 3692 %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2 3693 %res3 = insertelement <16 x i8> %res2, i8 %a0, i32 3 3694 %res4 = insertelement <16 x i8> %res3, i8 %a0, i32 4 3695 %res5 = insertelement <16 x i8> %res4, i8 %a0, i32 5 3696 %res6 = insertelement <16 x i8> %res5, i8 %a0, i32 6 3697 %res7 = insertelement <16 x i8> %res6, i8 %a0, i32 7 3698 %res8 = insertelement <16 x i8> %res7, i8 %a0, i32 8 3699 %res9 = insertelement <16 x i8> %res8, i8 %a0, i32 9 3700 %res10 = insertelement <16 x i8> %res9, i8 %a0, i32 10 3701 %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11 3702 %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12 3703 %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13 3704 %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14 3705 %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15 3706 %res = bitcast <16 x i8> %res15 to <2 x i64> 3707 ret <2 x i64> %res 3708 } 3709 3710 define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind { 3711 ; X86-SSE-LABEL: test_mm_set1_epi16: 3712 ; X86-SSE: # %bb.0: 3713 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 3714 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3715 ; X86-SSE-NEXT: pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0] 3716 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7] 3717 ; X86-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] 3718 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 3719 ; X86-SSE-NEXT: retl # encoding: [0xc3] 3720 ; 3721 ; X86-AVX1-LABEL: test_mm_set1_epi16: 3722 ; X86-AVX1: # %bb.0: 3723 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 3724 ; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 3725 ; X86-AVX1-NEXT: vpshuflw $224, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0xe0] 3726 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7] 3727 ; X86-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00] 3728 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 3729 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 3730 ; 3731 ; X86-AVX512-LABEL: test_mm_set1_epi16: 3732 ; X86-AVX512: # %bb.0: 3733 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 3734 ; X86-AVX512-NEXT: vpbroadcastw %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc0] 3735 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 3736 ; 3737 ; X64-SSE-LABEL: test_mm_set1_epi16: 3738 ; X64-SSE: # %bb.0: 3739 ; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: 
[0x66,0x0f,0x6e,0xc7] 3740 ; X64-SSE-NEXT: pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0] 3741 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7] 3742 ; X64-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] 3743 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 3744 ; X64-SSE-NEXT: retq # encoding: [0xc3] 3745 ; 3746 ; X64-AVX1-LABEL: test_mm_set1_epi16: 3747 ; X64-AVX1: # %bb.0: 3748 ; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] 3749 ; X64-AVX1-NEXT: vpshuflw $224, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0xe0] 3750 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7] 3751 ; X64-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00] 3752 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 3753 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 3754 ; 3755 ; X64-AVX512-LABEL: test_mm_set1_epi16: 3756 ; X64-AVX512: # %bb.0: 3757 ; X64-AVX512-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7] 3758 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 3759 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0 3760 %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1 3761 %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2 3762 %res3 = insertelement <8 x i16> %res2, i16 %a0, i32 3 3763 %res4 = insertelement <8 x i16> %res3, i16 %a0, i32 4 3764 %res5 = insertelement <8 x i16> %res4, i16 %a0, i32 5 3765 %res6 = insertelement <8 x i16> %res5, i16 %a0, i32 6 3766 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7 3767 %res = bitcast <8 x i16> %res7 to <2 x i64> 3768 ret <2 x i64> %res 3769 } 3770 3771 define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind { 3772 ; X86-SSE-LABEL: test_mm_set1_epi32: 3773 ; X86-SSE: # %bb.0: 3774 ; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %xmm0 # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04] 3775 ; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 3776 ; X86-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] 3777 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 3778 ; X86-SSE-NEXT: retl # encoding: [0xc3] 3779 ; 3780 ; X86-AVX1-LABEL: test_mm_set1_epi32: 3781 ; X86-AVX1: # %bb.0: 3782 ; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 3783 ; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 3784 ; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 3785 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 3786 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 3787 ; 3788 ; X86-AVX512-LABEL: test_mm_set1_epi32: 3789 ; X86-AVX512: # %bb.0: 3790 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3791 ; X86-AVX512-NEXT: vpbroadcastd %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc0] 3792 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 3793 ; 3794 ; X64-SSE-LABEL: test_mm_set1_epi32: 3795 ; X64-SSE: # %bb.0: 3796 ; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] 3797 ; X64-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] 3798 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 3799 ; X64-SSE-NEXT: retq # encoding: [0xc3] 3800 ; 3801 ; X64-AVX1-LABEL: test_mm_set1_epi32: 3802 ; X64-AVX1: # %bb.0: 3803 ; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] 3804 ; X64-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00] 3805 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 3806 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 3807 ; 3808 ; X64-AVX512-LABEL: test_mm_set1_epi32: 3809 ; X64-AVX512: # %bb.0: 3810 ; X64-AVX512-NEXT: vpbroadcastd %edi, %xmm0 # encoding: 
[0x62,0xf2,0x7d,0x08,0x7c,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
%res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1
%res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2
%res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
%res = bitcast <4 x i32> %res3 to <2 x i64>
ret <2 x i64> %res
}

; TODO test_mm_set1_epi64
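; A hedged sketch of the IR shape such a test might use (hypothetical, not
; autogenerated): _mm_set1_epi64 takes an MMX __m64 argument, so the body
; would splat the i64 payload into both lanes, mirroring
; test_mm_set1_epi64x below:
;   %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
;   %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1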

define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_epi64x:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %xmm0 # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %xmm1 # encoding: [0x66,0x0f,0x6e,0x4c,0x24,0x08]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT: pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44]
; X86-SSE-NEXT: # xmm0 = xmm0[0,1,0,1]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_epi64x:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
; X86-AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
; X86-AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_epi64x:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX512-NEXT: vpbroadcastq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xc0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_epi64x:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
; X64-SSE-NEXT: pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44]
; X64-SSE-NEXT: # xmm0 = xmm0[0,1,0,1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set1_epi64x:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT: vpshufd $68, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x44]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,1,0,1]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set1_epi64x:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
%res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
ret <2 x i64> %res1
}

define <2 x double> @test_mm_set1_pd(double %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero
; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX512-NEXT: # xmm0 = xmm0[0,0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set1_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set1_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm0[0,0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <2 x double> undef, double %a0, i32 0
%res1 = insertelement <2 x double> %res0, double %a0, i32 1
ret <2 x double> %res1
}

define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
; X86-SSE-LABEL: test_mm_setr_epi8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
; X86-SSE-NEXT: # xmm2 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 3940 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 3941 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3942 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] 3943 ; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3944 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 3945 ; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 3946 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 3947 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3948 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] 3949 ; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 3950 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 3951 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3952 ; X86-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] 3953 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 3954 ; X86-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] 3955 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3956 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 3957 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3958 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] 3959 ; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3960 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 3961 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3962 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3963 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3964 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] 3965 ; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3966 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 3967 ; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 3968 ; X86-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] 3969 ; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 3970 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3971 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3972 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] 3973 ; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3974 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 3975 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3976 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3977 ; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] 3978 ; X86-SSE-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3979 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3980 ; X86-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] 3981 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 3982 ; X86-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] 3983 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 3984 ; X86-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] 3985 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3986 ; X86-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 3987 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 3988 ; X86-SSE-NEXT: retl # encoding: [0xc3] 3989 ; 3990 ; X86-AVX1-LABEL: test_mm_setr_epi8: 3991 ; X86-AVX1: # %bb.0: 3992 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3993 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04] 3994 ; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1] 3995 ; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 3996 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] 3997 ; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 3998 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3999 ; X86-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 4000 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] 4001 ; X86-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 4002 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 4003 ; X86-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 4004 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] 4005 ; X86-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 4006 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 4007 ; X86-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 4008 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] 4009 ; X86-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 4010 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 4011 ; X86-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 4012 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] 4013 ; X86-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 4014 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 4015 ; X86-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 4016 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] 4017 ; X86-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 4018 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 4019 ; X86-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 4020 ; 
X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] 4021 ; X86-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 4022 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 4023 ; X86-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 4024 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 4025 ; 4026 ; X86-AVX512-LABEL: test_mm_setr_epi8: 4027 ; X86-AVX512: # %bb.0: 4028 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 4029 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04] 4030 ; X86-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 4031 ; X86-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 4032 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] 4033 ; X86-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 4034 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 4035 ; X86-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 4036 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] 4037 ; X86-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 4038 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 4039 ; X86-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 4040 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] 4041 ; X86-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 4042 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 4043 ; X86-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 4044 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] 4045 ; X86-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 4046 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 4047 ; X86-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 4048 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] 4049 ; X86-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 4050 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 4051 ; X86-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 4052 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] 4053 ; X86-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 4054 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 4055 ; X86-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 4056 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax 
# encoding: [0x0f,0xb6,0x44,0x24,0x3c] 4057 ; X86-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 4058 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 4059 ; X86-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 4060 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 4061 ; 4062 ; X64-SSE-LABEL: test_mm_setr_epi8: 4063 ; X64-SSE: # %bb.0: 4064 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] 4065 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4066 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48] 4067 ; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 4068 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 4069 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 4070 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 4071 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4072 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 4073 ; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 4074 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 4075 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 4076 ; X64-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] 4077 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 4078 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 4079 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4080 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 4081 ; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 4082 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 4083 ; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 4084 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 4085 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4086 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 4087 ; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 4088 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 4089 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 4090 ; X64-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] 4091 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 4092 ; X64-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] 4093 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 4094 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 4095 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4096 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 4097 ; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 4098 
; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 4099 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 4100 ; X64-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] 4101 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4102 ; X64-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] 4103 ; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 4104 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 4105 ; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 4106 ; X64-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] 4107 ; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 4108 ; X64-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 4109 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4110 ; X64-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] 4111 ; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 4112 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 4113 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 4114 ; X64-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] 4115 ; X64-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] 4116 ; X64-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] 4117 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4118 ; X64-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] 4119 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 4120 ; X64-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] 4121 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 4122 ; X64-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] 4123 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 4124 ; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 4125 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 4126 ; X64-SSE-NEXT: retq # encoding: [0xc3] 4127 ; 4128 ; X64-AVX1-LABEL: test_mm_setr_epi8: 4129 ; X64-AVX1: # %bb.0: 4130 ; X64-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] 4131 ; X64-AVX1-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7] 4132 ; X64-AVX1-NEXT: vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6] 4133 ; X64-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 4134 ; X64-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] 4135 ; X64-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 4136 ; X64-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 4137 ; X64-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 4138 ; X64-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] 4139 ; X64-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 4140 ; X64-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] 4141 ; X64-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 4142 ; X64-AVX1-NEXT: movzbl 
{{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 4143 ; X64-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 4144 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 4145 ; X64-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 4146 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 4147 ; X64-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 4148 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 4149 ; X64-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 4150 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 4151 ; X64-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 4152 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 4153 ; X64-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 4154 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 4155 ; X64-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 4156 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 4157 ; X64-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 4158 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48] 4159 ; X64-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 4160 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] 4161 ; X64-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 4162 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 4163 ; 4164 ; X64-AVX512-LABEL: test_mm_setr_epi8: 4165 ; X64-AVX512: # %bb.0: 4166 ; X64-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] 4167 ; X64-AVX512-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7] 4168 ; X64-AVX512-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] 4169 ; X64-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 4170 ; X64-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] 4171 ; X64-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 4172 ; X64-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 4173 ; X64-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 4174 ; X64-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] 4175 ; X64-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 4176 ; X64-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] 4177 ; X64-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 4178 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 4179 ; X64-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 4180 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 4181 ; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc4,0xe3,0x79,0x20,0xc0,0x07] 4182 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 4183 ; X64-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 4184 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 4185 ; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 4186 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 4187 ; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 4188 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 4189 ; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 4190 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 4191 ; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 4192 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 4193 ; X64-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 4194 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48] 4195 ; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 4196 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] 4197 ; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 4198 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 4199 %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0 4200 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1 4201 %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2 4202 %res3 = insertelement <16 x i8> %res2, i8 %a3 , i32 3 4203 %res4 = insertelement <16 x i8> %res3, i8 %a4 , i32 4 4204 %res5 = insertelement <16 x i8> %res4, i8 %a5 , i32 5 4205 %res6 = insertelement <16 x i8> %res5, i8 %a6 , i32 6 4206 %res7 = insertelement <16 x i8> %res6, i8 %a7 , i32 7 4207 %res8 = insertelement <16 x i8> %res7, i8 %a8 , i32 8 4208 %res9 = insertelement <16 x i8> %res8, i8 %a9 , i32 9 4209 %res10 = insertelement <16 x i8> %res9, i8 %a10, i32 10 4210 %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11 4211 %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12 4212 %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13 4213 %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14 4214 %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15 4215 %res = bitcast <16 x i8> %res15 to <2 x i64> 4216 ret <2 x i64> %res 4217 } 4218 4219 define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { 4220 ; X86-SSE-LABEL: test_mm_setr_epi16: 4221 ; X86-SSE: # %bb.0: 4222 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] 4223 ; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 4224 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] 4225 ; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 4226 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] 4227 ; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 4228 ; 
X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] 4229 ; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] 4230 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 4231 ; X86-SSE-NEXT: movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8] 4232 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] 4233 ; X86-SSE-NEXT: movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0] 4234 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 4235 ; X86-SSE-NEXT: movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8] 4236 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 4237 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4238 ; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] 4239 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 4240 ; X86-SSE-NEXT: punpcklwd %xmm3, %xmm4 # encoding: [0x66,0x0f,0x61,0xe3] 4241 ; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 4242 ; X86-SSE-NEXT: punpckldq %xmm2, %xmm4 # encoding: [0x66,0x0f,0x62,0xe2] 4243 ; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 4244 ; X86-SSE-NEXT: punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5] 4245 ; X86-SSE-NEXT: # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] 4246 ; X86-SSE-NEXT: punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7] 4247 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] 4248 ; X86-SSE-NEXT: punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6] 4249 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1] 4250 ; X86-SSE-NEXT: punpcklqdq %xmm4, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc4] 4251 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0] 4252 ; X86-SSE-NEXT: retl # encoding: [0xc3] 4253 ; 4254 ; X86-AVX1-LABEL: test_mm_setr_epi16: 4255 ; X86-AVX1: # %bb.0: 4256 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 4257 ; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 4258 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 4259 ; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 4260 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] 4261 ; X86-AVX1-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 4262 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 4263 ; X86-AVX1-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03] 4264 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] 4265 ; X86-AVX1-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 4266 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] 4267 ; X86-AVX1-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 4268 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] 4269 ; X86-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 4270 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] 4271 ; X86-AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 4272 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 4273 ; 4274 ; X86-AVX512-LABEL: 
test_mm_setr_epi16: 4275 ; X86-AVX512: # %bb.0: 4276 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 4277 ; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 4278 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 4279 ; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 4280 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] 4281 ; X86-AVX512-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 4282 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 4283 ; X86-AVX512-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] 4284 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] 4285 ; X86-AVX512-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 4286 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] 4287 ; X86-AVX512-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 4288 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] 4289 ; X86-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 4290 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] 4291 ; X86-AVX512-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 4292 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 4293 ; 4294 ; X64-SSE-LABEL: test_mm_setr_epi16: 4295 ; X64-SSE: # %bb.0: 4296 ; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 4297 ; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08] 4298 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4299 ; X64-SSE-NEXT: movd %r10d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xca] 4300 ; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] 4301 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 4302 ; X64-SSE-NEXT: movd %r9d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc1] 4303 ; X64-SSE-NEXT: movd %r8d, %xmm2 # encoding: [0x66,0x41,0x0f,0x6e,0xd0] 4304 ; X64-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0] 4305 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 4306 ; X64-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1] 4307 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 4308 ; X64-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1] 4309 ; X64-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca] 4310 ; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] 4311 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 4312 ; X64-SSE-NEXT: movd %esi, %xmm3 # encoding: [0x66,0x0f,0x6e,0xde] 4313 ; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] 4314 ; X64-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3] 4315 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 4316 ; X64-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1] 4317 ; X64-SSE-NEXT: # xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4318 ; X64-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2] 4319 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0] 4320 ; X64-SSE-NEXT: retq # encoding: [0xc3] 4321 ; 4322 ; X64-AVX1-LABEL: test_mm_setr_epi16: 4323 ; X64-AVX1: # %bb.0: 4324 ; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10] 4325 ; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 4326 ; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] 4327 ; X64-AVX1-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x01] 4328 ; X64-AVX1-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x02] 4329 ; X64-AVX1-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03] 4330 ; X64-AVX1-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04] 4331 ; X64-AVX1-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05] 4332 ; X64-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 4333 ; X64-AVX1-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07] 4334 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 4335 ; 4336 ; X64-AVX512-LABEL: test_mm_setr_epi16: 4337 ; X64-AVX512: # %bb.0: 4338 ; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10] 4339 ; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 4340 ; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] 4341 ; X64-AVX512-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x01] 4342 ; X64-AVX512-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x02] 4343 ; X64-AVX512-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x03] 4344 ; X64-AVX512-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04] 4345 ; X64-AVX512-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05] 4346 ; X64-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 4347 ; X64-AVX512-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07] 4348 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 4349 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0 4350 %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1 4351 %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2 4352 %res3 = insertelement <8 x i16> %res2, i16 %a3, i32 3 4353 %res4 = insertelement <8 x i16> %res3, i16 %a4, i32 4 4354 %res5 = insertelement <8 x i16> %res4, i16 %a5, i32 5 4355 %res6 = insertelement <8 x i16> %res5, i16 %a6, i32 6 4356 %res7 = insertelement <8 x i16> %res6, i16 %a7, i32 7 4357 %res = bitcast <8 x i16> %res7 to <2 x i64> 4358 ret <2 x i64> %res 4359 } 4360 4361 define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { 4362 ; X86-SSE-LABEL: test_mm_setr_epi32: 4363 ; X86-SSE: # %bb.0: 4364 ; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] 4365 ; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 4366 ; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c] 4367 ; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 4368 ; X86-SSE-NEXT: unpcklps %xmm0, 
%xmm1 # encoding: [0x0f,0x14,0xc8]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08]
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_epi32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_epi32:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_epi32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
; X64-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca]
; X64-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-SSE-NEXT: movd %esi, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd6]
; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_epi32:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
; X64-AVX1-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
; X64-AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_epi32:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX512-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
; X64-AVX512-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
; X64-AVX512-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
%res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1
%res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2
%res3 = insertelement <4 x i32> %res2, i32 %a3, i32 3
%res = bitcast <4 x i32> %res3 to <2 x i64>
ret <2 x i64> %res
}

; TODO test_mm_setr_epi64
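; A hedged sketch of the IR such a test might use (hypothetical, not
; autogenerated): like test_mm_setr_epi64x below, but with the two i64
; payloads arriving via __m64 arguments:
;   %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
;   %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1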

define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
; X86-SSE-LABEL: test_mm_setr_epi64x:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08]
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_epi64x:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_epi64x:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_epi64x:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq %rsi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xce]
; X64-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_epi64x:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovq %rsi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
; X64-AVX1-NEXT: vmovq %rdi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
; X64-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0]
; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_epi64x:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovq %rsi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
; X64-AVX512-NEXT: vmovq %rdi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
%res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1
ret <2 x i64> %res1
}
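; Note: the setr variants insert arguments in memory order (%a0 into lane 0),
; whereas test_mm_set_epi64x above inserts %a1 into lane 0 and %a0 into
; lane 1, which is why both lower to the same punpcklqdq pattern with the
; movq operands swapped.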
punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 4479 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 4480 ; X64-SSE-NEXT: retq # encoding: [0xc3] 4481 ; 4482 ; X64-AVX1-LABEL: test_mm_setr_epi64x: 4483 ; X64-AVX1: # %bb.0: 4484 ; X64-AVX1-NEXT: vmovq %rsi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc6] 4485 ; X64-AVX1-NEXT: vmovq %rdi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xcf] 4486 ; X64-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0] 4487 ; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] 4488 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 4489 ; 4490 ; X64-AVX512-LABEL: test_mm_setr_epi64x: 4491 ; X64-AVX512: # %bb.0: 4492 ; X64-AVX512-NEXT: vmovq %rsi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc6] 4493 ; X64-AVX512-NEXT: vmovq %rdi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xcf] 4494 ; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0] 4495 ; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] 4496 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 4497 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0 4498 %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1 4499 ret <2 x i64> %res1 4500 } 4501 4502 define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind { 4503 ; X86-SSE-LABEL: test_mm_setr_pd: 4504 ; X86-SSE: # %bb.0: 4505 ; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x0c] 4506 ; X86-SSE-NEXT: # xmm1 = mem[0],zero 4507 ; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04] 4508 ; X86-SSE-NEXT: # xmm0 = mem[0],zero 4509 ; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 4510 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 4511 ; X86-SSE-NEXT: retl # encoding: [0xc3] 4512 ; 4513 ; X86-AVX1-LABEL: test_mm_setr_pd: 4514 ; X86-AVX1: # %bb.0: 4515 ; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c] 4516 ; X86-AVX1-NEXT: # xmm0 = mem[0],zero 4517 ; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04] 4518 ; X86-AVX1-NEXT: # xmm1 = mem[0],zero 4519 ; X86-AVX1-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0] 4520 ; X86-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] 4521 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 4522 ; 4523 ; X86-AVX512-LABEL: test_mm_setr_pd: 4524 ; X86-AVX512: # %bb.0: 4525 ; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c] 4526 ; X86-AVX512-NEXT: # xmm0 = mem[0],zero 4527 ; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04] 4528 ; X86-AVX512-NEXT: # xmm1 = mem[0],zero 4529 ; X86-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0] 4530 ; X86-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] 4531 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 4532 ; 4533 ; X64-SSE-LABEL: test_mm_setr_pd: 4534 ; X64-SSE: # %bb.0: 4535 ; X64-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 4536 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 4537 ; X64-SSE-NEXT: retq # encoding: [0xc3] 4538 ; 4539 ; X64-AVX1-LABEL: test_mm_setr_pd: 4540 ; X64-AVX1: # %bb.0: 4541 ; X64-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] 4542 ; X64-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] 4543 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 4544 ; 4545 ; X64-AVX512-LABEL: test_mm_setr_pd: 4546 ; X64-AVX512: # %bb.0: 4547 ; X64-AVX512-NEXT: vmovlhps %xmm1, 
%xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] 4548 ; X64-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] 4549 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 4550 %res0 = insertelement <2 x double> undef, double %a0, i32 0 4551 %res1 = insertelement <2 x double> %res0, double %a1, i32 1 4552 ret <2 x double> %res1 4553 } 4554 4555 define <2 x double> @test_mm_setzero_pd() { 4556 ; SSE-LABEL: test_mm_setzero_pd: 4557 ; SSE: # %bb.0: 4558 ; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] 4559 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4560 ; 4561 ; AVX1-LABEL: test_mm_setzero_pd: 4562 ; AVX1: # %bb.0: 4563 ; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] 4564 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4565 ; 4566 ; AVX512-LABEL: test_mm_setzero_pd: 4567 ; AVX512: # %bb.0: 4568 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0] 4569 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4570 ret <2 x double> zeroinitializer 4571 } 4572 4573 define <2 x i64> @test_mm_setzero_si128() { 4574 ; SSE-LABEL: test_mm_setzero_si128: 4575 ; SSE: # %bb.0: 4576 ; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] 4577 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4578 ; 4579 ; AVX1-LABEL: test_mm_setzero_si128: 4580 ; AVX1: # %bb.0: 4581 ; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] 4582 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4583 ; 4584 ; AVX512-LABEL: test_mm_setzero_si128: 4585 ; AVX512: # %bb.0: 4586 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0] 4587 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4588 ret <2 x i64> zeroinitializer 4589 } 4590 4591 define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) { 4592 ; SSE-LABEL: test_mm_shuffle_epi32: 4593 ; SSE: # %bb.0: 4594 ; SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] 4595 ; SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 4596 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4597 ; 4598 ; AVX1-LABEL: test_mm_shuffle_epi32: 4599 ; AVX1: # %bb.0: 4600 ; AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 4601 ; AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 4602 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4603 ; 4604 ; AVX512-LABEL: test_mm_shuffle_epi32: 4605 ; AVX512: # %bb.0: 4606 ; AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 4607 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4608 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 4609 %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer 4610 %bc = bitcast <4 x i32> %res to <2 x i64> 4611 ret <2 x i64> %bc 4612 } 4613 4614 define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) { 4615 ; SSE-LABEL: test_mm_shuffle_pd: 4616 ; SSE: # %bb.0: 4617 ; SSE-NEXT: shufpd $1, %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc1,0x01] 4618 ; SSE-NEXT: # xmm0 = xmm0[1],xmm1[0] 4619 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4620 ; 4621 ; AVX1-LABEL: test_mm_shuffle_pd: 4622 ; AVX1: # %bb.0: 4623 ; AVX1-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc6,0xc1,0x01] 4624 ; AVX1-NEXT: # xmm0 = xmm0[1],xmm1[0] 4625 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4626 ; 4627 ; AVX512-LABEL: test_mm_shuffle_pd: 4628 ; AVX512: # %bb.0: 4629 ; AVX512-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc1,0x01] 4630 ; AVX512-NEXT: # xmm0 = xmm0[1],xmm1[0] 4631 ; AVX512-NEXT: 
ret{{[l|q]}} # encoding: [0xc3] 4632 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2> 4633 ret <2 x double> %res 4634 } 4635 4636 define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) { 4637 ; SSE-LABEL: test_mm_shufflehi_epi16: 4638 ; SSE: # %bb.0: 4639 ; SSE-NEXT: pshufhw $0, %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x70,0xc0,0x00] 4640 ; SSE-NEXT: # xmm0 = xmm0[0,1,2,3,4,4,4,4] 4641 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4642 ; 4643 ; AVX1-LABEL: test_mm_shufflehi_epi16: 4644 ; AVX1: # %bb.0: 4645 ; AVX1-NEXT: vpshufhw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x70,0xc0,0x00] 4646 ; AVX1-NEXT: # xmm0 = xmm0[0,1,2,3,4,4,4,4] 4647 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4648 ; 4649 ; AVX512-LABEL: test_mm_shufflehi_epi16: 4650 ; AVX512: # %bb.0: 4651 ; AVX512-NEXT: vpshufhw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x00] 4652 ; AVX512-NEXT: # xmm0 = xmm0[0,1,2,3,4,4,4,4] 4653 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4654 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 4655 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> 4656 %bc = bitcast <8 x i16> %res to <2 x i64> 4657 ret <2 x i64> %bc 4658 } 4659 4660 define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) { 4661 ; SSE-LABEL: test_mm_shufflelo_epi16: 4662 ; SSE: # %bb.0: 4663 ; SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00] 4664 ; SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] 4665 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4666 ; 4667 ; AVX1-LABEL: test_mm_shufflelo_epi16: 4668 ; AVX1: # %bb.0: 4669 ; AVX1-NEXT: vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00] 4670 ; AVX1-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] 4671 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4672 ; 4673 ; AVX512-LABEL: test_mm_shufflelo_epi16: 4674 ; AVX512: # %bb.0: 4675 ; AVX512-NEXT: vpshuflw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x00] 4676 ; AVX512-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] 4677 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4678 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 4679 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> 4680 %bc = bitcast <8 x i16> %res to <2 x i64> 4681 ret <2 x i64> %bc 4682 } 4683 4684 define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) { 4685 ; SSE-LABEL: test_mm_sll_epi16: 4686 ; SSE: # %bb.0: 4687 ; SSE-NEXT: psllw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf1,0xc1] 4688 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4689 ; 4690 ; AVX1-LABEL: test_mm_sll_epi16: 4691 ; AVX1: # %bb.0: 4692 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf1,0xc1] 4693 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4694 ; 4695 ; AVX512-LABEL: test_mm_sll_epi16: 4696 ; AVX512: # %bb.0: 4697 ; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1] 4698 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4699 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 4700 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 4701 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1) 4702 %bc = bitcast <8 x i16> %res to <2 x i64> 4703 ret <2 x i64> %bc 4704 } 4705 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone 4706 4707 define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) { 4708 ; SSE-LABEL: test_mm_sll_epi32: 4709 ; SSE: # %bb.0: 4710 ; SSE-NEXT: pslld %xmm1, 
%xmm0 # encoding: [0x66,0x0f,0xf2,0xc1] 4711 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4712 ; 4713 ; AVX1-LABEL: test_mm_sll_epi32: 4714 ; AVX1: # %bb.0: 4715 ; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf2,0xc1] 4716 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4717 ; 4718 ; AVX512-LABEL: test_mm_sll_epi32: 4719 ; AVX512: # %bb.0: 4720 ; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xc1] 4721 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4722 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 4723 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 4724 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1) 4725 %bc = bitcast <4 x i32> %res to <2 x i64> 4726 ret <2 x i64> %bc 4727 } 4728 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone 4729 4730 define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) { 4731 ; SSE-LABEL: test_mm_sll_epi64: 4732 ; SSE: # %bb.0: 4733 ; SSE-NEXT: psllq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf3,0xc1] 4734 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4735 ; 4736 ; AVX1-LABEL: test_mm_sll_epi64: 4737 ; AVX1: # %bb.0: 4738 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf3,0xc1] 4739 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4740 ; 4741 ; AVX512-LABEL: test_mm_sll_epi64: 4742 ; AVX512: # %bb.0: 4743 ; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf3,0xc1] 4744 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4745 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) 4746 ret <2 x i64> %res 4747 } 4748 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone 4749 4750 define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) { 4751 ; SSE-LABEL: test_mm_slli_epi16: 4752 ; SSE: # %bb.0: 4753 ; SSE-NEXT: psllw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xf0,0x01] 4754 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4755 ; 4756 ; AVX1-LABEL: test_mm_slli_epi16: 4757 ; AVX1: # %bb.0: 4758 ; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xf0,0x01] 4759 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4760 ; 4761 ; AVX512-LABEL: test_mm_slli_epi16: 4762 ; AVX512: # %bb.0: 4763 ; AVX512-NEXT: vpsllw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x01] 4764 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4765 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 4766 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1) 4767 %bc = bitcast <8 x i16> %res to <2 x i64> 4768 ret <2 x i64> %bc 4769 } 4770 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone 4771 4772 define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) { 4773 ; SSE-LABEL: test_mm_slli_epi32: 4774 ; SSE: # %bb.0: 4775 ; SSE-NEXT: pslld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xf0,0x01] 4776 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4777 ; 4778 ; AVX1-LABEL: test_mm_slli_epi32: 4779 ; AVX1: # %bb.0: 4780 ; AVX1-NEXT: vpslld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xf0,0x01] 4781 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4782 ; 4783 ; AVX512-LABEL: test_mm_slli_epi32: 4784 ; AVX512: # %bb.0: 4785 ; AVX512-NEXT: vpslld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x01] 4786 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4787 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 4788 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1) 4789 %bc = bitcast <4 x i32> %res to <2 x i64> 4790 ret <2 x i64> %bc 4791 } 
4792 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone 4793 4794 define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) { 4795 ; SSE-LABEL: test_mm_slli_epi64: 4796 ; SSE: # %bb.0: 4797 ; SSE-NEXT: psllq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xf0,0x01] 4798 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4799 ; 4800 ; AVX1-LABEL: test_mm_slli_epi64: 4801 ; AVX1: # %bb.0: 4802 ; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf0,0x01] 4803 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4804 ; 4805 ; AVX512-LABEL: test_mm_slli_epi64: 4806 ; AVX512: # %bb.0: 4807 ; AVX512-NEXT: vpsllq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x01] 4808 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4809 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1) 4810 ret <2 x i64> %res 4811 } 4812 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone 4813 4814 define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind { 4815 ; SSE-LABEL: test_mm_slli_si128: 4816 ; SSE: # %bb.0: 4817 ; SSE-NEXT: pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05] 4818 ; SSE-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] 4819 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4820 ; 4821 ; AVX1-LABEL: test_mm_slli_si128: 4822 ; AVX1: # %bb.0: 4823 ; AVX1-NEXT: vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05] 4824 ; AVX1-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] 4825 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4826 ; 4827 ; AVX512-LABEL: test_mm_slli_si128: 4828 ; AVX512: # %bb.0: 4829 ; AVX512-NEXT: vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05] 4830 ; AVX512-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] 4831 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4832 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 4833 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26> 4834 %bc = bitcast <16 x i8> %res to <2 x i64> 4835 ret <2 x i64> %bc 4836 } 4837 4838 define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind { 4839 ; SSE-LABEL: test_mm_sqrt_pd: 4840 ; SSE: # %bb.0: 4841 ; SSE-NEXT: sqrtpd %xmm0, %xmm0 # encoding: [0x66,0x0f,0x51,0xc0] 4842 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4843 ; 4844 ; AVX1-LABEL: test_mm_sqrt_pd: 4845 ; AVX1: # %bb.0: 4846 ; AVX1-NEXT: vsqrtpd %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x51,0xc0] 4847 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4848 ; 4849 ; AVX512-LABEL: test_mm_sqrt_pd: 4850 ; AVX512: # %bb.0: 4851 ; AVX512-NEXT: vsqrtpd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0] 4852 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4853 %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0) 4854 ret <2 x double> %res 4855 } 4856 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind readnone 4857 4858 define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 4859 ; SSE-LABEL: test_mm_sqrt_sd: 4860 ; SSE: # %bb.0: 4861 ; SSE-NEXT: sqrtsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0x51,0xc8] 4862 ; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1] 4863 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4864 ; 4865 ; AVX1-LABEL: test_mm_sqrt_sd: 4866 ; AVX1: # %bb.0: 4867 ; AVX1-NEXT: vsqrtsd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf3,0x51,0xc0] 4868 ; AVX1-NEXT: ret{{[l|q]}} # 
encoding: [0xc3] 4869 ; 4870 ; AVX512-LABEL: test_mm_sqrt_sd: 4871 ; AVX512: # %bb.0: 4872 ; AVX512-NEXT: vsqrtsd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf3,0x51,0xc0] 4873 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4874 %ext = extractelement <2 x double> %a0, i32 0 4875 %sqrt = call double @llvm.sqrt.f64(double %ext) 4876 %ins = insertelement <2 x double> %a1, double %sqrt, i32 0 4877 ret <2 x double> %ins 4878 } 4879 declare double @llvm.sqrt.f64(double) nounwind readnone 4880 4881 ; This doesn't match a clang test, but helps with fast-isel coverage. 4882 define double @test_mm_sqrt_sd_scalar(double %a0) nounwind { 4883 ; X86-SSE-LABEL: test_mm_sqrt_sd_scalar: 4884 ; X86-SSE: # %bb.0: 4885 ; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] 4886 ; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 4887 ; X86-SSE-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] 4888 ; X86-SSE-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] 4889 ; X86-SSE-NEXT: movsd 8(%ebp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x45,0x08] 4890 ; X86-SSE-NEXT: # xmm0 = mem[0],zero 4891 ; X86-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0] 4892 ; X86-SSE-NEXT: movsd %xmm0, (%esp) # encoding: [0xf2,0x0f,0x11,0x04,0x24] 4893 ; X86-SSE-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] 4894 ; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 4895 ; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] 4896 ; X86-SSE-NEXT: retl # encoding: [0xc3] 4897 ; 4898 ; X86-AVX1-LABEL: test_mm_sqrt_sd_scalar: 4899 ; X86-AVX1: # %bb.0: 4900 ; X86-AVX1-NEXT: pushl %ebp # encoding: [0x55] 4901 ; X86-AVX1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 4902 ; X86-AVX1-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] 4903 ; X86-AVX1-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] 4904 ; X86-AVX1-NEXT: vmovsd 8(%ebp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x45,0x08] 4905 ; X86-AVX1-NEXT: # xmm0 = mem[0],zero 4906 ; X86-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0] 4907 ; X86-AVX1-NEXT: vmovsd %xmm0, (%esp) # encoding: [0xc5,0xfb,0x11,0x04,0x24] 4908 ; X86-AVX1-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] 4909 ; X86-AVX1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 4910 ; X86-AVX1-NEXT: popl %ebp # encoding: [0x5d] 4911 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 4912 ; 4913 ; X86-AVX512-LABEL: test_mm_sqrt_sd_scalar: 4914 ; X86-AVX512: # %bb.0: 4915 ; X86-AVX512-NEXT: pushl %ebp # encoding: [0x55] 4916 ; X86-AVX512-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 4917 ; X86-AVX512-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] 4918 ; X86-AVX512-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] 4919 ; X86-AVX512-NEXT: vmovsd 8(%ebp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x45,0x08] 4920 ; X86-AVX512-NEXT: # xmm0 = mem[0],zero 4921 ; X86-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] 4922 ; X86-AVX512-NEXT: vmovsd %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x24] 4923 ; X86-AVX512-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] 4924 ; X86-AVX512-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 4925 ; X86-AVX512-NEXT: popl %ebp # encoding: [0x5d] 4926 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 4927 ; 4928 ; X64-SSE-LABEL: test_mm_sqrt_sd_scalar: 4929 ; X64-SSE: # %bb.0: 4930 ; X64-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0] 4931 ; X64-SSE-NEXT: retq # encoding: [0xc3] 4932 ; 4933 ; X64-AVX1-LABEL: test_mm_sqrt_sd_scalar: 4934 ; X64-AVX1: # %bb.0: 4935 ; X64-AVX1-NEXT: vsqrtsd %xmm0, 
%xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0] 4936 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 4937 ; 4938 ; X64-AVX512-LABEL: test_mm_sqrt_sd_scalar: 4939 ; X64-AVX512: # %bb.0: 4940 ; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] 4941 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 4942 %sqrt = call double @llvm.sqrt.f64(double %a0) 4943 ret double %sqrt 4944 } 4945 4946 define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) { 4947 ; SSE-LABEL: test_mm_sra_epi16: 4948 ; SSE: # %bb.0: 4949 ; SSE-NEXT: psraw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe1,0xc1] 4950 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4951 ; 4952 ; AVX1-LABEL: test_mm_sra_epi16: 4953 ; AVX1: # %bb.0: 4954 ; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe1,0xc1] 4955 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4956 ; 4957 ; AVX512-LABEL: test_mm_sra_epi16: 4958 ; AVX512: # %bb.0: 4959 ; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1] 4960 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4961 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 4962 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 4963 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1) 4964 %bc = bitcast <8 x i16> %res to <2 x i64> 4965 ret <2 x i64> %bc 4966 } 4967 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone 4968 4969 define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) { 4970 ; SSE-LABEL: test_mm_sra_epi32: 4971 ; SSE: # %bb.0: 4972 ; SSE-NEXT: psrad %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe2,0xc1] 4973 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4974 ; 4975 ; AVX1-LABEL: test_mm_sra_epi32: 4976 ; AVX1: # %bb.0: 4977 ; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe2,0xc1] 4978 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4979 ; 4980 ; AVX512-LABEL: test_mm_sra_epi32: 4981 ; AVX512: # %bb.0: 4982 ; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xc1] 4983 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4984 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 4985 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 4986 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1) 4987 %bc = bitcast <4 x i32> %res to <2 x i64> 4988 ret <2 x i64> %bc 4989 } 4990 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone 4991 4992 define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) { 4993 ; SSE-LABEL: test_mm_srai_epi16: 4994 ; SSE: # %bb.0: 4995 ; SSE-NEXT: psraw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xe0,0x01] 4996 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4997 ; 4998 ; AVX1-LABEL: test_mm_srai_epi16: 4999 ; AVX1: # %bb.0: 5000 ; AVX1-NEXT: vpsraw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xe0,0x01] 5001 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5002 ; 5003 ; AVX512-LABEL: test_mm_srai_epi16: 5004 ; AVX512: # %bb.0: 5005 ; AVX512-NEXT: vpsraw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x01] 5006 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5007 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 5008 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1) 5009 %bc = bitcast <8 x i16> %res to <2 x i64> 5010 ret <2 x i64> %bc 5011 } 5012 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone 5013 5014 define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) { 5015 ; SSE-LABEL: test_mm_srai_epi32: 5016 ; SSE: # %bb.0: 5017 ; SSE-NEXT: psrad 
$1, %xmm0 # encoding: [0x66,0x0f,0x72,0xe0,0x01] 5018 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5019 ; 5020 ; AVX1-LABEL: test_mm_srai_epi32: 5021 ; AVX1: # %bb.0: 5022 ; AVX1-NEXT: vpsrad $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xe0,0x01] 5023 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5024 ; 5025 ; AVX512-LABEL: test_mm_srai_epi32: 5026 ; AVX512: # %bb.0: 5027 ; AVX512-NEXT: vpsrad $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xe0,0x01] 5028 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5029 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 5030 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1) 5031 %bc = bitcast <4 x i32> %res to <2 x i64> 5032 ret <2 x i64> %bc 5033 } 5034 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone 5035 5036 define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) { 5037 ; SSE-LABEL: test_mm_srl_epi16: 5038 ; SSE: # %bb.0: 5039 ; SSE-NEXT: psrlw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd1,0xc1] 5040 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5041 ; 5042 ; AVX1-LABEL: test_mm_srl_epi16: 5043 ; AVX1: # %bb.0: 5044 ; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd1,0xc1] 5045 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5046 ; 5047 ; AVX512-LABEL: test_mm_srl_epi16: 5048 ; AVX512: # %bb.0: 5049 ; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1] 5050 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5051 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 5052 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 5053 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1) 5054 %bc = bitcast <8 x i16> %res to <2 x i64> 5055 ret <2 x i64> %bc 5056 } 5057 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone 5058 5059 define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) { 5060 ; SSE-LABEL: test_mm_srl_epi32: 5061 ; SSE: # %bb.0: 5062 ; SSE-NEXT: psrld %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd2,0xc1] 5063 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5064 ; 5065 ; AVX1-LABEL: test_mm_srl_epi32: 5066 ; AVX1: # %bb.0: 5067 ; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd2,0xc1] 5068 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5069 ; 5070 ; AVX512-LABEL: test_mm_srl_epi32: 5071 ; AVX512: # %bb.0: 5072 ; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1] 5073 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5074 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 5075 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 5076 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1) 5077 %bc = bitcast <4 x i32> %res to <2 x i64> 5078 ret <2 x i64> %bc 5079 } 5080 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone 5081 5082 define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) { 5083 ; SSE-LABEL: test_mm_srl_epi64: 5084 ; SSE: # %bb.0: 5085 ; SSE-NEXT: psrlq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd3,0xc1] 5086 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5087 ; 5088 ; AVX1-LABEL: test_mm_srl_epi64: 5089 ; AVX1: # %bb.0: 5090 ; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd3,0xc1] 5091 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5092 ; 5093 ; AVX512-LABEL: test_mm_srl_epi64: 5094 ; AVX512: # %bb.0: 5095 ; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1] 5096 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5097 %res = call <2 x i64> 
@llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) 5098 ret <2 x i64> %res 5099 } 5100 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone 5101 5102 define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) { 5103 ; SSE-LABEL: test_mm_srli_epi16: 5104 ; SSE: # %bb.0: 5105 ; SSE-NEXT: psrlw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xd0,0x01] 5106 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5107 ; 5108 ; AVX1-LABEL: test_mm_srli_epi16: 5109 ; AVX1: # %bb.0: 5110 ; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xd0,0x01] 5111 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5112 ; 5113 ; AVX512-LABEL: test_mm_srli_epi16: 5114 ; AVX512: # %bb.0: 5115 ; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xd0,0x01] 5116 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5117 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 5118 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1) 5119 %bc = bitcast <8 x i16> %res to <2 x i64> 5120 ret <2 x i64> %bc 5121 } 5122 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone 5123 5124 define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) { 5125 ; SSE-LABEL: test_mm_srli_epi32: 5126 ; SSE: # %bb.0: 5127 ; SSE-NEXT: psrld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xd0,0x01] 5128 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5129 ; 5130 ; AVX1-LABEL: test_mm_srli_epi32: 5131 ; AVX1: # %bb.0: 5132 ; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xd0,0x01] 5133 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5134 ; 5135 ; AVX512-LABEL: test_mm_srli_epi32: 5136 ; AVX512: # %bb.0: 5137 ; AVX512-NEXT: vpsrld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x01] 5138 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5139 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 5140 %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1) 5141 %bc = bitcast <4 x i32> %res to <2 x i64> 5142 ret <2 x i64> %bc 5143 } 5144 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone 5145 5146 define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) { 5147 ; SSE-LABEL: test_mm_srli_epi64: 5148 ; SSE: # %bb.0: 5149 ; SSE-NEXT: psrlq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xd0,0x01] 5150 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5151 ; 5152 ; AVX1-LABEL: test_mm_srli_epi64: 5153 ; AVX1: # %bb.0: 5154 ; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd0,0x01] 5155 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5156 ; 5157 ; AVX512-LABEL: test_mm_srli_epi64: 5158 ; AVX512: # %bb.0: 5159 ; AVX512-NEXT: vpsrlq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x01] 5160 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5161 %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1) 5162 ret <2 x i64> %res 5163 } 5164 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone 5165 5166 define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind { 5167 ; SSE-LABEL: test_mm_srli_si128: 5168 ; SSE: # %bb.0: 5169 ; SSE-NEXT: psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05] 5170 ; SSE-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero 5171 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5172 ; 5173 ; AVX1-LABEL: test_mm_srli_si128: 5174 ; AVX1: # %bb.0: 5175 ; AVX1-NEXT: vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05] 5176 ; AVX1-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero 5177 ; AVX1-NEXT: ret{{[l|q]}} # encoding: 
[0xc3] 5178 ; 5179 ; AVX512-LABEL: test_mm_srli_si128: 5180 ; AVX512: # %bb.0: 5181 ; AVX512-NEXT: vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05] 5182 ; AVX512-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero 5183 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5184 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 5185 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20> 5186 %bc = bitcast <16 x i8> %res to <2 x i64> 5187 ret <2 x i64> %bc 5188 } 5189 5190 define void @test_mm_store_pd(double *%a0, <2 x double> %a1) { 5191 ; X86-SSE-LABEL: test_mm_store_pd: 5192 ; X86-SSE: # %bb.0: 5193 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5194 ; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 5195 ; X86-SSE-NEXT: retl # encoding: [0xc3] 5196 ; 5197 ; X86-AVX1-LABEL: test_mm_store_pd: 5198 ; X86-AVX1: # %bb.0: 5199 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5200 ; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 5201 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 5202 ; 5203 ; X86-AVX512-LABEL: test_mm_store_pd: 5204 ; X86-AVX512: # %bb.0: 5205 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5206 ; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 5207 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 5208 ; 5209 ; X64-SSE-LABEL: test_mm_store_pd: 5210 ; X64-SSE: # %bb.0: 5211 ; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 5212 ; X64-SSE-NEXT: retq # encoding: [0xc3] 5213 ; 5214 ; X64-AVX1-LABEL: test_mm_store_pd: 5215 ; X64-AVX1: # %bb.0: 5216 ; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 5217 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 5218 ; 5219 ; X64-AVX512-LABEL: test_mm_store_pd: 5220 ; X64-AVX512: # %bb.0: 5221 ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 5222 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 5223 %arg0 = bitcast double* %a0 to <2 x double>* 5224 store <2 x double> %a1, <2 x double>* %arg0, align 16 5225 ret void 5226 } 5227 5228 define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) { 5229 ; X86-SSE-LABEL: test_mm_store_pd1: 5230 ; X86-SSE: # %bb.0: 5231 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5232 ; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 5233 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0] 5234 ; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 5235 ; X86-SSE-NEXT: retl # encoding: [0xc3] 5236 ; 5237 ; X86-AVX1-LABEL: test_mm_store_pd1: 5238 ; X86-AVX1: # %bb.0: 5239 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5240 ; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 5241 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0] 5242 ; X86-AVX1-NEXT: vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00] 5243 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 5244 ; 5245 ; X86-AVX512-LABEL: test_mm_store_pd1: 5246 ; X86-AVX512: # %bb.0: 5247 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5248 ; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 5249 ; X86-AVX512-NEXT: # xmm0 = xmm0[0,0] 5250 ; X86-AVX512-NEXT: vmovapd %xmm0, (%eax) # EVEX 
TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00] 5251 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 5252 ; 5253 ; X64-SSE-LABEL: test_mm_store_pd1: 5254 ; X64-SSE: # %bb.0: 5255 ; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 5256 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0] 5257 ; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 5258 ; X64-SSE-NEXT: retq # encoding: [0xc3] 5259 ; 5260 ; X64-AVX1-LABEL: test_mm_store_pd1: 5261 ; X64-AVX1: # %bb.0: 5262 ; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 5263 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0] 5264 ; X64-AVX1-NEXT: vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07] 5265 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 5266 ; 5267 ; X64-AVX512-LABEL: test_mm_store_pd1: 5268 ; X64-AVX512: # %bb.0: 5269 ; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 5270 ; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] 5271 ; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07] 5272 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 5273 %arg0 = bitcast double * %a0 to <2 x double>* 5274 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer 5275 store <2 x double> %shuf, <2 x double>* %arg0, align 16 5276 ret void 5277 } 5278 5279 define void @test_mm_store_sd(double *%a0, <2 x double> %a1) { 5280 ; X86-SSE-LABEL: test_mm_store_sd: 5281 ; X86-SSE: # %bb.0: 5282 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5283 ; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00] 5284 ; X86-SSE-NEXT: retl # encoding: [0xc3] 5285 ; 5286 ; X86-AVX1-LABEL: test_mm_store_sd: 5287 ; X86-AVX1: # %bb.0: 5288 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5289 ; X86-AVX1-NEXT: vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00] 5290 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 5291 ; 5292 ; X86-AVX512-LABEL: test_mm_store_sd: 5293 ; X86-AVX512: # %bb.0: 5294 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5295 ; X86-AVX512-NEXT: vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00] 5296 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 5297 ; 5298 ; X64-SSE-LABEL: test_mm_store_sd: 5299 ; X64-SSE: # %bb.0: 5300 ; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07] 5301 ; X64-SSE-NEXT: retq # encoding: [0xc3] 5302 ; 5303 ; X64-AVX1-LABEL: test_mm_store_sd: 5304 ; X64-AVX1: # %bb.0: 5305 ; X64-AVX1-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07] 5306 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 5307 ; 5308 ; X64-AVX512-LABEL: test_mm_store_sd: 5309 ; X64-AVX512: # %bb.0: 5310 ; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] 5311 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 5312 %ext = extractelement <2 x double> %a1, i32 0 5313 store double %ext, double* %a0, align 1 5314 ret void 5315 } 5316 5317 define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) { 5318 ; X86-SSE-LABEL: test_mm_store_si128: 5319 ; X86-SSE: # %bb.0: 5320 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5321 ; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 5322 ; X86-SSE-NEXT: retl # encoding: [0xc3] 5323 ; 5324 ; X86-AVX1-LABEL: test_mm_store_si128: 5325 ; X86-AVX1: # %bb.0: 5326 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5327 ; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: 
[0xc5,0xf8,0x29,0x00] 5328 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 5329 ; 5330 ; X86-AVX512-LABEL: test_mm_store_si128: 5331 ; X86-AVX512: # %bb.0: 5332 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5333 ; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 5334 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 5335 ; 5336 ; X64-SSE-LABEL: test_mm_store_si128: 5337 ; X64-SSE: # %bb.0: 5338 ; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 5339 ; X64-SSE-NEXT: retq # encoding: [0xc3] 5340 ; 5341 ; X64-AVX1-LABEL: test_mm_store_si128: 5342 ; X64-AVX1: # %bb.0: 5343 ; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 5344 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 5345 ; 5346 ; X64-AVX512-LABEL: test_mm_store_si128: 5347 ; X64-AVX512: # %bb.0: 5348 ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 5349 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 5350 store <2 x i64> %a1, <2 x i64>* %a0, align 16 5351 ret void 5352 } 5353 5354 define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) { 5355 ; X86-SSE-LABEL: test_mm_store1_pd: 5356 ; X86-SSE: # %bb.0: 5357 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5358 ; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 5359 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0] 5360 ; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 5361 ; X86-SSE-NEXT: retl # encoding: [0xc3] 5362 ; 5363 ; X86-AVX1-LABEL: test_mm_store1_pd: 5364 ; X86-AVX1: # %bb.0: 5365 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5366 ; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 5367 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0] 5368 ; X86-AVX1-NEXT: vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00] 5369 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 5370 ; 5371 ; X86-AVX512-LABEL: test_mm_store1_pd: 5372 ; X86-AVX512: # %bb.0: 5373 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5374 ; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 5375 ; X86-AVX512-NEXT: # xmm0 = xmm0[0,0] 5376 ; X86-AVX512-NEXT: vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00] 5377 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 5378 ; 5379 ; X64-SSE-LABEL: test_mm_store1_pd: 5380 ; X64-SSE: # %bb.0: 5381 ; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 5382 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0] 5383 ; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 5384 ; X64-SSE-NEXT: retq # encoding: [0xc3] 5385 ; 5386 ; X64-AVX1-LABEL: test_mm_store1_pd: 5387 ; X64-AVX1: # %bb.0: 5388 ; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 5389 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0] 5390 ; X64-AVX1-NEXT: vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07] 5391 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 5392 ; 5393 ; X64-AVX512-LABEL: test_mm_store1_pd: 5394 ; X64-AVX512: # %bb.0: 5395 ; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 5396 ; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] 5397 ; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07] 5398 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 5399 %arg0 = bitcast double * %a0 to <2 x double>* 5400 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer 5401 store <2 x 
double> %shuf, <2 x double>* %arg0, align 16 5402 ret void 5403 } 5404 5405 define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) { 5406 ; X86-SSE-LABEL: test_mm_storeh_sd: 5407 ; X86-SSE: # %bb.0: 5408 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5409 ; X86-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0] 5410 ; X86-SSE-NEXT: # xmm0 = xmm0[1,1] 5411 ; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00] 5412 ; X86-SSE-NEXT: retl # encoding: [0xc3] 5413 ; 5414 ; X86-AVX1-LABEL: test_mm_storeh_sd: 5415 ; X86-AVX1: # %bb.0: 5416 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5417 ; X86-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5418 ; X86-AVX1-NEXT: # xmm0 = xmm0[1,0] 5419 ; X86-AVX1-NEXT: vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00] 5420 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 5421 ; 5422 ; X86-AVX512-LABEL: test_mm_storeh_sd: 5423 ; X86-AVX512: # %bb.0: 5424 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5425 ; X86-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5426 ; X86-AVX512-NEXT: # xmm0 = xmm0[1,0] 5427 ; X86-AVX512-NEXT: vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00] 5428 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 5429 ; 5430 ; X64-SSE-LABEL: test_mm_storeh_sd: 5431 ; X64-SSE: # %bb.0: 5432 ; X64-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0] 5433 ; X64-SSE-NEXT: # xmm0 = xmm0[1,1] 5434 ; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07] 5435 ; X64-SSE-NEXT: retq # encoding: [0xc3] 5436 ; 5437 ; X64-AVX1-LABEL: test_mm_storeh_sd: 5438 ; X64-AVX1: # %bb.0: 5439 ; X64-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5440 ; X64-AVX1-NEXT: # xmm0 = xmm0[1,0] 5441 ; X64-AVX1-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07] 5442 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 5443 ; 5444 ; X64-AVX512-LABEL: test_mm_storeh_sd: 5445 ; X64-AVX512: # %bb.0: 5446 ; X64-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5447 ; X64-AVX512-NEXT: # xmm0 = xmm0[1,0] 5448 ; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] 5449 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 5450 %ext = extractelement <2 x double> %a1, i32 1 5451 store double %ext, double* %a0, align 8 5452 ret void 5453 } 5454 5455 define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) { 5456 ; X86-SSE-LABEL: test_mm_storel_epi64: 5457 ; X86-SSE: # %bb.0: 5458 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5459 ; X86-SSE-NEXT: movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00] 5460 ; X86-SSE-NEXT: retl # encoding: [0xc3] 5461 ; 5462 ; X86-AVX1-LABEL: test_mm_storel_epi64: 5463 ; X86-AVX1: # %bb.0: 5464 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5465 ; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00] 5466 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 5467 ; 5468 ; X86-AVX512-LABEL: test_mm_storel_epi64: 5469 ; X86-AVX512: # %bb.0: 5470 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5471 ; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] 5472 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 5473 ; 5474 ; X64-SSE-LABEL: 
test_mm_storel_epi64: 5475 ; X64-SSE: # %bb.0: 5476 ; X64-SSE-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0] 5477 ; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 5478 ; X64-SSE-NEXT: retq # encoding: [0xc3] 5479 ; 5480 ; X64-AVX1-LABEL: test_mm_storel_epi64: 5481 ; X64-AVX1: # %bb.0: 5482 ; X64-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 5483 ; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 5484 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 5485 ; 5486 ; X64-AVX512-LABEL: test_mm_storel_epi64: 5487 ; X64-AVX512: # %bb.0: 5488 ; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 5489 ; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 5490 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 5491 %ext = extractelement <2 x i64> %a1, i32 0 5492 %bc = bitcast <2 x i64> *%a0 to i64* 5493 store i64 %ext, i64* %bc, align 8 5494 ret void 5495 } 5496 5497 define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) { 5498 ; X86-SSE-LABEL: test_mm_storel_sd: 5499 ; X86-SSE: # %bb.0: 5500 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5501 ; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00] 5502 ; X86-SSE-NEXT: retl # encoding: [0xc3] 5503 ; 5504 ; X86-AVX1-LABEL: test_mm_storel_sd: 5505 ; X86-AVX1: # %bb.0: 5506 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5507 ; X86-AVX1-NEXT: vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00] 5508 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 5509 ; 5510 ; X86-AVX512-LABEL: test_mm_storel_sd: 5511 ; X86-AVX512: # %bb.0: 5512 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5513 ; X86-AVX512-NEXT: vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00] 5514 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 5515 ; 5516 ; X64-SSE-LABEL: test_mm_storel_sd: 5517 ; X64-SSE: # %bb.0: 5518 ; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07] 5519 ; X64-SSE-NEXT: retq # encoding: [0xc3] 5520 ; 5521 ; X64-AVX1-LABEL: test_mm_storel_sd: 5522 ; X64-AVX1: # %bb.0: 5523 ; X64-AVX1-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07] 5524 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 5525 ; 5526 ; X64-AVX512-LABEL: test_mm_storel_sd: 5527 ; X64-AVX512: # %bb.0: 5528 ; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] 5529 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 5530 %ext = extractelement <2 x double> %a1, i32 0 5531 store double %ext, double* %a0, align 8 5532 ret void 5533 } 5534 5535 define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) { 5536 ; X86-SSE-LABEL: test_mm_storer_pd: 5537 ; X86-SSE: # %bb.0: 5538 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5539 ; X86-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01] 5540 ; X86-SSE-NEXT: # xmm0 = xmm0[1,0] 5541 ; X86-SSE-NEXT: movapd %xmm0, (%eax) # encoding: [0x66,0x0f,0x29,0x00] 5542 ; X86-SSE-NEXT: retl # encoding: [0xc3] 5543 ; 5544 ; X86-AVX1-LABEL: test_mm_storer_pd: 5545 ; X86-AVX1: # %bb.0: 5546 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5547 ; X86-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5548 ; X86-AVX1-NEXT: # xmm0 = xmm0[1,0] 5549 ; X86-AVX1-NEXT: vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00] 5550 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 5551 ; 5552 ; 
X86-AVX512-LABEL: test_mm_storer_pd: 5553 ; X86-AVX512: # %bb.0: 5554 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5555 ; X86-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5556 ; X86-AVX512-NEXT: # xmm0 = xmm0[1,0] 5557 ; X86-AVX512-NEXT: vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00] 5558 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 5559 ; 5560 ; X64-SSE-LABEL: test_mm_storer_pd: 5561 ; X64-SSE: # %bb.0: 5562 ; X64-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01] 5563 ; X64-SSE-NEXT: # xmm0 = xmm0[1,0] 5564 ; X64-SSE-NEXT: movapd %xmm0, (%rdi) # encoding: [0x66,0x0f,0x29,0x07] 5565 ; X64-SSE-NEXT: retq # encoding: [0xc3] 5566 ; 5567 ; X64-AVX1-LABEL: test_mm_storer_pd: 5568 ; X64-AVX1: # %bb.0: 5569 ; X64-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5570 ; X64-AVX1-NEXT: # xmm0 = xmm0[1,0] 5571 ; X64-AVX1-NEXT: vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07] 5572 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 5573 ; 5574 ; X64-AVX512-LABEL: test_mm_storer_pd: 5575 ; X64-AVX512: # %bb.0: 5576 ; X64-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5577 ; X64-AVX512-NEXT: # xmm0 = xmm0[1,0] 5578 ; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07] 5579 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 5580 %arg0 = bitcast double* %a0 to <2 x double>* 5581 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0> 5582 store <2 x double> %shuf, <2 x double>* %arg0, align 16 5583 ret void 5584 } 5585 5586 define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) { 5587 ; X86-SSE-LABEL: test_mm_storeu_pd: 5588 ; X86-SSE: # %bb.0: 5589 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5590 ; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00] 5591 ; X86-SSE-NEXT: retl # encoding: [0xc3] 5592 ; 5593 ; X86-AVX1-LABEL: test_mm_storeu_pd: 5594 ; X86-AVX1: # %bb.0: 5595 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5596 ; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00] 5597 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 5598 ; 5599 ; X86-AVX512-LABEL: test_mm_storeu_pd: 5600 ; X86-AVX512: # %bb.0: 5601 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5602 ; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] 5603 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 5604 ; 5605 ; X64-SSE-LABEL: test_mm_storeu_pd: 5606 ; X64-SSE: # %bb.0: 5607 ; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07] 5608 ; X64-SSE-NEXT: retq # encoding: [0xc3] 5609 ; 5610 ; X64-AVX1-LABEL: test_mm_storeu_pd: 5611 ; X64-AVX1: # %bb.0: 5612 ; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07] 5613 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 5614 ; 5615 ; X64-AVX512-LABEL: test_mm_storeu_pd: 5616 ; X64-AVX512: # %bb.0: 5617 ; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] 5618 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 5619 %arg0 = bitcast double* %a0 to <2 x double>* 5620 store <2 x double> %a1, <2 x double>* %arg0, align 1 5621 ret void 5622 } 5623 5624 define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) { 5625 ; X86-SSE-LABEL: test_mm_storeu_si128: 5626 ; X86-SSE: 
# %bb.0: 5627 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5628 ; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00] 5629 ; X86-SSE-NEXT: retl # encoding: [0xc3] 5630 ; 5631 ; X86-AVX1-LABEL: test_mm_storeu_si128: 5632 ; X86-AVX1: # %bb.0: 5633 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5634 ; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00] 5635 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 5636 ; 5637 ; X86-AVX512-LABEL: test_mm_storeu_si128: 5638 ; X86-AVX512: # %bb.0: 5639 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5640 ; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] 5641 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 5642 ; 5643 ; X64-SSE-LABEL: test_mm_storeu_si128: 5644 ; X64-SSE: # %bb.0: 5645 ; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07] 5646 ; X64-SSE-NEXT: retq # encoding: [0xc3] 5647 ; 5648 ; X64-AVX1-LABEL: test_mm_storeu_si128: 5649 ; X64-AVX1: # %bb.0: 5650 ; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07] 5651 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 5652 ; 5653 ; X64-AVX512-LABEL: test_mm_storeu_si128: 5654 ; X64-AVX512: # %bb.0: 5655 ; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] 5656 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 5657 store <2 x i64> %a1, <2 x i64>* %a0, align 1 5658 ret void 5659 } 5660 5661 define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) { 5662 ; X86-SSE-LABEL: test_mm_stream_pd: 5663 ; X86-SSE: # %bb.0: 5664 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5665 ; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00] 5666 ; X86-SSE-NEXT: retl # encoding: [0xc3] 5667 ; 5668 ; X86-AVX1-LABEL: test_mm_stream_pd: 5669 ; X86-AVX1: # %bb.0: 5670 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5671 ; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00] 5672 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 5673 ; 5674 ; X86-AVX512-LABEL: test_mm_stream_pd: 5675 ; X86-AVX512: # %bb.0: 5676 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5677 ; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00] 5678 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 5679 ; 5680 ; X64-SSE-LABEL: test_mm_stream_pd: 5681 ; X64-SSE: # %bb.0: 5682 ; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07] 5683 ; X64-SSE-NEXT: retq # encoding: [0xc3] 5684 ; 5685 ; X64-AVX1-LABEL: test_mm_stream_pd: 5686 ; X64-AVX1: # %bb.0: 5687 ; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07] 5688 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 5689 ; 5690 ; X64-AVX512-LABEL: test_mm_stream_pd: 5691 ; X64-AVX512: # %bb.0: 5692 ; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07] 5693 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 5694 %arg0 = bitcast double* %a0 to <2 x double>* 5695 store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0 5696 ret void 5697 } 5698 5699 define void @test_mm_stream_si32(i32 *%a0, i32 %a1) { 5700 ; X86-LABEL: test_mm_stream_si32: 5701 ; X86: # %bb.0: 5702 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 5703 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 5704 ; X86-NEXT: movntil %eax, (%ecx) # 
encoding: [0x0f,0xc3,0x01] 5705 ; X86-NEXT: retl # encoding: [0xc3] 5706 ; 5707 ; X64-LABEL: test_mm_stream_si32: 5708 ; X64: # %bb.0: 5709 ; X64-NEXT: movntil %esi, (%rdi) # encoding: [0x0f,0xc3,0x37] 5710 ; X64-NEXT: retq # encoding: [0xc3] 5711 store i32 %a1, i32* %a0, align 1, !nontemporal !0 5712 ret void 5713 } 5714 5715 define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) { 5716 ; X86-SSE-LABEL: test_mm_stream_si128: 5717 ; X86-SSE: # %bb.0: 5718 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5719 ; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00] 5720 ; X86-SSE-NEXT: retl # encoding: [0xc3] 5721 ; 5722 ; X86-AVX1-LABEL: test_mm_stream_si128: 5723 ; X86-AVX1: # %bb.0: 5724 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5725 ; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00] 5726 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 5727 ; 5728 ; X86-AVX512-LABEL: test_mm_stream_si128: 5729 ; X86-AVX512: # %bb.0: 5730 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5731 ; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00] 5732 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 5733 ; 5734 ; X64-SSE-LABEL: test_mm_stream_si128: 5735 ; X64-SSE: # %bb.0: 5736 ; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07] 5737 ; X64-SSE-NEXT: retq # encoding: [0xc3] 5738 ; 5739 ; X64-AVX1-LABEL: test_mm_stream_si128: 5740 ; X64-AVX1: # %bb.0: 5741 ; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07] 5742 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 5743 ; 5744 ; X64-AVX512-LABEL: test_mm_stream_si128: 5745 ; X64-AVX512: # %bb.0: 5746 ; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07] 5747 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 5748 store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0 5749 ret void 5750 } 5751 5752 define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind { 5753 ; SSE-LABEL: test_mm_sub_epi8: 5754 ; SSE: # %bb.0: 5755 ; SSE-NEXT: psubb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf8,0xc1] 5756 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5757 ; 5758 ; AVX1-LABEL: test_mm_sub_epi8: 5759 ; AVX1: # %bb.0: 5760 ; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1] 5761 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5762 ; 5763 ; AVX512-LABEL: test_mm_sub_epi8: 5764 ; AVX512: # %bb.0: 5765 ; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1] 5766 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5767 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 5768 %arg1 = bitcast <2 x i64> %a1 to <16 x i8> 5769 %res = sub <16 x i8> %arg0, %arg1 5770 %bc = bitcast <16 x i8> %res to <2 x i64> 5771 ret <2 x i64> %bc 5772 } 5773 5774 define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { 5775 ; SSE-LABEL: test_mm_sub_epi16: 5776 ; SSE: # %bb.0: 5777 ; SSE-NEXT: psubw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf9,0xc1] 5778 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5779 ; 5780 ; AVX1-LABEL: test_mm_sub_epi16: 5781 ; AVX1: # %bb.0: 5782 ; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf9,0xc1] 5783 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5784 ; 5785 ; AVX512-LABEL: test_mm_sub_epi16: 5786 ; AVX512: # %bb.0: 5787 ; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1] 5788 ; AVX512-NEXT: 
define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_sub_epi8:
; SSE: # %bb.0:
; SSE-NEXT: psubb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf8,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = sub <16 x i8> %arg0, %arg1
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}

define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_sub_epi16:
; SSE: # %bb.0:
; SSE-NEXT: psubw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf9,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf9,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = sub <8 x i16> %arg0, %arg1
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}

define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_sub_epi32:
; SSE: # %bb.0:
; SSE-NEXT: psubd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfa,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfa,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%res = sub <4 x i32> %arg0, %arg1
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}

define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_sub_epi64:
; SSE: # %bb.0:
; SSE-NEXT: psubq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfb,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_epi64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_epi64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = sub <2 x i64> %a0, %a1
ret <2 x i64> %res
}

define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_sub_pd:
; SSE: # %bb.0:
; SSE-NEXT: subpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5c,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5c,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5c,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = fsub <2 x double> %a0, %a1
ret <2 x double> %res
}

define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_sub_sd:
; SSE: # %bb.0:
; SSE-NEXT: subsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5c,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5c,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5c,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ext0 = extractelement <2 x double> %a0, i32 0
%ext1 = extractelement <2 x double> %a1, i32 0
%fsub = fsub double %ext0, %ext1
%res = insertelement <2 x double> %a0, double %fsub, i32 0
ret <2 x double> %res
}

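; NOTE: Saturating subtraction is not expressible as plain IR sub, so these
; tests call the llvm.x86.sse2.psubs.*/psubus.* target intrinsics directly.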
define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epi8:
; SSE: # %bb.0:
; SSE-NEXT: psubsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe8,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_subs_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe8,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_subs_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1)
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epi16:
; SSE: # %bb.0:
; SSE-NEXT: psubsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe9,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_subs_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe9,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_subs_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1)
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epu8:
; SSE: # %bb.0:
; SSE-NEXT: psubusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd8,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_subs_epu8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd8,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_subs_epu8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1)
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epu16:
; SSE: # %bb.0:
; SSE-NEXT: psubusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd9,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_subs_epu16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd9,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_subs_epu16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1)
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone

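; NOTE: ucomisd signals an unordered result (NaN operand) with ZF=PF=CF=1,
; so EQ cannot be tested with sete alone; setnp screens out the unordered
; case and andb combines the two conditions.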
define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomieq_sd:
; SSE: # %bb.0:
; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomieq_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomieq_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomige_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomige_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomige_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomigt_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomigt_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomigt_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone

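; NOTE: For the le/lt variants the ucomisd operands are commuted so the
; result reduces to a single setae/seta on CF, which is correctly false for
; unordered inputs; eax is zeroed with xorl ahead of the flag-setting
; compare so no movzbl is needed afterwards.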
define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomile_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomile_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomile_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomilt_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomilt_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomilt_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone

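; NOTE: NEQ is the inverse of EQ: an unordered result must compare not-equal,
; hence setp/setne merged with orb.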
define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomineq_sd:
; SSE: # %bb.0:
; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomineq_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomineq_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone

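; NOTE: _mm_undefined_* only has to produce undef, so no instructions are
; expected beyond the return.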
define <2 x double> @test_mm_undefined_pd() {
; CHECK-LABEL: test_mm_undefined_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
ret <2 x double> undef
}

define <2 x i64> @test_mm_undefined_si128() {
; CHECK-LABEL: test_mm_undefined_si128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
ret <2 x i64> undef
}

define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi8:
; SSE: # %bb.0:
; SSE-NEXT: punpckhbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x68,0xc1]
; SSE-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x68,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi16:
; SSE: # %bb.0:
; SSE-NEXT: punpckhwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x69,0xc1]
; SSE-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x69,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}

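; NOTE: The unpacks of 32-bit and 64-bit elements below are matched to the
; floating-point unpckhps/unpckhpd forms, which shuffle lanes identically to
; the integer punpck* instructions; for the 32-bit cases the packed-single
; encoding is also a byte shorter.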
define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi32:
; SSE: # %bb.0:
; SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1]
; SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%res = shufflevector <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi64:
; SSE: # %bb.0:
; SSE-NEXT: unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1]
; SSE-NEXT: # xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi64:
; AVX1: # %bb.0:
; AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi64:
; AVX512: # %bb.0:
; AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %res
}

define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_unpackhi_pd:
; SSE: # %bb.0:
; SSE-NEXT: unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1]
; SSE-NEXT: # xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
ret <2 x double> %res
}

define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi8:
; SSE: # %bb.0:
; SSE-NEXT: punpcklbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x60,0xc1]
; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x60,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi16:
; SSE: # %bb.0:
; SSE-NEXT: punpcklwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x61,0xc1]
; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x61,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi32:
; SSE: # %bb.0:
; SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1]
; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%res = shufflevector <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}

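; NOTE: unpacklo on 64-bit elements concatenates the two low quadwords, which
; movlhps expresses with a two-byte opcode.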
define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi64:
; SSE: # %bb.0:
; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi64:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %res
}

define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_unpacklo_pd:
; SSE: # %bb.0:
; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
ret <2 x double> %res
}

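; NOTE: Vector XOR is bitwise and type-agnostic, so both the pd and si128
; variants are expected to select xorps, whose prefix-free encoding is a
; byte shorter than xorpd or pxor.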
define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_xor_pd:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_xor_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_xor_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x double> %a0 to <4 x i32>
%arg1 = bitcast <2 x double> %a1 to <4 x i32>
%res = xor <4 x i32> %arg0, %arg1
%bc = bitcast <4 x i32> %res to <2 x double>
ret <2 x double> %bc
}

define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_xor_si128:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_xor_si128:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_xor_si128:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = xor <2 x i64> %a0, %a1
ret <2 x i64> %res
}

!0 = !{i32 1}