; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s

define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpackssdw
  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone


define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpacksswb
  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpackuswb
  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vpaddsb
  %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpaddsw
  %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vpaddusb
  %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpaddusw
  %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pavg_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vpavgb
  %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpavgw
  %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpmaddwd
  %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmaxs_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpmaxsw
  %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pmaxu_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vpmaxub
  %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmins_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpminsw
  %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pminu_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vpminub
  %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone


define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) {
; CHECK: vpmovmskb
  %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpmulhw
  %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpmulhuw
  %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmulu_dq(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpmuludq
  %res = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vpsadbw
  %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpslld
  %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
; CHECK: vpslldq
  %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
; CHECK: vpslldq
  %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsllq
  %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpsllw
  %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) {
; CHECK: vpslld
  %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) {
; CHECK: vpsllq
  %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) {
; CHECK: vpsllw
  %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpsrad
  %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpsraw
  %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) {
; CHECK: vpsrad
  %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) {
; CHECK: vpsraw
  %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpsrld
  %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
; CHECK: vpsrldq
  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
; CHECK: vpsrldq
  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsrlq
  %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpsrlw
  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) {
; CHECK: vpsrld
  %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) {
; CHECK: vpsrlq
  %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) {
; CHECK: vpsrlw
  %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone


define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vpsubsb
  %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpsubsw
  %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vpsubusb
  %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpsubusw
  %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
; CHECK: vpabsb
  %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
; CHECK: vpabsd
  %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
; CHECK: vpabsw
  %res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vphaddd
  %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vphaddsw
  %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vphaddw
  %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vphsubd
  %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vphsubsw
  %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vphsubw
  %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vpmaddubsw
  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpmulhrsw
  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vpshufb
  %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone


define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vpsignb
  %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpsignd
  %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpsignw
  %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_movntdqa(i8* %a0) {
; CHECK: movl
; CHECK: vmovntdqa
  %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly


define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vmpsadbw
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpackusdw
  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone


define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
; CHECK: vpblendvb
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpblendw
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone


define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vpmaxsb
  %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmaxsd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpmaxsd
  %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmaxud(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpmaxud
  %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmaxuw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpmaxuw
  %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pminsb(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vpminsb
  %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_pminsd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpminsd
  %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pminud(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpminud
  %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpminuw
  %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
; CHECK: vpmovsxbd
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
; CHECK: vpmovsxbq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
; CHECK: vpmovsxbw
  %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
; CHECK: vpmovsxdq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
; CHECK: vpmovsxwd
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
; CHECK: vpmovsxwq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
; CHECK: vpmovzxbd
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
; CHECK: vpmovzxbq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
; CHECK: vpmovzxbw
  %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
; CHECK: vpmovzxdq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
; CHECK: vpmovzxwd
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
; CHECK: vpmovzxwq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmul.dq(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpmuldq
  %res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_vbroadcasti128(i8* %a0) {
; CHECK: vbroadcasti128
  %res = call <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8* %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) nounwind readonly


define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
; CHECK: vbroadcastsd
  %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly


define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
; CHECK: vbroadcastss
  %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly


define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
; CHECK: vbroadcastss
  %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly


define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpblendd
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpblendd
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone


define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
; CHECK: vpbroadcastb
  %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly


define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
; CHECK: vpbroadcastb
  %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly


define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
; CHECK: vpbroadcastw
  %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly


define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
; CHECK: vpbroadcastw
  %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly


define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
; CHECK: vpbroadcastd
  %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly


define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
; CHECK: vpbroadcastd
  %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly


define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
; CHECK: vpbroadcastq
  %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly


define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
; CHECK: vpbroadcastq
  %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly


define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpermd
  %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly


define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x float> %a1) {
; CHECK: vpermps
  %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly


define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK: vperm2i128
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly
; 128-bit lane extract/insert; both intrinsics take an i8 immediate operand.

define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
  ; CHECK: vextracti128
  %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone


define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vinserti128
  %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone


; Masked loads: pointer operand plus a vector mask.

define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {
  ; CHECK: vpmaskmovq
  %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly


define <4 x i64> @test_x86_avx2_maskload_q_256(i8* %a0, <4 x i64> %a1) {
  ; CHECK: vpmaskmovq
  %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly


define <4 x i32> @test_x86_avx2_maskload_d(i8* %a0, <4 x i32> %a1) {
  ; CHECK: vpmaskmovd
  %res = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly


define <8 x i32> @test_x86_avx2_maskload_d_256(i8* %a0, <8 x i32> %a1) {
  ; CHECK: vpmaskmovd
  %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly


; Masked stores: void result, so the declares carry only nounwind.

define void @test_x86_avx2_maskstore_q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
  ; CHECK: vpmaskmovq
  call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind


define void @test_x86_avx2_maskstore_q_256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
  ; CHECK: vpmaskmovq
  call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind


define void @test_x86_avx2_maskstore_d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
  ; CHECK: vpmaskmovd
  call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind


define void @test_x86_avx2_maskstore_d_256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
  ; CHECK: vpmaskmovd
  call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind


; Variable (per-element) shift intrinsics.

define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsllvd
  %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpsllvd
  %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone


define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpsllvq
  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone


define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vpsllvq
  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone


define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsrlvd
  %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpsrlvd
  %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone


define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpsrlvq
  %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone


define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vpsrlvq
  %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone


define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsravd
  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpsravd
  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone

; This is checked here because the execution dependency fix pass makes it hard to test in AVX mode since we don't have 256-bit integer instructions
define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
  ; CHECK: vmovdqu
  ; add operation forces the execution domain.
  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind

; Gather intrinsics: (passthrough, base pointer, index vector, mask, i8 scale).

define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1, <4 x i32> %idx, <2 x double> %mask) {
  ; CHECK: vgatherdpd
  %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8) nounwind readonly

define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1, <4 x i32> %idx, <4 x double> %mask) {
  ; CHECK: vgatherdpd
  %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ;
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8) nounwind readonly
; Remaining gather variants: {d,q} index width x {pd,ps,q,d} element type,
; each in 128- and 256-bit forms. All use scale immediate i8 2.

define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1, <2 x i64> %idx, <2 x double> %mask) {
  ; CHECK: vgatherqpd
  %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8) nounwind readonly

define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, i8* %a1, <4 x i64> %idx, <4 x double> %mask) {
  ; CHECK: vgatherqpd
  %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ;
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, i8* %a1, <4 x i32> %idx, <4 x float> %mask) {
  ; CHECK: vgatherdps
  %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8) nounwind readonly

define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, i8* %a1, <8 x i32> %idx, <8 x float> %mask) {
  ; CHECK: vgatherdps
  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ;
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1, <2 x i64> %idx, <4 x float> %mask) {
  ; CHECK: vgatherqps
  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1, <4 x i64> %idx, <4 x float> %mask) {
  ; CHECK: vgatherqps
  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8) nounwind readonly

define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1, <4 x i32> %idx, <2 x i64> %mask) {
  ; CHECK: vpgatherdq
  %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ;
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8) nounwind readonly

define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1, <4 x i32> %idx, <4 x i64> %mask) {
  ; CHECK: vpgatherdq
  %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ;
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8) nounwind readonly

define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1, <2 x i64> %idx, <2 x i64> %mask) {
  ; CHECK: vpgatherqq
  %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ;
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8) nounwind readonly

define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1, <4 x i64> %idx, <4 x i64> %mask) {
  ; CHECK: vpgatherqq
  %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ;
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1, <4 x i32> %idx, <4 x i32> %mask) {
  ; CHECK: vpgatherdd
  %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8) nounwind readonly

define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1, <8 x i32> %idx, <8 x i32> %mask) {
  ; CHECK: vpgatherdd
  %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ;
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1, <2 x i64> %idx, <4 x i32> %mask) {
  ; CHECK: vpgatherqd
  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1, <4 x i64> %idx, <4 x i32> %mask) {
  ; CHECK: vpgatherqd
  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8) nounwind readonly

; PR13298
; PR13298 regression test: the gather's mask operand must survive in a
; register ([[DEST]] captures it) because the instruction both reads and
; clobbers the mask; the trailing store keeps %mask live past the gather.
define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a, <8 x i32> %idx, <8 x float> %mask, float* nocapture %out) {
; CHECK: test_gather_mask
; CHECK: vmovdqa %ymm2, [[DEST:%.*]]
; CHECK: vgatherdps [[DEST]]
;; gather with mask
  %a_i8 = bitcast float* %a to i8*
  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %a_i8, <8 x i32> %idx, <8 x float> %mask, i8 4) ;

;; for debugging, we'll just dump out the mask
  %out_ptr = bitcast float * %out to <8 x float> *
  store <8 x float> %mask, <8 x float> * %out_ptr, align 4

  ret <8 x float> %res
}