; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -disable-peephole -mtriple=i686-apple-darwin -mattr=+avx512vl,avx512bw | FileCheck %s --check-prefix=X32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512vl,avx512bw | FileCheck %s --check-prefix=X64

define <16 x i16> @test_llvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxbw:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxbw (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxbw:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbw (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = sext <16 x i8> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <8 x i32> @test_llvm_x86_avx2_pmovsxbd(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxbd:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxbd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxbd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = sext <8 x i8> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovsxbq(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxbq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxbq (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxbq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbq (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <8 x i32> @test_llvm_x86_avx2_pmovsxwd(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxwd:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxwd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxwd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxwd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = sext <8 x i16> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <4 x i64> @test_llvm_x86_avx2_pmovsxwq(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxwq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxwq (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxwq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxwq (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovsxdq(<4 x i32>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxdq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxdq (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxdq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxdq (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a, align 1
  %2 = sext <4 x i32> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <16 x i16> @test_llvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxbw:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxbw:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = zext <16 x i8> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <8 x i32> @test_llvm_x86_avx2_pmovzxbd(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxbd:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxbd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = zext <8 x i8> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovzxbq(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxbq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxbq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = zext <4 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <8 x i32> @test_llvm_x86_avx2_pmovzxwd(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxwd:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxwd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = zext <8 x i16> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <4 x i64> @test_llvm_x86_avx2_pmovzxwq(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxwq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxwq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = zext <4 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovzxdq(<4 x i32>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxdq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxdq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a, align 1
  %2 = zext <4 x i32> %1 to <4 x i64>
  ret <4 x i64> %2
}