; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64

define <16 x i16> @test_llvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxbw:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxbw (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxbw:
; X64:       ## BB#0:
; X64-NEXT:    vpmovsxbw (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = sext <16 x i8> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <8 x i32> @test_llvm_x86_avx2_pmovsxbd(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxbd:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxbd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxbd:
; X64:       ## BB#0:
; X64-NEXT:    vpmovsxbd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = sext <8 x i8> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovsxbq(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxbq:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxbq (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxbq:
; X64:       ## BB#0:
; X64-NEXT:    vpmovsxbq (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <8 x i32> @test_llvm_x86_avx2_pmovsxwd(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxwd:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxwd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxwd:
; X64:       ## BB#0:
; X64-NEXT:    vpmovsxwd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = sext <8 x i16> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <4 x i64> @test_llvm_x86_avx2_pmovsxwq(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxwq:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxwq (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxwq:
; X64:       ## BB#0:
; X64-NEXT:    vpmovsxwq (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovsxdq(<4 x i32>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxdq:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxdq (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxdq:
; X64:       ## BB#0:
; X64-NEXT:    vpmovsxdq (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a, align 1
  %2 = sext <4 x i32> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <16 x i16> @test_llvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxbw:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxbw:
; X64:       ## BB#0:
; X64-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = zext <16 x i8> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <8 x i32> @test_llvm_x86_avx2_pmovzxbd(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxbd:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxbd:
; X64:       ## BB#0:
; X64-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = zext <8 x i8> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovzxbq(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxbq:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxbq:
; X64:       ## BB#0:
; X64-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = zext <4 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <8 x i32> @test_llvm_x86_avx2_pmovzxwd(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxwd:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxwd:
; X64:       ## BB#0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = zext <8 x i16> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <4 x i64> @test_llvm_x86_avx2_pmovzxwq(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxwq:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxwq:
; X64:       ## BB#0:
; X64-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = zext <4 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovzxdq(<4 x i32>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxdq:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxdq:
; X64:       ## BB#0:
; X64-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a, align 1
  %2 = zext <4 x i32> %1 to <4 x i64>
  ret <4 x i64> %2
}