; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=AVX2

; Codegen tests for (trunc (shl x, C)) patterns, vector and scalar:
;  - when C < destination bit width, the shift should be narrowed and performed
;    in the destination type after the truncation;
;  - when C >= destination bit width, every truncated bit is shifted out and
;    the whole expression should fold to a zero store / zero register.
; Each case is checked for both run lines (sse2 and avx2 attribute sets).

; <4 x i64> shifted left by 7, truncated to <4 x i32>: both runs narrow the
; shift to a 32-bit pslld/vpslld executed after the truncating shuffle.
define void @trunc_shl_7_v4i32_v4i64(<4 x i32> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
; SSE2-LABEL: trunc_shl_7_v4i32_v4i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps (%rsi), %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],mem[0,2]
; SSE2-NEXT: pslld $7, %xmm0
; SSE2-NEXT: movdqa %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_7_v4i32_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = mem[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpslld $7, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa %xmm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
  %val = load <4 x i64>, <4 x i64> addrspace(1)* %in
  %shl = shl <4 x i64> %val, <i64 7, i64 7, i64 7, i64 7>
  %trunc = trunc <4 x i64> %shl to <4 x i32>
  store <4 x i32> %trunc, <4 x i32> addrspace(1)* %out
  ret void
}

; <8 x i32> shifted left by 15, truncated to <8 x i16>: 15 < 16, so both runs
; emit a 16-bit psllw/vpsllw $15 after packing down to i16 lanes.
define <8 x i16> @trunc_shl_15_v8i16_v8i32(<8 x i32> %a) {
; SSE2-LABEL: trunc_shl_15_v8i16_v8i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: psllw $15, %xmm0
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_15_v8i16_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
  %shl = shl <8 x i32> %a, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  %conv = trunc <8 x i32> %shl to <8 x i16>
  ret <8 x i16> %conv
}

; Shift amount equals the i16 destination width: the sse2 run folds the whole
; result to zero (xorps). NOTE(review): the avx2 run instead emits a vpshufb
; whose mask is all zeros except bytes 28,29 — apparently a missed full fold;
; confirm this suboptimal output is the expected current behavior.
define <8 x i16> @trunc_shl_16_v8i16_v8i32(<8 x i32> %a) {
; SSE2-LABEL: trunc_shl_16_v8i16_v8i32:
; SSE2: # %bb.0:
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_16_v8i16_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[28,29]
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
  %shl = shl <8 x i32> %a, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %conv = trunc <8 x i32> %shl to <8 x i16>
  ret <8 x i16> %conv
}

; Shift amount 17 > i16 destination width: the sse2 run folds to zero.
; NOTE(review): the avx2 run still performs vpslld $17 plus the pack shuffle
; rather than folding to zero — confirm this missed fold is expected.
define <8 x i16> @trunc_shl_17_v8i16_v8i32(<8 x i32> %a) {
; SSE2-LABEL: trunc_shl_17_v8i16_v8i32:
; SSE2: # %bb.0:
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_17_v8i16_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld $17, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
  %shl = shl <8 x i32> %a, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
  %conv = trunc <8 x i32> %shl to <8 x i16>
  ret <8 x i16> %conv
}

; Scalar i64 shifted by 31, truncated to i32: 31 < 32, so both runs load only
; the low 32 bits and do a 32-bit shll $31.
define void @trunc_shl_31_i32_i64(i32* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_31_i32_i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movl (%rsi), %eax
; SSE2-NEXT: shll $31, %eax
; SSE2-NEXT: movl %eax, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_31_i32_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: movl (%rsi), %eax
; AVX2-NEXT: shll $31, %eax
; AVX2-NEXT: movl %eax, (%rdi)
; AVX2-NEXT: retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 31
  %trunc = trunc i64 %shl to i32
  store i32 %trunc, i32* %out
  ret void
}

; Shift amount equals the i32 destination width: both runs fold the store to
; an immediate zero (movl $0) with no load or shift.
define void @trunc_shl_32_i32_i64(i32* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_32_i32_i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movl $0, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_32_i32_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: movl $0, (%rdi)
; AVX2-NEXT: retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 32
  %trunc = trunc i64 %shl to i32
  store i32 %trunc, i32* %out
  ret void
}

; i64 shifted by 15, truncated to i16: 15 < 16, so both runs shift in a 32-bit
; register and store only the low word (movw).
define void @trunc_shl_15_i16_i64(i16* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_15_i16_i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movl (%rsi), %eax
; SSE2-NEXT: shll $15, %eax
; SSE2-NEXT: movw %ax, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_15_i16_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: movl (%rsi), %eax
; AVX2-NEXT: shll $15, %eax
; AVX2-NEXT: movw %ax, (%rdi)
; AVX2-NEXT: retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 15
  %trunc = trunc i64 %shl to i16
  store i16 %trunc, i16* %out
  ret void
}

; Shift amount equals the i16 destination width: both runs fold to a zero
; word store (movw $0).
define void @trunc_shl_16_i16_i64(i16* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_16_i16_i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movw $0, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_16_i16_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: movw $0, (%rdi)
; AVX2-NEXT: retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 16
  %trunc = trunc i64 %shl to i16
  store i16 %trunc, i16* %out
  ret void
}

; i64 shifted by 7, truncated to i8: 7 < 8, so both runs narrow the whole
; operation to a byte load, shlb $7, and byte store.
define void @trunc_shl_7_i8_i64(i8* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_7_i8_i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movb (%rsi), %al
; SSE2-NEXT: shlb $7, %al
; SSE2-NEXT: movb %al, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_7_i8_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: movb (%rsi), %al
; AVX2-NEXT: shlb $7, %al
; AVX2-NEXT: movb %al, (%rdi)
; AVX2-NEXT: retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 7
  %trunc = trunc i64 %shl to i8
  store i8 %trunc, i8* %out
  ret void
}

; Shift amount equals the i8 destination width: both runs fold to a zero byte
; store (movb $0).
define void @trunc_shl_8_i8_i64(i8* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_8_i8_i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movb $0, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_8_i8_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: movb $0, (%rdi)
; AVX2-NEXT: retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 8
  %trunc = trunc i64 %shl to i8
  store i8 %trunc, i8* %out
  ret void
}