; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X32-SSE --check-prefix=X32-SSE42
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE --check-prefix=X64-SSE42

;
; AND/XOR/OR i24 as v3i8
;

; Both i24 arguments are reinterpreted as <3 x i8>, ANDed as vectors, and the
; result is reinterpreted as i24. The checked output is one scalar andl.
define i24 @and_i24_as_v3i8(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: and_i24_as_v3i8:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: and_i24_as_v3i8:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    andl %esi, %edi
; X64-SSE-NEXT:    movl %edi, %eax
; X64-SSE-NEXT:    retq
  %lhs = bitcast i24 %a to <3 x i8>
  %rhs = bitcast i24 %b to <3 x i8>
  %op = and <3 x i8> %lhs, %rhs
  %res = bitcast <3 x i8> %op to i24
  ret i24 %res
}

; Same shape as the AND case above, using a vector XOR. The checked output is
; one scalar xorl.
define i24 @xor_i24_as_v3i8(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: xor_i24_as_v3i8:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: xor_i24_as_v3i8:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    xorl %esi, %edi
; X64-SSE-NEXT:    movl %edi, %eax
; X64-SSE-NEXT:    retq
  %lhs = bitcast i24 %a to <3 x i8>
  %rhs = bitcast i24 %b to <3 x i8>
  %op = xor <3 x i8> %lhs, %rhs
  %res = bitcast <3 x i8> %op to i24
  ret i24 %res
}

; Same shape as the AND case above, using a vector OR. The checked output is
; one scalar orl.
define i24 @or_i24_as_v3i8(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: or_i24_as_v3i8:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: or_i24_as_v3i8:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    orl %esi, %edi
; X64-SSE-NEXT:    movl %edi, %eax
; X64-SSE-NEXT:    retq
  %lhs = bitcast i24 %a to <3 x i8>
  %rhs = bitcast i24 %b to <3 x i8>
  %op = or <3 x i8> %lhs, %rhs
  %res = bitcast <3 x i8> %op to i24
  ret i24 %res
}

;
; AND/XOR/OR i24 as v8i3
;

; Both i24 arguments are reinterpreted as <8 x i3>, ANDed as vectors, and the
; result is reinterpreted as i24. The checked output is one scalar andl.
define i24 @and_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: and_i24_as_v8i3:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: and_i24_as_v8i3:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    andl %esi, %edi
; X64-SSE-NEXT:    movl %edi, %eax
; X64-SSE-NEXT:    retq
  %lhs = bitcast i24 %a to <8 x i3>
  %rhs = bitcast i24 %b to <8 x i3>
  %op = and <8 x i3> %lhs, %rhs
  %res = bitcast <8 x i3> %op to i24
  ret i24 %res
}

; Same shape as the <8 x i3> AND case, using a vector XOR. The checked output
; is one scalar xorl.
define i24 @xor_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: xor_i24_as_v8i3:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: xor_i24_as_v8i3:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    xorl %esi, %edi
; X64-SSE-NEXT:    movl %edi, %eax
; X64-SSE-NEXT:    retq
  %lhs = bitcast i24 %a to <8 x i3>
  %rhs = bitcast i24 %b to <8 x i3>
  %op = xor <8 x i3> %lhs, %rhs
  %res = bitcast <8 x i3> %op to i24
  ret i24 %res
}

; Same shape as the <8 x i3> AND case, using a vector OR. The checked output
; is one scalar orl.
define i24 @or_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: or_i24_as_v8i3:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: or_i24_as_v8i3:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    orl %esi, %edi
; X64-SSE-NEXT:    movl %edi, %eax
; X64-SSE-NEXT:    retq
  %lhs = bitcast i24 %a to <8 x i3>
  %rhs = bitcast i24 %b to <8 x i3>
  %op = or <8 x i3> %lhs, %rhs
  %res = bitcast <8 x i3> %op to i24
  ret i24 %res
}

;
; AND/XOR/OR v3i8 as i24
;

; Both <3 x i8> arguments are reinterpreted as i24, ANDed as scalars, and the
; result is reinterpreted as <3 x i8>. The checked output assembles each
; operand in an XMM register, combines them with pand, and extracts the three
; result bytes with pextrb.
define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: and_v3i8_as_i24:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pand %xmm0, %xmm1
; X32-SSE-NEXT:    pextrb $0, %xmm1, %eax
; X32-SSE-NEXT:    pextrb $4, %xmm1, %edx
; X32-SSE-NEXT:    pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-SSE-NEXT:    # kill: %DL<def> %DL<kill> %EDX<kill>
; X32-SSE-NEXT:    # kill: %CL<def> %CL<kill> %ECX<kill>
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: and_v3i8_as_i24:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    movd %ecx, %xmm0
; X64-SSE-NEXT:    pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT:    pinsrd $2, %r9d, %xmm0
; X64-SSE-NEXT:    movd %edi, %xmm1
; X64-SSE-NEXT:    pinsrd $1, %esi, %xmm1
; X64-SSE-NEXT:    pinsrd $2, %edx, %xmm1
; X64-SSE-NEXT:    pand %xmm0, %xmm1
; X64-SSE-NEXT:    pextrb $0, %xmm1, %eax
; X64-SSE-NEXT:    pextrb $4, %xmm1, %edx
; X64-SSE-NEXT:    pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-SSE-NEXT:    # kill: %DL<def> %DL<kill> %EDX<kill>
; X64-SSE-NEXT:    # kill: %CL<def> %CL<kill> %ECX<kill>
; X64-SSE-NEXT:    retq
  %lhs = bitcast <3 x i8> %a to i24
  %rhs = bitcast <3 x i8> %b to i24
  %op = and i24 %lhs, %rhs
  %res = bitcast i24 %op to <3 x i8>
  ret <3 x i8> %res
}

; Same shape as the <3 x i8> AND case, using a scalar XOR. The checked output
; combines the two XMM operands with pxor.
define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: xor_v3i8_as_i24:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pxor %xmm0, %xmm1
; X32-SSE-NEXT:    pextrb $0, %xmm1, %eax
; X32-SSE-NEXT:    pextrb $4, %xmm1, %edx
; X32-SSE-NEXT:    pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-SSE-NEXT:    # kill: %DL<def> %DL<kill> %EDX<kill>
; X32-SSE-NEXT:    # kill: %CL<def> %CL<kill> %ECX<kill>
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: xor_v3i8_as_i24:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    movd %ecx, %xmm0
; X64-SSE-NEXT:    pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT:    pinsrd $2, %r9d, %xmm0
; X64-SSE-NEXT:    movd %edi, %xmm1
; X64-SSE-NEXT:    pinsrd $1, %esi, %xmm1
; X64-SSE-NEXT:    pinsrd $2, %edx, %xmm1
; X64-SSE-NEXT:    pxor %xmm0, %xmm1
; X64-SSE-NEXT:    pextrb $0, %xmm1, %eax
; X64-SSE-NEXT:    pextrb $4, %xmm1, %edx
; X64-SSE-NEXT:    pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-SSE-NEXT:    # kill: %DL<def> %DL<kill> %EDX<kill>
; X64-SSE-NEXT:    # kill: %CL<def> %CL<kill> %ECX<kill>
; X64-SSE-NEXT:    retq
  %lhs = bitcast <3 x i8> %a to i24
  %rhs = bitcast <3 x i8> %b to i24
  %op = xor i24 %lhs, %rhs
  %res = bitcast i24 %op to <3 x i8>
  ret <3 x i8> %res
}

; Same shape as the <3 x i8> AND case, using a scalar OR. The checked output
; combines the two XMM operands with por.
define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: or_v3i8_as_i24:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    por %xmm0, %xmm1
; X32-SSE-NEXT:    pextrb $0, %xmm1, %eax
; X32-SSE-NEXT:    pextrb $4, %xmm1, %edx
; X32-SSE-NEXT:    pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-SSE-NEXT:    # kill: %DL<def> %DL<kill> %EDX<kill>
; X32-SSE-NEXT:    # kill: %CL<def> %CL<kill> %ECX<kill>
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: or_v3i8_as_i24:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    movd %ecx, %xmm0
; X64-SSE-NEXT:    pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT:    pinsrd $2, %r9d, %xmm0
; X64-SSE-NEXT:    movd %edi, %xmm1
; X64-SSE-NEXT:    pinsrd $1, %esi, %xmm1
; X64-SSE-NEXT:    pinsrd $2, %edx, %xmm1
; X64-SSE-NEXT:    por %xmm0, %xmm1
; X64-SSE-NEXT:    pextrb $0, %xmm1, %eax
; X64-SSE-NEXT:    pextrb $4, %xmm1, %edx
; X64-SSE-NEXT:    pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-SSE-NEXT:    # kill: %DL<def> %DL<kill> %EDX<kill>
; X64-SSE-NEXT:    # kill: %CL<def> %CL<kill> %ECX<kill>
; X64-SSE-NEXT:    retq
  %lhs = bitcast <3 x i8> %a to i24
  %rhs = bitcast <3 x i8> %b to i24
  %op = or i24 %lhs, %rhs
  %res = bitcast i24 %op to <3 x i8>
  ret <3 x i8> %res
}

;
; AND/XOR/OR v8i3 as i24
;

; Both <8 x i3> arguments are reinterpreted as i24, ANDed as scalars, and the
; result is reinterpreted as <8 x i3>. The checked output is one andps on the
; two XMM argument registers.
define <8 x i3> @and_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
; X32-SSE-LABEL: and_v8i3_as_i24:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    andps %xmm1, %xmm0
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: and_v8i3_as_i24:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    andps %xmm1, %xmm0
; X64-SSE-NEXT:    retq
  %lhs = bitcast <8 x i3> %a to i24
  %rhs = bitcast <8 x i3> %b to i24
  %op = and i24 %lhs, %rhs
  %res = bitcast i24 %op to <8 x i3>
  ret <8 x i3> %res
}

; Same shape as the <8 x i3> AND case above, using a scalar XOR. The checked
; output is one xorps.
define <8 x i3> @xor_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
; X32-SSE-LABEL: xor_v8i3_as_i24:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    xorps %xmm1, %xmm0
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: xor_v8i3_as_i24:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    xorps %xmm1, %xmm0
; X64-SSE-NEXT:    retq
  %lhs = bitcast <8 x i3> %a to i24
  %rhs = bitcast <8 x i3> %b to i24
  %op = xor i24 %lhs, %rhs
  %res = bitcast i24 %op to <8 x i3>
  ret <8 x i3> %res
}

; Same shape as the <8 x i3> AND case above, using a scalar OR. The checked
; output is one orps.
define <8 x i3> @or_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
; X32-SSE-LABEL: or_v8i3_as_i24:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    orps %xmm1, %xmm0
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: or_v8i3_as_i24:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    orps %xmm1, %xmm0
; X64-SSE-NEXT:    retq
  %lhs = bitcast <8 x i3> %a to i24
  %rhs = bitcast <8 x i3> %b to i24
  %op = or i24 %lhs, %rhs
  %res = bitcast i24 %op to <8 x i3>
  ret <8 x i3> %res
}