; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

      6 define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) {
      7 ; SSE-LABEL: sdiv_vec8x16:
      8 ; SSE:       # BB#0: # %entry
      9 ; SSE-NEXT:    movdqa %xmm0, %xmm1
     10 ; SSE-NEXT:    psraw $15, %xmm1
     11 ; SSE-NEXT:    psrlw $11, %xmm1
     12 ; SSE-NEXT:    paddw %xmm0, %xmm1
     13 ; SSE-NEXT:    psraw $5, %xmm1
     14 ; SSE-NEXT:    movdqa %xmm1, %xmm0
     15 ; SSE-NEXT:    retq
     16 ;
     17 ; AVX-LABEL: sdiv_vec8x16:
     18 ; AVX:       # BB#0: # %entry
     19 ; AVX-NEXT:    vpsraw $15, %xmm0, %xmm1
     20 ; AVX-NEXT:    vpsrlw $11, %xmm1, %xmm1
     21 ; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
     22 ; AVX-NEXT:    vpsraw $5, %xmm0, %xmm0
     23 ; AVX-NEXT:    retq
     24 entry:
     25   %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
     26   ret <8 x i16> %0
     27 }
     28 
     29 define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
     30 ; SSE-LABEL: sdiv_vec8x16_minsize:
     31 ; SSE:       # BB#0: # %entry
     32 ; SSE-NEXT:    movdqa %xmm0, %xmm1
     33 ; SSE-NEXT:    psraw $15, %xmm1
     34 ; SSE-NEXT:    psrlw $11, %xmm1
     35 ; SSE-NEXT:    paddw %xmm0, %xmm1
     36 ; SSE-NEXT:    psraw $5, %xmm1
     37 ; SSE-NEXT:    movdqa %xmm1, %xmm0
     38 ; SSE-NEXT:    retq
     39 ;
     40 ; AVX-LABEL: sdiv_vec8x16_minsize:
     41 ; AVX:       # BB#0: # %entry
     42 ; AVX-NEXT:    vpsraw $15, %xmm0, %xmm1
     43 ; AVX-NEXT:    vpsrlw $11, %xmm1, %xmm1
     44 ; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
     45 ; AVX-NEXT:    vpsraw $5, %xmm0, %xmm0
     46 ; AVX-NEXT:    retq
     47 entry:
     48   %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
     49   ret <8 x i16> %0
     50 }
     51 
     52 define <4 x i32> @sdiv_zero(<4 x i32> %var) {
     53 ; SSE-LABEL: sdiv_zero:
     54 ; SSE:       # BB#0: # %entry
     55 ; SSE-NEXT:    pextrd $1, %xmm0, %eax
     56 ; SSE-NEXT:    xorl %esi, %esi
     57 ; SSE-NEXT:    cltd
     58 ; SSE-NEXT:    idivl %esi
     59 ; SSE-NEXT:    movl %eax, %ecx
     60 ; SSE-NEXT:    movd %xmm0, %eax
     61 ; SSE-NEXT:    cltd
     62 ; SSE-NEXT:    idivl %esi
     63 ; SSE-NEXT:    movd %eax, %xmm1
     64 ; SSE-NEXT:    pinsrd $1, %ecx, %xmm1
     65 ; SSE-NEXT:    pextrd $2, %xmm0, %eax
     66 ; SSE-NEXT:    cltd
     67 ; SSE-NEXT:    idivl %esi
     68 ; SSE-NEXT:    pinsrd $2, %eax, %xmm1
     69 ; SSE-NEXT:    pextrd $3, %xmm0, %eax
     70 ; SSE-NEXT:    cltd
     71 ; SSE-NEXT:    idivl %esi
     72 ; SSE-NEXT:    pinsrd $3, %eax, %xmm1
     73 ; SSE-NEXT:    movdqa %xmm1, %xmm0
     74 ; SSE-NEXT:    retq
     75 ;
     76 ; AVX-LABEL: sdiv_zero:
     77 ; AVX:       # BB#0: # %entry
     78 ; AVX-NEXT:    vpextrd $1, %xmm0, %eax
     79 ; AVX-NEXT:    xorl %esi, %esi
     80 ; AVX-NEXT:    cltd
     81 ; AVX-NEXT:    idivl %esi
     82 ; AVX-NEXT:    movl %eax, %ecx
     83 ; AVX-NEXT:    vmovd %xmm0, %eax
     84 ; AVX-NEXT:    cltd
     85 ; AVX-NEXT:    idivl %esi
     86 ; AVX-NEXT:    vmovd %eax, %xmm1
     87 ; AVX-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
     88 ; AVX-NEXT:    vpextrd $2, %xmm0, %eax
     89 ; AVX-NEXT:    cltd
     90 ; AVX-NEXT:    idivl %esi
     91 ; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
     92 ; AVX-NEXT:    vpextrd $3, %xmm0, %eax
     93 ; AVX-NEXT:    cltd
     94 ; AVX-NEXT:    idivl %esi
     95 ; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
     96 ; AVX-NEXT:    retq
     97 entry:
     98   %0 = sdiv <4 x i32> %var, <i32 0, i32 0, i32 0, i32 0>
     99   ret <4 x i32> %0
    100 }
    101 
    102 define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
    103 ; SSE-LABEL: sdiv_vec4x32:
    104 ; SSE:       # BB#0: # %entry
    105 ; SSE-NEXT:    movdqa %xmm0, %xmm1
    106 ; SSE-NEXT:    psrad $31, %xmm1
    107 ; SSE-NEXT:    psrld $28, %xmm1
    108 ; SSE-NEXT:    paddd %xmm0, %xmm1
    109 ; SSE-NEXT:    psrad $4, %xmm1
    110 ; SSE-NEXT:    movdqa %xmm1, %xmm0
    111 ; SSE-NEXT:    retq
    112 ;
    113 ; AVX-LABEL: sdiv_vec4x32:
    114 ; AVX:       # BB#0: # %entry
    115 ; AVX-NEXT:    vpsrad $31, %xmm0, %xmm1
    116 ; AVX-NEXT:    vpsrld $28, %xmm1, %xmm1
    117 ; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
    118 ; AVX-NEXT:    vpsrad $4, %xmm0, %xmm0
    119 ; AVX-NEXT:    retq
    120 entry:
    121 %0 = sdiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
    122 ret <4 x i32> %0
    123 }
    124 
    125 define <4 x i32> @sdiv_negative(<4 x i32> %var) {
    126 ; SSE-LABEL: sdiv_negative:
    127 ; SSE:       # BB#0: # %entry
    128 ; SSE-NEXT:    movdqa %xmm0, %xmm1
    129 ; SSE-NEXT:    psrad $31, %xmm1
    130 ; SSE-NEXT:    psrld $28, %xmm1
    131 ; SSE-NEXT:    paddd %xmm0, %xmm1
    132 ; SSE-NEXT:    psrad $4, %xmm1
    133 ; SSE-NEXT:    pxor %xmm0, %xmm0
    134 ; SSE-NEXT:    psubd %xmm1, %xmm0
    135 ; SSE-NEXT:    retq
    136 ;
    137 ; AVX-LABEL: sdiv_negative:
    138 ; AVX:       # BB#0: # %entry
    139 ; AVX-NEXT:    vpsrad $31, %xmm0, %xmm1
    140 ; AVX-NEXT:    vpsrld $28, %xmm1, %xmm1
    141 ; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
    142 ; AVX-NEXT:    vpsrad $4, %xmm0, %xmm0
    143 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    144 ; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
    145 ; AVX-NEXT:    retq
    146 entry:
    147 %0 = sdiv <4 x i32> %var, <i32 -16, i32 -16, i32 -16, i32 -16>
    148 ret <4 x i32> %0
    149 }
    150 
    151 define <8 x i32> @sdiv8x32(<8 x i32> %var) {
    152 ; SSE-LABEL: sdiv8x32:
    153 ; SSE:       # BB#0: # %entry
    154 ; SSE-NEXT:    movdqa %xmm0, %xmm2
    155 ; SSE-NEXT:    psrad $31, %xmm2
    156 ; SSE-NEXT:    psrld $26, %xmm2
    157 ; SSE-NEXT:    paddd %xmm0, %xmm2
    158 ; SSE-NEXT:    psrad $6, %xmm2
    159 ; SSE-NEXT:    movdqa %xmm1, %xmm3
    160 ; SSE-NEXT:    psrad $31, %xmm3
    161 ; SSE-NEXT:    psrld $26, %xmm3
    162 ; SSE-NEXT:    paddd %xmm1, %xmm3
    163 ; SSE-NEXT:    psrad $6, %xmm3
    164 ; SSE-NEXT:    movdqa %xmm2, %xmm0
    165 ; SSE-NEXT:    movdqa %xmm3, %xmm1
    166 ; SSE-NEXT:    retq
    167 ;
    168 ; AVX1-LABEL: sdiv8x32:
    169 ; AVX1:       # BB#0: # %entry
    170 ; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
    171 ; AVX1-NEXT:    vpsrld $26, %xmm1, %xmm1
    172 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
    173 ; AVX1-NEXT:    vpsrad $6, %xmm1, %xmm1
    174 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    175 ; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
    176 ; AVX1-NEXT:    vpsrld $26, %xmm2, %xmm2
    177 ; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
    178 ; AVX1-NEXT:    vpsrad $6, %xmm0, %xmm0
    179 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    180 ; AVX1-NEXT:    retq
    181 ;
    182 ; AVX2-LABEL: sdiv8x32:
    183 ; AVX2:       # BB#0: # %entry
    184 ; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm1
    185 ; AVX2-NEXT:    vpsrld $26, %ymm1, %ymm1
    186 ; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
    187 ; AVX2-NEXT:    vpsrad $6, %ymm0, %ymm0
    188 ; AVX2-NEXT:    retq
    189 entry:
    190 %0 = sdiv <8 x i32> %var, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
    191 ret <8 x i32> %0
    192 }
    193 
    194 define <16 x i16> @sdiv16x16(<16 x i16> %var) {
    195 ; SSE-LABEL: sdiv16x16:
    196 ; SSE:       # BB#0: # %entry
    197 ; SSE-NEXT:    movdqa %xmm0, %xmm2
    198 ; SSE-NEXT:    psraw $15, %xmm2
    199 ; SSE-NEXT:    psrlw $14, %xmm2
    200 ; SSE-NEXT:    paddw %xmm0, %xmm2
    201 ; SSE-NEXT:    psraw $2, %xmm2
    202 ; SSE-NEXT:    movdqa %xmm1, %xmm3
    203 ; SSE-NEXT:    psraw $15, %xmm3
    204 ; SSE-NEXT:    psrlw $14, %xmm3
    205 ; SSE-NEXT:    paddw %xmm1, %xmm3
    206 ; SSE-NEXT:    psraw $2, %xmm3
    207 ; SSE-NEXT:    movdqa %xmm2, %xmm0
    208 ; SSE-NEXT:    movdqa %xmm3, %xmm1
    209 ; SSE-NEXT:    retq
    210 ;
    211 ; AVX1-LABEL: sdiv16x16:
    212 ; AVX1:       # BB#0: # %entry
    213 ; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
    214 ; AVX1-NEXT:    vpsrlw $14, %xmm1, %xmm1
    215 ; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm1
    216 ; AVX1-NEXT:    vpsraw $2, %xmm1, %xmm1
    217 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    218 ; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm2
    219 ; AVX1-NEXT:    vpsrlw $14, %xmm2, %xmm2
    220 ; AVX1-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
    221 ; AVX1-NEXT:    vpsraw $2, %xmm0, %xmm0
    222 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    223 ; AVX1-NEXT:    retq
    224 ;
    225 ; AVX2-LABEL: sdiv16x16:
    226 ; AVX2:       # BB#0: # %entry
    227 ; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm1
    228 ; AVX2-NEXT:    vpsrlw $14, %ymm1, %ymm1
    229 ; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
    230 ; AVX2-NEXT:    vpsraw $2, %ymm0, %ymm0
    231 ; AVX2-NEXT:    retq
    232 entry:
    233   %a0 = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
    234   ret <16 x i16> %a0
    235 }
    236 
    237 define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
    238 ; SSE-LABEL: sdiv_non_splat:
    239 ; SSE:       # BB#0:
    240 ; SSE-NEXT:    pextrd $1, %xmm0, %eax
    241 ; SSE-NEXT:    xorl %ecx, %ecx
    242 ; SSE-NEXT:    cltd
    243 ; SSE-NEXT:    idivl %ecx
    244 ; SSE-NEXT:    movd %xmm0, %edx
    245 ; SSE-NEXT:    movl %edx, %esi
    246 ; SSE-NEXT:    shrl $31, %esi
    247 ; SSE-NEXT:    addl %edx, %esi
    248 ; SSE-NEXT:    sarl %esi
    249 ; SSE-NEXT:    movd %esi, %xmm1
    250 ; SSE-NEXT:    pinsrd $1, %eax, %xmm1
    251 ; SSE-NEXT:    pextrd $2, %xmm0, %eax
    252 ; SSE-NEXT:    cltd
    253 ; SSE-NEXT:    idivl %ecx
    254 ; SSE-NEXT:    pinsrd $2, %eax, %xmm1
    255 ; SSE-NEXT:    pextrd $3, %xmm0, %eax
    256 ; SSE-NEXT:    cltd
    257 ; SSE-NEXT:    idivl %ecx
    258 ; SSE-NEXT:    pinsrd $3, %eax, %xmm1
    259 ; SSE-NEXT:    movdqa %xmm1, %xmm0
    260 ; SSE-NEXT:    retq
    261 ;
    262 ; AVX-LABEL: sdiv_non_splat:
    263 ; AVX:       # BB#0:
    264 ; AVX-NEXT:    vpextrd $1, %xmm0, %eax
    265 ; AVX-NEXT:    xorl %ecx, %ecx
    266 ; AVX-NEXT:    cltd
    267 ; AVX-NEXT:    idivl %ecx
    268 ; AVX-NEXT:    vmovd %xmm0, %edx
    269 ; AVX-NEXT:    movl %edx, %esi
    270 ; AVX-NEXT:    shrl $31, %esi
    271 ; AVX-NEXT:    addl %edx, %esi
    272 ; AVX-NEXT:    sarl %esi
    273 ; AVX-NEXT:    vmovd %esi, %xmm1
    274 ; AVX-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
    275 ; AVX-NEXT:    vpextrd $2, %xmm0, %eax
    276 ; AVX-NEXT:    cltd
    277 ; AVX-NEXT:    idivl %ecx
    278 ; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
    279 ; AVX-NEXT:    vpextrd $3, %xmm0, %eax
    280 ; AVX-NEXT:    cltd
    281 ; AVX-NEXT:    idivl %ecx
    282 ; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
    283 ; AVX-NEXT:    retq
    284   %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
    285   ret <4 x i32> %y
    286 }
    287